All of lore.kernel.org
 help / color / mirror / Atom feed
From: Abhi Das <adas@redhat.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	cluster-devel@redhat.com
Cc: Abhi Das <adas@redhat.com>
Subject: [RFC PATCH 5/5] gfs2: Add xreaddir file operation and supporting functions
Date: Fri, 25 Jul 2014 12:38:08 -0500	[thread overview]
Message-ID: <1406309888-10749-6-git-send-email-adas@redhat.com> (raw)
In-Reply-To: <1406309888-10749-1-git-send-email-adas@redhat.com>

This patch adds support in GFS2 for the xgetdents syscall by
implementing the xreaddir file operation.

GFS2 uses vbufs (buffer backed by a vector of pages) to store
intermediate data like dirents, stat info and extended attribute
keys/values to eventually bundle them into a container structure
to return to the user.

Signed-off-by: Abhi Das <adas@redhat.com>
---
 fs/gfs2/Makefile     |    3 +-
 fs/gfs2/dir.c        |   80 ++--
 fs/gfs2/dir.h        |   13 +-
 fs/gfs2/export.c     |    2 +-
 fs/gfs2/file.c       |   17 +-
 fs/gfs2/incore.h     |    6 +
 fs/gfs2/inode.c      |    3 +-
 fs/gfs2/inode.h      |    5 +
 fs/gfs2/ops_fstype.c |    4 +
 fs/gfs2/sys.c        |   26 +-
 fs/gfs2/util.c       |    9 +
 fs/gfs2/xattr.c      |   27 +-
 fs/gfs2/xattr.h      |   23 ++
 fs/gfs2/xreaddir.c   | 1024 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/xreaddir.h   |   84 +++++
 15 files changed, 1260 insertions(+), 66 deletions(-)
 create mode 100644 fs/gfs2/xreaddir.c
 create mode 100644 fs/gfs2/xreaddir.h

diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 8612820..da8253b 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -4,7 +4,8 @@ gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
 	glops.o log.o lops.o main.o meta_io.o \
 	aops.o dentry.o export.o file.o \
 	ops_fstype.o inode.o quota.o \
-	recovery.o rgrp.o super.o sys.o trans.o util.o
+	recovery.o rgrp.o super.o sys.o \
+	trans.o util.o xreaddir.o
 
 gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
 
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 1a349f9..21f5926 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -74,15 +74,13 @@
 #include "trans.h"
 #include "bmap.h"
 #include "util.h"
+#include "xreaddir.h"
 
 #define IS_LEAF     1 /* Hashed (leaf) directory */
 #define IS_DINODE   2 /* Linear (stuffed dinode block) directory */
 
 #define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
 
-#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
-#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
-
 struct qstr gfs2_qdot __read_mostly;
 struct qstr gfs2_qdotdot __read_mostly;
 
@@ -1185,17 +1183,13 @@ out_kfree:
  *   lt: returns -1
  *   eq: returns 0
  */
-
-static int compare_dents(const void *a, const void *b)
+int compare_dents_i(const struct gfs2_dirent *dent_a,
+		  const struct gfs2_dirent *dent_b)
 {
-	const struct gfs2_dirent *dent_a, *dent_b;
 	u32 hash_a, hash_b;
 	int ret = 0;
 
-	dent_a = *(const struct gfs2_dirent **)a;
 	hash_a = be32_to_cpu(dent_a->de_hash);
-
-	dent_b = *(const struct gfs2_dirent **)b;
 	hash_b = be32_to_cpu(dent_b->de_hash);
 
 	if (hash_a > hash_b)
@@ -1217,6 +1211,12 @@ static int compare_dents(const void *a, const void *b)
 	return ret;
 }
 
+/*
+ * sort()-style comparator over an array of dirent pointers: @a and @b
+ * each point at one array element. The ordering itself is delegated to
+ * compare_dents_i().
+ */
+int compare_dents(const void *a, const void *b)
+{
+	const struct gfs2_dirent *lhs = *(const struct gfs2_dirent **)a;
+	const struct gfs2_dirent *rhs = *(const struct gfs2_dirent **)b;
+
+	return compare_dents_i(lhs, rhs);
+}
+
 /**
  * do_filldir_main - read out directory entries
  * @dip: The GFS2 inode
@@ -1234,13 +1234,14 @@ static int compare_dents(const void *a, const void *b)
  */
 
 static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
-			   const struct gfs2_dirent **darr, u32 entries,
-			   int *copied)
+			   struct gfs2_xrdir_ctx *xc, const struct gfs2_dirent **darr,
+			   u32 entries, int *copied)
 {
 	const struct gfs2_dirent *dent, *dent_next;
 	u64 off, off_next;
+	u64 *dst_pos = xc ? &xc->xc_offset : &ctx->pos;
 	unsigned int x, y;
-	int run = 0;
+	int run = 0, error = 0;
 
 	sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
 
@@ -1256,29 +1257,39 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
 			dent_next = darr[y];
 			off_next = be32_to_cpu(dent_next->de_hash);
 			off_next = gfs2_disk_hash2offset(off_next);
-
-			if (off < ctx->pos)
+			if (off < *dst_pos)
 				continue;
-			ctx->pos = off;
+
+			*dst_pos = off;
 
 			if (off_next == off) {
-				if (*copied && !run)
+				if (*copied && !run) {
+					if (xc)
+						gfs2_xrdir_partial_collect(xc);
 					return 1;
+				}
 				run = 1;
 			} else
 				run = 0;
 		} else {
-			if (off < ctx->pos)
+			if (off < *dst_pos)
 				continue;
-			ctx->pos = off;
+			*dst_pos = off;
 		}
 
-		if (!dir_emit(ctx, (const char *)(dent + 1),
-				be16_to_cpu(dent->de_name_len),
-				be64_to_cpu(dent->de_inum.no_addr),
-				be16_to_cpu(dent->de_type)))
-			return 1;
-
+		if (xc) {
+			error = gfs2_xrdir_collect_dents(dent, off, xc);
+			if (error) {
+				gfs2_xrdir_partial_collect(xc);
+				return 1;
+			}
+		} else {
+			if (!dir_emit(ctx, (const char *)(dent + 1),
+				      be16_to_cpu(dent->de_name_len),
+				      be64_to_cpu(dent->de_inum.no_addr),
+				      be16_to_cpu(dent->de_type)))
+				return 1;
+		}
 		*copied = 1;
 	}
 
@@ -1286,8 +1297,7 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
 	   do_filldir fxn, we get the next entry instead of the last one in the
 	   current leaf */
 
-	ctx->pos++;
-
+	(*dst_pos)++;
 	return 0;
 }
 
@@ -1311,8 +1321,8 @@ static void gfs2_free_sort_buffer(void *ptr)
 }
 
 static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
-			      int *copied, unsigned *depth,
-			      u64 leaf_no)
+			      struct gfs2_xrdir_ctx *xc, int *copied,
+			      unsigned *depth, u64 leaf_no)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1389,7 +1399,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
 	} while(lfn);
 
 	BUG_ON(entries2 != entries);
-	error = do_filldir_main(ip, ctx, darr, entries, copied);
+	error = do_filldir_main(ip, ctx, xc, darr, entries, copied);
 out_free:
 	for(i = 0; i < leaf; i++)
 		brelse(larr[i]);
@@ -1454,7 +1464,7 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
  */
 
 static int dir_e_read(struct inode *inode, struct dir_context *ctx,
-		      struct file_ra_state *f_ra)
+		      struct gfs2_xrdir_ctx *xc, struct file_ra_state *f_ra)
 {
 	struct gfs2_inode *dip = GFS2_I(inode);
 	u32 hsize, len = 0;
@@ -1465,7 +1475,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
 	unsigned depth = 0;
 
 	hsize = 1 << dip->i_depth;
-	hash = gfs2_dir_offset2hash(ctx->pos);
+	hash = gfs2_dir_offset2hash(xc ? xc->xc_offset : ctx->pos);
 	index = hash >> (32 - dip->i_depth);
 
 	if (dip->i_hash_cache == NULL)
@@ -1477,7 +1487,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
 	gfs2_dir_readahead(inode, hsize, index, f_ra);
 
 	while (index < hsize) {
-		error = gfs2_dir_read_leaf(inode, ctx,
+		error = gfs2_dir_read_leaf(inode, ctx, xc,
 					   &copied, &depth,
 					   be64_to_cpu(lp[index]));
 		if (error)
@@ -1493,7 +1503,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
 }
 
 int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
-		  struct file_ra_state *f_ra)
+		  struct gfs2_xrdir_ctx *xc, struct file_ra_state *f_ra)
 {
 	struct gfs2_inode *dip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1507,7 +1517,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
 		return 0;
 
 	if (dip->i_diskflags & GFS2_DIF_EXHASH)
-		return dir_e_read(inode, ctx, f_ra);
+		return dir_e_read(inode, ctx, xc, f_ra);
 
 	if (!gfs2_is_stuffed(dip)) {
 		gfs2_consist_inode(dip);
@@ -1539,7 +1549,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
 			error = -EIO;
 			goto out;
 		}
-		error = do_filldir_main(dip, ctx, darr,
+		error = do_filldir_main(dip, ctx, xc, darr,
 					dip->i_entries, &copied);
 out:
 		kfree(darr);
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 126c65d..8d40590 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -12,6 +12,10 @@
 
 #include <linux/dcache.h>
 #include <linux/crc32.h>
+#include "util.h"
+
+#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
+#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
 
 struct inode;
 struct gfs2_inode;
@@ -25,6 +29,13 @@ struct gfs2_diradd {
 	struct buffer_head *bh;
 };
 
+typedef int (*process_dent_t)(const struct gfs2_dirent *, loff_t, void *, filldir_t);
+extern int compare_dents_i(const struct gfs2_dirent *dent_a,
+			   const struct gfs2_dirent *dent_b);
+extern int foreach_dent(u64 *offset, void *opaque, filldir_t filldir,
+			const struct gfs2_dirent **darr, u32 entries,
+			int *copied, process_dent_t pd_fn);
+ 
 extern struct inode *gfs2_dir_search(struct inode *dir,
 				     const struct qstr *filename,
 				     bool fail_on_exist);
@@ -40,7 +51,7 @@ static inline void gfs2_dir_no_add(struct gfs2_diradd *da)
 }
 extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
 extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
-			 struct file_ra_state *f_ra);
+			 struct gfs2_xrdir_ctx *xc, struct file_ra_state *f_ra);
 extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 			  const struct gfs2_inode *nip, unsigned int new_type);
 
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 8b9b377..1f5085d 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -114,7 +114,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
 	if (error)
 		return error;
 
-	error = gfs2_dir_read(dir, &gnfd.ctx, &f_ra);
+	error = gfs2_dir_read(dir, &gnfd.ctx, NULL, &f_ra);
 
 	gfs2_glock_dq_uninit(&gh);
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 26b3f95..d2d7561f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -16,6 +16,8 @@
 #include <linux/blkdev.h>
 #include <linux/mm.h>
 #include <linux/mount.h>
+#include <linux/stat.h>
+#include <linux/sort.h>
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/falloc.h>
@@ -40,6 +42,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "xreaddir.h"
 
 /**
  * gfs2_llseek - seek to a location in a file
@@ -100,7 +103,7 @@ static int gfs2_readdir(struct file *file, struct dir_context *ctx)
 	if (error)
 		return error;
 
-	error = gfs2_dir_read(dir, ctx, &file->f_ra);
+	error = gfs2_dir_read(dir, ctx, NULL, &file->f_ra);
 
 	gfs2_glock_dq_uninit(&d_gh);
 
@@ -562,8 +565,13 @@ int gfs2_open_common(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	mutex_init(&fp->f_fl_mutex);
-
 	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
+
+	if (S_ISDIR(inode->i_mode)) {
+		ret = gfs2_xrdir_ctx_init(fp, GFS2_SB(inode));
+		if (ret)
+			return ret;
+	}
 	file->private_data = fp;
 	return 0;
 }
@@ -617,6 +625,9 @@ static int gfs2_release(struct inode *inode, struct file *file)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 
+	if (S_ISDIR(ip->i_inode.i_mode))
+		gfs2_xrdir_ctx_uninit((struct gfs2_file *)file->private_data);
+
 	kfree(file->private_data);
 	file->private_data = NULL;
 
@@ -1075,6 +1086,7 @@ const struct file_operations gfs2_file_fops = {
 
 const struct file_operations gfs2_dir_fops = {
 	.iterate	= gfs2_readdir,
+	.xreaddir       = gfs2_xreaddir,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.open		= gfs2_open,
 	.release	= gfs2_release,
@@ -1105,6 +1117,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 
 const struct file_operations gfs2_dir_fops_nolock = {
 	.iterate	= gfs2_readdir,
+	.xreaddir       = gfs2_xreaddir,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.open		= gfs2_open,
 	.release	= gfs2_release,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 67d310c..f86b6d3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -414,6 +414,7 @@ static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode)
 struct gfs2_file {
 	struct mutex f_fl_mutex;
 	struct gfs2_holder f_fl_gh;
+	struct gfs2_xrdir_ctx *f_xrctx;
 };
 
 struct gfs2_revoke_replay {
@@ -570,6 +571,8 @@ struct gfs2_tune {
 	unsigned int gt_complain_secs;
 	unsigned int gt_statfs_quantum;
 	unsigned int gt_statfs_slow;
+	unsigned int gt_max_vb_pages; /* Max pages to utilize for vector-page buffers */
+	unsigned int gt_max_xrdir_dents; /* Maximum dents to process per collect cycle (conserves memory) */
 };
 
 enum {
@@ -812,6 +815,9 @@ struct gfs2_sbd {
 	struct dentry *debugfs_dentry_glocks;
 	struct dentry *debugfs_dentry_glstats;
 	struct dentry *debugfs_dentry_sbstats;
+
+	/* Vector Pages accounting */
+	atomic_t sd_vb_page_count;
 };
 
 static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e594..46c3602 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1833,7 +1833,8 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		}
 	}
 
-	generic_fillattr(inode, stat);
+	gfs2_getattr_i(ip, stat);
+
 	if (unlock)
 		gfs2_glock_dq_uninit(&gh);
 	else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index ba4d949..665f508 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -93,6 +93,11 @@ err:
 	return -EIO;
 }
 
+/* Fill @stat from the VFS inode embedded in @ip using generic_fillattr(). */
+static inline void gfs2_getattr_i(struct gfs2_inode *ip, struct kstat *stat)
+{
+	struct inode *inode = &ip->i_inode;
+
+	generic_fillattr(inode, stat);
+}
+
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
 				       u64 no_addr, u64 no_formal_ino,
 				       int non_block);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index bc564c0..2d541ba 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -60,6 +60,8 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
 	gt->gt_new_files_jdata = 0;
 	gt->gt_max_readahead = 1 << 18;
 	gt->gt_complain_secs = 10;
+	gt->gt_max_vb_pages = 65536;
+	gt->gt_max_xrdir_dents = 25000;
 }
 
 static struct gfs2_sbd *init_sbd(struct super_block *sb)
@@ -135,6 +137,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	atomic_set(&sdp->sd_frozen_root, 0);
 	init_waitqueue_head(&sdp->sd_frozen_root_wait);
 
+	atomic_set(&sdp->sd_vb_page_count, 0);
+
 	return sdp;
 }
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 3ab566b..279aa86 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -548,8 +548,8 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
 	return len;
 }
 
-static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
-			int check_zero, const char *buf, size_t len)
+static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, int check_zero,
+			unsigned int min, unsigned int max, const char *buf, size_t len)
 {
 	struct gfs2_tune *gt = &sdp->sd_tune;
 	unsigned int x;
@@ -562,6 +562,12 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
 	if (check_zero && !x)
 		return -EINVAL;
 
+	if (min && x < min)
+		return -EINVAL;
+
+	if (max && x > max)
+		return -EINVAL;
+
 	spin_lock(&gt->gt_spin);
 	*field = x;
 	spin_unlock(&gt->gt_spin);
@@ -578,13 +584,21 @@ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                   \
 }                                                                             \
 TUNE_ATTR_3(name, name##_show, store)
 
-#define TUNE_ATTR(name, check_zero)                                           \
+#define TUNE_ATTR(name, check_zero)                                                \
+static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)     \
+{                                                                                  \
+	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, 0, 0, buf, len); \
+}                                                                                  \
+TUNE_ATTR_2(name, name##_store)
+
+#define TUNE_ATTR_B(name, min, max)                                           \
 static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 {                                                                             \
-	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len);  \
+	return tune_set(sdp, &sdp->sd_tune.gt_##name, 0, min, max, buf, len); \
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
+
 TUNE_ATTR(quota_warn_period, 0);
 TUNE_ATTR(quota_quantum, 0);
 TUNE_ATTR(max_readahead, 0);
@@ -593,6 +607,8 @@ TUNE_ATTR(statfs_slow, 0);
 TUNE_ATTR(new_files_jdata, 0);
 TUNE_ATTR(statfs_quantum, 1);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
+TUNE_ATTR_B(max_vb_pages, 32, 8388608); /* total capacity can be 128K to 32G bytes */
+TUNE_ATTR(max_xrdir_dents, 0);
 
 static struct attribute *tune_attrs[] = {
 	&tune_attr_quota_warn_period.attr,
@@ -603,6 +619,8 @@ static struct attribute *tune_attrs[] = {
 	&tune_attr_statfs_quantum.attr,
 	&tune_attr_quota_scale.attr,
 	&tune_attr_new_files_jdata.attr,
+	&tune_attr_max_vb_pages.attr,
+	&tune_attr_max_xrdir_dents.attr,
 	NULL,
 };
 
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 2c1aee3..793f69e 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -301,6 +301,9 @@ static int vp_extend(struct vp_ctx *vpx, int size)
 {
 	struct gfs2_sbd *sdp = vpx->vp_sdp;
 
+	if ((gfs2_tune_get(sdp, gt_max_vb_pages)
+	     - atomic_read(&sdp->sd_vb_page_count)) < size)
+		goto out;
 	/* first make room for more pointers */
 	if (size <= 0)
 		return -EINVAL;
@@ -317,6 +320,7 @@ static int vp_extend(struct vp_ctx *vpx, int size)
 		goto out;
 
 	vpx->vp_size += size;
+	atomic_add(size, &sdp->sd_vb_page_count);
 	return 0;
 out:
 	return -ENOMEM;
@@ -328,6 +332,9 @@ int vp_init(struct gfs2_sbd *sdp, struct vbuf *vb, int init_cap)
 	struct vp_ctx *vpx;
 
 	cap = DIV_ROUND_UP(init_cap, PAGE_SIZE);
+	if ((gfs2_tune_get(sdp, gt_max_vb_pages)
+	     - atomic_read(&sdp->sd_vb_page_count)) < cap)
+		goto out;
 
 	vpx = kmalloc(sizeof(struct vp_ctx), GFP_KERNEL);
 	if (vpx == NULL)
@@ -344,6 +351,7 @@ int vp_init(struct gfs2_sbd *sdp, struct vbuf *vb, int init_cap)
 
 	vpx->vp_baseptr = vpx->vp_top = page_address(vpx->vp_pages[0]);
 	vpx->vp_sdp = sdp;
+	atomic_add(cap, &sdp->sd_vb_page_count);
 	vb->v_ptr = vpx->vp_baseptr;
 	vb->v_opaque = vpx;
 
@@ -373,6 +381,7 @@ void vp_uninit(struct vbuf *vb)
 
 	vp_free_pages(vpx);
 	kfree(vpx->vp_pages);
+	atomic_sub(vpx->vp_size, &vpx->vp_sdp->sd_vb_page_count);
 	kfree(vpx);
 	vb->v_ptr = vb->v_opaque = NULL;
 }
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f78..f156b21 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -11,6 +11,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/sort.h>
 #include <linux/xattr.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/posix_acl_xattr.h>
@@ -19,6 +20,7 @@
 #include "gfs2.h"
 #include "incore.h"
 #include "acl.h"
+#include "dir.h"
 #include "xattr.h"
 #include "glock.h"
 #include "inode.h"
@@ -27,6 +29,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "xreaddir.h"
 
 /**
  * ea_calc_size - returns the acutal number of bytes the request will take up
@@ -72,10 +75,6 @@ static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
 	return 0;
 }
 
-typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
-			  struct gfs2_ea_header *ea,
-			  struct gfs2_ea_header *prev, void *private);
-
 static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
 			ea_call_t ea_call, void *data)
 {
@@ -113,7 +112,7 @@ fail:
 	return -EIO;
 }
 
-static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
+int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 {
 	struct buffer_head *bh, *eabh;
 	__be64 *eablk, *end;
@@ -374,28 +373,14 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
 		return 0;
 
 	if (er->er_data_len) {
-		char *prefix = NULL;
+		char prefix[9];
 		unsigned int l = 0;
 		char c = 0;
 
 		if (ei->ei_size + ea_size > er->er_data_len)
 			return -ERANGE;
 
-		switch (ea->ea_type) {
-		case GFS2_EATYPE_USR:
-			prefix = "user.";
-			l = 5;
-			break;
-		case GFS2_EATYPE_SYS:
-			prefix = "system.";
-			l = 7;
-			break;
-		case GFS2_EATYPE_SECURITY:
-			prefix = "security.";
-			l = 9;
-			break;
-		}
-
+		l = ea_prefix(ea, prefix, 9);
 		BUG_ON(l == 0);
 
 		memcpy(er->er_data + ei->ei_size, prefix, l);
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index d392f83..c09f090 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -10,6 +10,8 @@
 #ifndef __EATTR_DOT_H__
 #define __EATTR_DOT_H__
 
+#include "dir.h"
+
 struct gfs2_inode;
 struct iattr;
 
@@ -53,9 +55,30 @@ struct gfs2_ea_location {
 	struct gfs2_ea_header *el_prev;
 };
 
+/*
+ * ea_prefix - copy the namespace prefix of an extended attribute
+ * @ea: the on-disk xattr header whose ea_type selects the prefix
+ * @buf: destination buffer
+ * @size: size of @buf in bytes; must be at least 9 (the longest prefix,
+ *        "security.", is 9 bytes)
+ *
+ * The prefix is copied WITHOUT a terminating NUL; callers are expected to
+ * use the returned length.
+ *
+ * Returns: the number of bytes copied into @buf, or 0 if ea_type is not
+ * one of the known namespaces (nothing is written to @buf in that case).
+ */
+static inline int ea_prefix(const struct gfs2_ea_header *ea, char *buf, int size)
+{
+	const char *prefix;
+	int len;
+
+	BUG_ON(size < 9);
+	switch (ea->ea_type) {
+	case GFS2_EATYPE_USR:
+		prefix = "user.";
+		len = 5;
+		break;
+	case GFS2_EATYPE_SYS:
+		prefix = "system.";
+		len = 7;
+		break;
+	case GFS2_EATYPE_SECURITY:
+		prefix = "security.";
+		len = 9;
+		break;
+	default:
+		return 0;
+	}
+
+	/* fixed-length byte copy: memcpy states the intent, strncpy does not */
+	memcpy(buf, prefix, len);
+	return len;
+}
+
 extern int __gfs2_xattr_set(struct inode *inode, const char *name,
 			    const void *value, size_t size,
 			    int flags, int type);
+typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
+			  struct gfs2_ea_header *ea,
+			  struct gfs2_ea_header *prev, void *private);
+extern int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data);
 extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
 extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
 
diff --git a/fs/gfs2/xreaddir.c b/fs/gfs2/xreaddir.c
new file mode 100644
index 0000000..44e0232
--- /dev/null
+++ b/fs/gfs2/xreaddir.c
@@ -0,0 +1,1024 @@
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/uio.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/math64.h>
+#include <linux/mount.h>
+#include <linux/stat.h>
+#include <linux/sort.h>
+#include <linux/fs.h>
+#include <linux/gfs2_ondisk.h>
+#include <linux/falloc.h>
+#include <linux/swap.h>
+#include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <asm/uaccess.h>
+#include <linux/dlm.h>
+#include <linux/dlm_plock.h>
+
+#include "gfs2.h"
+#include "incore.h"
+#include "bmap.h"
+#include "dir.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+#include "util.h"
+#include "xattr.h"
+#include "xreaddir.h"
+
+/*
+ * Return 1 if @dent is a directory-type entry named "." or "..",
+ * 0 otherwise. The name is taken from the bytes that immediately
+ * follow the dirent header.
+ */
+static int gfs2_dirent_dot_or_dotdot(const struct gfs2_dirent *dent)
+{
+	const char *name = (char *)(dent + 1);
+	unsigned int len = be16_to_cpu(dent->de_name_len);
+
+	if (be16_to_cpu(dent->de_type) != DT_DIR)
+		return 0;
+	if (len == 1)
+		return name[0] == '.';
+	if (len == 2)
+		return strncmp(name, "..", 2) == 0;
+	return 0;
+}
+
+/*
+ * Compare the inode blocks of two entries
+ *
+ * @opaque is the gfs2_xrdir_ctx whose xc_dirents vbuf is read from;
+ * @a and @b each point at a gfs2_xdirent pointer, and the x_ino field
+ * of each entry is fetched with vp_read().
+ *
+ * Returns 1 if a's block number is greater, -1 if it is smaller and
+ * 0 if both are equal, so that the result is consistent regardless of
+ * the order in which the two entries are passed.
+ */
+int ctx_compare_dent_iblks(void *opaque, const void *a, const void *b)
+{
+	struct gfs2_xrdir_ctx *xc = opaque;
+	const struct gfs2_xdirent *a_vb_p = *(const struct gfs2_xdirent * const *)a;
+	const struct gfs2_xdirent *b_vb_p = *(const struct gfs2_xdirent * const *)b;
+	u64 a_blkno, b_blkno;
+
+	vp_read(&xc->xc_dirents, &a_blkno, &a_vb_p->x_ino, sizeof(u64));
+	vp_read(&xc->xc_dirents, &b_blkno, &b_vb_p->x_ino, sizeof(u64));
+
+	if (a_blkno > b_blkno)
+		return 1;
+	if (a_blkno < b_blkno)
+		return -1;
+	return 0;
+}
+
+/*
+ * Compare the xattr blocks of two entries
+ *
+ * @opaque is the gfs2_xrdir_ctx whose xc_dirents vbuf is read from;
+ * @a and @b each point at a gfs2_xdirent pointer, and the x_eablk field
+ * of each entry is fetched with vp_read().
+ *
+ * Returns 1 if a's xattr block number is greater, -1 if it is smaller
+ * and 0 if both are equal, so that entries sharing the same x_eablk
+ * value compare consistently in either argument order.
+ */
+int ctx_compare_dent_eablks(void *opaque, const void *a, const void *b)
+{
+	struct gfs2_xrdir_ctx *xc = opaque;
+	const struct gfs2_xdirent *a_vb_p = *(const struct gfs2_xdirent * const *)a;
+	const struct gfs2_xdirent *b_vb_p = *(const struct gfs2_xdirent * const *)b;
+	u64 a_blkno, b_blkno;
+
+	vp_read(&xc->xc_dirents, &a_blkno, &a_vb_p->x_eablk, sizeof(u64));
+	vp_read(&xc->xc_dirents, &b_blkno, &b_vb_p->x_eablk, sizeof(u64));
+
+	if (a_blkno > b_blkno)
+		return 1;
+	if (a_blkno < b_blkno)
+		return -1;
+	return 0;
+}
+
+/*
+ * Compare two entries based on their hash value
+ *
+ * @opaque is the gfs2_xrdir_ctx whose xc_dirents vbuf is read from;
+ * @a and @b each point at a gfs2_xdirent pointer.
+ *
+ * Ordering: by x_hash first, then by x_namelen, then by the name bytes.
+ * Returns >0, <0 or 0 accordingly.
+ *
+ * The names are compared in small chunks through on-stack buffers, so
+ * the comparison needs no memory allocation and therefore cannot
+ * misreport two different names as equal because an allocation failed.
+ */
+int ctx_compare_dents(void *opaque, const void *a, const void *b)
+{
+	struct gfs2_xrdir_ctx *xc = opaque;
+	const struct gfs2_xdirent *a_vb_p = *(const struct gfs2_xdirent * const *)a;
+	const struct gfs2_xdirent *b_vb_p = *(const struct gfs2_xdirent * const *)b;
+	char a_chunk[32], b_chunk[32];
+	unsigned int len_a, len_b, off, n;
+	u32 a_hash, b_hash;
+	int ret;
+
+	vp_read(&xc->xc_dirents, &a_hash, &a_vb_p->x_hash, sizeof(u32));
+	vp_read(&xc->xc_dirents, &b_hash, &b_vb_p->x_hash, sizeof(u32));
+	if (a_hash != b_hash)
+		return a_hash > b_hash ? 1 : -1;
+
+	vp_read(&xc->xc_dirents, &len_a, &a_vb_p->x_namelen, sizeof(unsigned int));
+	vp_read(&xc->xc_dirents, &len_b, &b_vb_p->x_namelen, sizeof(unsigned int));
+	if (len_a != len_b)
+		return len_a > len_b ? 1 : -1;
+
+	/* same hash, same length: the first differing chunk decides */
+	for (off = 0; off < len_a; off += n) {
+		n = len_a - off;
+		if (n > sizeof(a_chunk))
+			n = sizeof(a_chunk);
+
+		vp_read(&xc->xc_dirents, a_chunk, a_vb_p->x_name + off, n);
+		vp_read(&xc->xc_dirents, b_chunk, b_vb_p->x_name + off, n);
+
+		ret = memcmp(a_chunk, b_chunk, n);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Release the xreaddir context attached to @fp, if there is one: the
+ * dirent pointer array, the three vbufs and the context itself. The
+ * f_xrctx pointer is cleared afterwards. A NULL @fp or a file without
+ * a context is a no-op.
+ */
+void gfs2_xrdir_ctx_uninit(struct gfs2_file *fp)
+{
+	struct gfs2_xrdir_ctx *xc = fp ? fp->f_xrctx : NULL;
+
+	if (xc == NULL)
+		return;
+
+	/* kfree() accepts NULL, so the pointer array needs no guard */
+	kfree(xc->xc_vb_dptrs);
+	vp_uninit(&xc->xc_xattr_values);
+	vp_uninit(&xc->xc_xattr_keys);
+	vp_uninit(&xc->xc_dirents);
+	kfree(xc);
+	fp->f_xrctx = NULL;
+}
+
+/*
+ * gfs2_xrdir_ctx_init - allocate and attach an xreaddir context
+ * @fp: the gfs2 file the context is attached to (must not have one yet)
+ * @sdp: the superblock, passed through to vp_init()
+ *
+ * Allocates a zeroed context and initialises its three vbufs (dirents,
+ * xattr keys, xattr values). On success the context is flagged
+ * XC_FL_ALLOCATED and stored in fp->f_xrctx.
+ *
+ * On failure everything set up so far is torn down in reverse order.
+ * The unwinding is done here rather than through
+ * gfs2_xrdir_ctx_uninit(), because fp->f_xrctx is still NULL at this
+ * point and that function would return without releasing anything.
+ * NOTE(review): assumes a failed vp_init() leaves nothing behind that
+ * needs vp_uninit() - confirm against vp_init().
+ *
+ * Returns: 0 on success, -EINVAL if @fp is NULL, -ENOMEM otherwise.
+ */
+int gfs2_xrdir_ctx_init(struct gfs2_file *fp, struct gfs2_sbd *sdp)
+{
+	struct gfs2_xrdir_ctx *xc;
+
+	if (!fp)
+		return -EINVAL;
+
+	BUG_ON(fp->f_xrctx != NULL);
+
+	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+	if (xc == NULL)
+		return -ENOMEM;
+
+	if (vp_init(sdp, &xc->xc_dirents, 1))
+		goto fail_free;
+	if (vp_init(sdp, &xc->xc_xattr_keys, 1))
+		goto fail_dirents;
+	if (vp_init(sdp, &xc->xc_xattr_values, 1))
+		goto fail_keys;
+
+	xc->xc_flags |= XC_FL_ALLOCATED;
+	fp->f_xrctx = xc;
+
+	return 0;
+
+fail_keys:
+	vp_uninit(&xc->xc_xattr_keys);
+fail_dirents:
+	vp_uninit(&xc->xc_dirents);
+fail_free:
+	kfree(xc);
+	return -ENOMEM;
+}
+
+/*
+ * A collect pass stopped before it consumed every entry. If the stop
+ * has already been accounted for (XC_FL_GATHER_PART_INT or XC_FL_ERROR
+ * is set) there is nothing to do. Otherwise the cause is treated as a
+ * hash collision and the flags are updated accordingly.
+ */
+void gfs2_xrdir_partial_collect(struct gfs2_xrdir_ctx *xc)
+{
+	if (xc->xc_flags & (XC_FL_GATHER_PART_INT | XC_FL_ERROR))
+		return;
+
+	/*
+	 * Hash collision: entries have been read in hash order up to,
+	 * but not including, the colliding hashes. XC_FL_HASH_COLL
+	 * records that; XC_FL_HASH_COLL_NXT records that the colliding
+	 * entries will be part of the next collection. The offset at
+	 * which the collision was seen is remembered as well.
+	 */
+	xc->xc_flags |= XC_FL_HASH_COLL | XC_FL_HASH_COLL_NXT |
+			XC_FL_GATHER_PARTS | XC_FL_GATHER_PART_INT;
+	xc->xc_hash_coll_off = xc->xc_offset;
+}
+
+/*
+ * Out of memory while collecting, with not even one entry available
+ * to hand back to the user. Recover by capping the next attempt at
+ * half the number of dents tried this time (rounded up) and asking
+ * the caller to retry via -EAGAIN.
+ */
+static int gfs2_xrdir_handle_oom(struct gfs2_xrdir_ctx *xc)
+{
+	xc->xc_dent_cap = DIV_ROUND_UP(xc->xc_count, 2);
+
+	/*
+	 * Drop the error and interruption flags. The hash-collision
+	 * state is dropped too: the retry re-reads fewer entries of
+	 * the same collection and may or may not reach the collision
+	 * that was recorded.
+	 */
+	xc->xc_flags &= ~(XC_FL_ERROR_OOM | XC_FL_ERROR |
+			  XC_FL_GATHER_PART_INT |
+			  XC_FL_HASH_COLL | XC_FL_HASH_COLL_NXT);
+	xc->xc_hash_coll_off = 0;
+
+	return -EAGAIN;
+}
+
+/*
+ * Post-process the result of collecting one entry.
+ *
+ * A negative @error is flagged on the context (XC_FL_ERROR, plus
+ * XC_FL_ERROR_OOM for -ENOMEM) and returned unchanged. Any
+ * non-negative @error counts as success; in that case the entry-count
+ * cap and the memory cap are checked (a cap of 0 disables the check)
+ * and -EOVERFLOW is returned once either is reached, otherwise 0.
+ */
+static int gfs2_xrdir_collect_errcheck(struct gfs2_xrdir_ctx *xc, int error)
+{
+	if (error < 0) {
+		xc->xc_flags |= XC_FL_ERROR;
+		if (error == -ENOMEM)
+			xc->xc_flags |= XC_FL_ERROR_OOM;
+		return error;
+	}
+
+	/* still below both limits: carry on collecting */
+	if (!(xc->xc_dent_cap && xc->xc_count >= xc->xc_dent_cap) &&
+	    !(xc->xc_dent_memcap &&
+	      vp_get_size(&xc->xc_dirents) >= xc->xc_dent_memcap))
+		return 0;
+
+	/* one of the limits was hit: flag it and stop this part */
+	xc->xc_flags |= XC_FL_GATHER_PARTS | XC_FL_GATHER_PART_INT;
+	return -EOVERFLOW;
+}
+
+/*
+ * To reduce disk-seeking, we collect all the info in stages.
+ * In each stage, we access relevant disk blocks in order
+ * by pre-sorting the entries correspondingly.
+ *
+ * 1. Collect entry info (name, ino, type, offset) etc for all the
+ *    entries. Obtained by reading the directory inode
+ * 2. Collect stat info for all the entries. Obtained by reading
+ *    the file inode blocks.
+ * 3. Collect xattr info for all the entries. Obtained by reading
+ *    the eattr block of each inode.
+ *
+ * With this scheme of collecting data, we don't know what the final
+ * size of a dirent would be ahead of time. gfs2_xrdir_estimate_dent_memcap()
+ * attempts to guess the size. Right now it statically computes and
+ * reserves a fixed percentage of available space for entry+stat info
+ * and xattr info based on what data is requested by the user.
+ *
+ * The available space is the gt_max_vb_pages tunable minus the pages
+ * currently accounted in sd_vb_page_count, plus the pages this context
+ * already holds in its own three vbufs. The page arithmetic is done in
+ * a signed 64-bit value and clamped at zero, so neither a tunable that
+ * was lowered below current usage nor the multiplication by PAGE_SIZE
+ * on a 32-bit machine can wrap around. div_u64() is used because a
+ * plain 64-bit division is not available on all 32-bit architectures.
+ *
+ * Returns the number of bytes reserved for dirents: 80% of the
+ * available space, or 50% when both XSTAT_XATTR_ALL and
+ * XSTAT_XATTR_VALUES are requested in the xattr mask.
+ *
+ * TODO: Make this dynamic. Analyse the directory being processed
+ * and use observed ratios to improve throughput.
+ */
+static u64 gfs2_xrdir_estimate_dent_memcap(struct gfs2_sbd *sdp,
+					   struct gfs2_xrdir_ctx *xc)
+{
+	s64 pages;
+	u64 avail;
+	int perc = 80;
+	unsigned int mask = xc->xc_xattr_mask;
+
+	pages = (s64)gfs2_tune_get(sdp, gt_max_vb_pages) +
+		vp_get_page_count(&xc->xc_dirents) +
+		vp_get_page_count(&xc->xc_xattr_keys) +
+		vp_get_page_count(&xc->xc_xattr_values) -
+		atomic_read(&sdp->sd_vb_page_count);
+	if (pages < 0)
+		pages = 0;
+
+	/* widen before multiplying: up to 8388608 pages do not fit in 32 bits */
+	avail = (u64)pages * PAGE_SIZE;
+	if ((mask & XSTAT_XATTR_ALL) && (mask & XSTAT_XATTR_VALUES))
+		perc = 50;
+
+	return div_u64(avail * perc, 100);
+}
+
+/*
+ * Prepare the xreaddir context for a collect run; done before every run.
+ * Always returns 0.
+ */
+static int gfs2_xrdir_ctx_setup(struct file *file, struct gfs2_xrdir_ctx *xc,
+				unsigned int flags, unsigned int mask)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
+	int continuing = (xc->xc_flags & XC_FL_GATHER_PARTS) != 0;
+
+	/*
+	 * Flags, mask and starting offset are latched once per
+	 * readdirplus initiation. When a request is being served in
+	 * several parts, later parts keep the values from the start.
+	 */
+	if (!continuing) {
+		xc->xc_offset = file->f_pos;
+		xc->xc_xattr_mask = mask;
+		xc->xc_xst_flags = flags;
+	}
+
+	/*
+	 * Limits for this part: a dent cap left over from a previous
+	 * run is kept, otherwise the sysfs tunable applies. The memory
+	 * cap is re-estimated every time from what is available now.
+	 */
+	if (!xc->xc_dent_cap)
+		xc->xc_dent_cap = gfs2_tune_get(sdp, gt_max_xrdir_dents);
+	xc->xc_dent_memcap = gfs2_xrdir_estimate_dent_memcap(sdp, xc);
+
+	/* forget everything gathered by the previous run */
+	kfree(xc->xc_vb_dptrs);
+	xc->xc_vb_dptrs = NULL;
+	xc->xc_next_dent = NULL;
+	xc->xc_count = 0;
+	xc->xc_dent_valid = 0;
+
+	vp_reset(&xc->xc_dirents);
+	vp_reset(&xc->xc_xattr_keys);
+	vp_reset(&xc->xc_xattr_values);
+
+	return 0;
+}
+
+/*
+ * Add a gfs2_dirent to the xreaddir context
+ *
+ * @dent: the on-disk directory entry; its name bytes follow the header
+ * @off: the directory offset of @dent as computed by the caller
+ * @xc: the xreaddir context collecting the entries
+ *
+ * "." and ".." are skipped and reported as success (0) without being
+ * counted. Every other entry is serialised field by field into the
+ * xc_dirents vbuf at xc_next_dent using vp_write(); on success
+ * xc_next_dent is advanced past the name and xc_count is incremented.
+ *
+ * The return value is whatever gfs2_xrdir_collect_errcheck() makes of
+ * the outcome: 0 to keep going, -EOVERFLOW when a cap was reached, or
+ * the negative error that occurred.
+ *
+ * NOTE(review): a vp_write() that returns a short but non-negative
+ * count reaches err_check with a positive value, which
+ * gfs2_xrdir_collect_errcheck() does not treat as an error. The entry
+ * would then be dropped without being counted - confirm that
+ * vp_write() can only return the full length or a negative error.
+ */
+int gfs2_xrdir_collect_dents(const struct gfs2_dirent *dent, loff_t off,
+			     struct gfs2_xrdir_ctx *xc)
+{
+	struct gfs2_xdirent *x;
+	u64 x_ino;
+	u32 x_hash;
+	u8 x_valid = 0;
+	char x_type;
+	unsigned int x_xattr_count, x_namelen;
+	const void *nullptr = NULL; /* source for zeroing x_vb_xattr_arr_ptr */
+	int error = 0;
+
+	if (gfs2_dirent_dot_or_dotdot(dent))
+		return 0;
+
+	if (xc->xc_next_dent == NULL) /* first entry of this run */
+		xc->xc_next_dent = xc->xc_dirents.v_ptr;
+	x = xc->xc_next_dent;
+	vp_memset(&xc->xc_dirents, x, 0, sizeof(struct gfs2_xdirent));
+
+	/*
+	 * If we know that we're encountering hash-colliding
+	 * entries this time around, we read only these in
+	 * and nothing else. The first entry whose offset differs
+	 * from the recorded collision offset ends the run; it is
+	 * not stored.
+	 */
+	if (xc->xc_flags & XC_FL_HASH_COLL_NXT &&
+	    off != xc->xc_hash_coll_off) {
+		/*
+		 * setting dent_cap to how many we've read in
+		 * so we don't read anymore
+		 */
+		xc->xc_dent_cap = xc->xc_count;
+		xc->xc_flags &= ~XC_FL_HASH_COLL_NXT;
+		/*
+		 * xc_offset will get incremented to read
+		 * at the next offset when everything
+		 * is written out properly this cycle
+		 */
+		xc->xc_offset = xc->xc_hash_coll_off;
+		xc->xc_hash_coll_off = 0;
+		goto err_check; /* error is still 0 here */
+	}
+
+	/* Copy the dirent contents, converted from big-endian disk format */
+	x_ino = be64_to_cpu(dent->de_inum.no_addr);
+	x_hash = be32_to_cpu(dent->de_hash);
+	x_type = be16_to_cpu(dent->de_type);
+	x_xattr_count = 0;
+	x_namelen = be16_to_cpu(dent->de_name_len);
+
+	error = vp_write(&xc->xc_dirents, &x->x_ino, &x_ino, sizeof(x->x_ino));
+	if (error != sizeof(x->x_ino)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_hash, &x_hash, sizeof(x->x_hash));
+	if (error != sizeof(x->x_hash)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_valid, &x_valid, sizeof(x->x_valid));
+	if (error != sizeof(x->x_valid)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_type, &x_type, sizeof(x->x_type));
+	if (error != sizeof(x->x_type)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_xattr_count, &x_xattr_count,
+			 sizeof(x->x_xattr_count));
+	if (error != sizeof(x->x_xattr_count)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_vb_xattr_arr_ptr, &nullptr,
+			 sizeof(x->x_vb_xattr_arr_ptr));
+	if (error != sizeof(x->x_vb_xattr_arr_ptr)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_namelen, &x_namelen,
+			 sizeof(x->x_namelen));
+	if (error != sizeof(x->x_namelen)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_name, (char*)(dent + 1), x_namelen);
+	if (error != x_namelen) goto err_check;
+
+	xc->xc_next_dent = x->x_name + x_namelen; /* next entry starts right after this name */
+	xc->xc_count++;
+	error = 0; /* the last vp_write() left a byte count in error */
+err_check:
+	return gfs2_xrdir_collect_errcheck(xc, error);
+}
+
+/*
+ * Build xc_vb_dptrs: an array holding one pointer per collected entry,
+ * each pointing at that entry's record inside the xc_dirents vbuf.
+ *
+ * The caller must have collected at least one entry and must not have a
+ * pointer array allocated already; both conditions are BUG_ON-checked.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+static int gfs2_xrdir_create_dptrs(struct gfs2_xrdir_ctx *xc)
+{
+	unsigned long i;	/* same type as xc_count */
+	unsigned int namelen;
+	struct gfs2_xdirent *x = xc->xc_dirents.v_ptr;
+
+	BUG_ON(xc->xc_vb_dptrs || xc->xc_count == 0);
+
+	/*
+	 * kmalloc_array() fails cleanly instead of under-allocating should
+	 * count * size ever overflow.
+	 * NOTE(review): the kcalloc() in gfs2_xrdir_collect_extra_info() uses
+	 * GFP_NOFS on the same call path - confirm GFP_KERNEL is intended.
+	 */
+	xc->xc_vb_dptrs = kmalloc_array(xc->xc_count,
+					sizeof(*xc->xc_vb_dptrs), GFP_KERNEL);
+	if (xc->xc_vb_dptrs == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < xc->xc_count; i++) {
+		xc->xc_vb_dptrs[i] = x;
+		vp_read(&xc->xc_dirents, &namelen, &x->x_namelen,
+			sizeof(x->x_namelen));
+		/*
+		 * Records are packed back to back: the next one begins
+		 * directly after this record's name, i.e. at
+		 * x_name + x_namelen (see gfs2_xrdir_collect_dents()).
+		 */
+		x = (void *)x->x_name + namelen;
+	}
+	return 0;
+}
+
+/*
+ * For each collected entry, fetch the stat data of its inode and store it,
+ * together with the inode's i_eattr value, in the entry's vbuf record.
+ * Every entry is expected to have x_ip filled in already. Always returns 0.
+ */
+static int gfs2_xrdir_collect_xstat(struct gfs2_xrdir_ctx *xc)
+{
+	struct kstat stat;
+	int idx = 0;
+
+	while (idx < xc->xc_count) {
+		struct gfs2_xdirent *rec = xc->xc_vb_dptrs[idx];
+		struct gfs2_inode *gip;
+
+		/* The inode pointer lives inside the vbuf record */
+		vp_read(&xc->xc_dirents, &gip, &rec->x_ip,
+			sizeof(struct gfs2_inode *));
+		gfs2_getattr_i(gip, &stat);
+
+		vp_write(&xc->xc_dirents, &rec->x_kstat, &stat,
+			 sizeof(struct kstat));
+		vp_write(&xc->xc_dirents, &rec->x_eablk, &gip->i_eattr,
+			 sizeof(rec->x_eablk));
+		idx++;
+	}
+	return 0;
+}
+
+/*
+ * Tell whether an extended attribute of the given on-disk type was asked
+ * for in the xattr mask. Returns 1 if so; 0 otherwise, including for any
+ * type other than user, system or security.
+ */
+static inline int xattr_requested(char type, unsigned int mask)
+{
+	unsigned int wanted;
+
+	switch (type) {
+	case GFS2_EATYPE_USR:
+		wanted = XSTAT_XATTR_USER;
+		break;
+	case GFS2_EATYPE_SYS:
+		wanted = XSTAT_XATTR_SYSTEM;
+		break;
+	case GFS2_EATYPE_SECURITY:
+		wanted = XSTAT_XATTR_SECURITY;
+		break;
+	default:
+		return 0;
+	}
+	return (mask & wanted) ? 1 : 0;
+}
+
+/*
+ * ea_foreach() callback: record one extended attribute of an inode.
+ *
+ * @private is a struct gfs2_xdir_ctx_bndle naming the xreaddir context and
+ * the dirent record the attribute belongs to. If the attribute's type was
+ * requested in xc_xattr_mask, a struct gfs2_xd_xattr record (header plus
+ * "prefix + name" key) is written at the top of xc_xattr_keys, the value
+ * is appended to xc_xattr_values when XSTAT_XATTR_VALUES is set, and the
+ * dirent's x_xattr_count is incremented.
+ *
+ * Returns 0 on success or when the attribute is skipped. Returns -ENOMEM
+ * (and sets XC_FL_ERROR_OOM) when a vbuf operation reports -ENOMEM.
+ */
+static int gfs2_xrdir_xattr_list_i(struct gfs2_inode *ip,
+				   struct buffer_head *bh,
+				   struct gfs2_ea_header *ea,
+				   struct gfs2_ea_header *prev, void *private)
+{
+	struct gfs2_xdir_ctx_bndle *bundle = private;
+	struct gfs2_xrdir_ctx *xc = bundle->xcb_xc;
+	struct gfs2_xdirent *x = bundle->xcb_xd;
+	struct gfs2_xd_xattr *xtr;
+	char prefix[9];
+	unsigned int l = 0, xtr_count, namlen, reclen;
+	void *p;
+
+	if (!xattr_requested(ea->ea_type, xc->xc_xattr_mask))
+		return 0;
+
+	if (ea->ea_type == GFS2_EATYPE_UNUSED)
+		return 0;
+
+	l = ea_prefix(ea, prefix, 9);
+	BUG_ON(l == 0);
+
+	xtr = vp_get_top(&xc->xc_xattr_keys);
+	/*
+	 * Only certain vp_XXX ops can trip -ENOMEM where we might be extending
+	 * the vbuf. We ignore the error code of other ops.
+	 */
+	if (vp_memset(&xc->xc_xattr_keys, xtr, 0,
+		      sizeof(struct gfs2_xd_xattr)) == -ENOMEM)
+		goto set_oom;
+
+	/* if mask says don't do values, skip the following lines */
+	if (GFS2_EA_DATA_LEN(ea) > 0 && (xc->xc_xattr_mask & XSTAT_XATTR_VALUES)) {
+		void *valptr = vp_get_top(&xc->xc_xattr_values);
+		unsigned long len = GFS2_EA_DATA_LEN(ea);
+
+		vp_write(&xc->xc_xattr_keys, &xtr->xa_value_len,
+			 &len, sizeof(xtr->xa_value_len));
+		vp_write(&xc->xc_xattr_keys, &xtr->xa_vb_value_ptr, &valptr,
+			 sizeof(void*));
+		/* NOTE(review): the value read back into p is never used */
+		vp_read(&xc->xc_xattr_keys, &p, &xtr->xa_vb_value_ptr,
+			sizeof(void*));
+		if (vp_append(&xc->xc_xattr_values, GFS2_EA2DATA(ea), len)
+		    == -ENOMEM)
+			goto set_oom;
+	}
+
+	/* The stored key is the type prefix followed by the on-disk name */
+	namlen = l + ea->ea_name_len;
+	vp_write(&xc->xc_xattr_keys, &xtr->xa_keylen, &namlen,
+		 sizeof(xtr->xa_keylen));
+	if (vp_write(&xc->xc_xattr_keys, xtr->xa_keyname, &prefix, l) == -ENOMEM)
+		goto set_oom;
+	/*
+	 * Copy exactly ea_name_len bytes of name behind the prefix. Using
+	 * the combined key length here would read l bytes past the end of
+	 * the on-disk name.
+	 */
+	if (vp_write(&xc->xc_xattr_keys, xtr->xa_keyname + l,
+		     GFS2_EA2NAME(ea), ea->ea_name_len) == -ENOMEM)
+		goto set_oom;
+
+	/*
+	 * The record ends where the key ends: namlen already includes the
+	 * prefix, so it must not be counted a second time.
+	 */
+	reclen = (xtr->xa_keyname + namlen) - (char *)xtr;
+	vp_write(&xc->xc_xattr_keys, &xtr->xa_reclen, &reclen,
+		 sizeof(xtr->xa_reclen));
+
+	/* One more xattr for the owning dirent */
+	vp_read(&xc->xc_dirents, &xtr_count, &x->x_xattr_count,
+		sizeof(x->x_xattr_count));
+	xtr_count++;
+	vp_write(&xc->xc_dirents, &x->x_xattr_count, &xtr_count,
+		 sizeof(x->x_xattr_count));
+
+	return 0;
+set_oom:
+	xc->xc_flags |= XC_FL_ERROR_OOM;
+	return -ENOMEM;
+}
+
+/*
+ * Walk all collected entries and gather the extended attributes of each
+ * one's inode via ea_foreach(). Entries whose inode has no xattr block, or
+ * for which no xattr class was requested, are marked valid straight away.
+ * An entry is marked valid (and xc_dent_valid bumped) only once its
+ * xattrs were read without error; the first error stops the walk and is
+ * returned.
+ */
+int gfs2_xrdir_collect_xattrs(struct gfs2_xrdir_ctx *xc)
+{
+	int ret = 0, idx;
+
+	for (idx = 0; idx < xc->xc_count; idx++) {
+		struct gfs2_xdirent *keys_top, *rec = xc->xc_vb_dptrs[idx];
+		struct gfs2_xdir_ctx_bndle cb_arg;
+		struct gfs2_inode *gip;
+		u8 done = 1;
+
+		vp_read(&xc->xc_dirents, &gip, &rec->x_ip,
+			sizeof(struct gfs2_inode *));
+
+		if (gip->i_eattr && (xc->xc_xattr_mask & XSTAT_XATTR_ALL)) {
+			cb_arg.xcb_xc = xc;
+			cb_arg.xcb_xd = rec;
+
+			/* This entry's xattr records start at the current top */
+			keys_top = vp_get_top(&xc->xc_xattr_keys);
+			vp_write(&xc->xc_dirents, &rec->x_vb_xattr_arr_ptr,
+				 &keys_top, sizeof(struct gfs2_xd_xattr*));
+
+			ret = ea_foreach(gip, gfs2_xrdir_xattr_list_i, &cb_arg);
+			if (ret)
+				break;
+		}
+
+		/* The xattrs for this dent are in, so flag it as valid */
+		vp_write(&xc->xc_dirents, &rec->x_valid, &done,
+			 sizeof(rec->x_valid));
+		xc->xc_dent_valid++;
+	}
+	return ret;
+}
+
+/*
+ * Gather stat data and extended attributes for every collected entry.
+ *
+ * Looks up the inode of each entry, takes a shared glock on all of them,
+ * collects stat info and xattrs, then drops the locks and inode references
+ * again. The pointer array is re-sorted several times along the way and is
+ * put back into dent order before returning.
+ *
+ * Returns 0 and sets XC_FL_DATA_AVAIL on success. An -ENOMEM result is
+ * turned into success if the first entry (in dent order) was collected in
+ * full; otherwise xc_offset is rewound to that entry and -ENOMEM returned.
+ * Any other failure is returned as is.
+ */
+static int gfs2_xrdir_collect_extra_info(struct gfs2_xrdir_ctx *xc,
+					 struct gfs2_inode *dip)
+{
+	int error = -ENOMEM, i;
+	struct gfs2_holder *ghs;
+
+	/* First sort the dents according to inode blk order for stat */
+	ctx_sort(xc, xc->xc_vb_dptrs, xc->xc_count, sizeof(void *),
+		 ctx_compare_dent_iblks, NULL);
+
+	/* Lookup all the inodes for stat info */
+	for (i = 0; i < xc->xc_count; i++) {
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[i];
+		u64 ino;
+		struct inode *inode;
+		struct gfs2_inode *ip, *nullptr = NULL;
+
+		vp_read(&xc->xc_dirents, &ino, &x_vb_p->x_ino,
+			sizeof(x_vb_p->x_ino));
+
+		inode = gfs2_lookup_by_inum(GFS2_SB(&dip->i_inode), ino, NULL,
+					    GFS2_BLKST_DINODE);
+		if (IS_ERR(inode)) {
+			/*
+			 * Clear the record's inode pointer (the vbuf slot,
+			 * not the local variable) so the cleanup loop below
+			 * skips it, and hand back the real lookup error.
+			 */
+			vp_write(&xc->xc_dirents, &x_vb_p->x_ip, &nullptr,
+				 sizeof(struct gfs2_inode *));
+			error = PTR_ERR(inode);
+			goto iput_iarr;
+		}
+		ip = GFS2_I(inode);
+		vp_write(&xc->xc_dirents, &x_vb_p->x_ip, &ip,
+			 sizeof(struct gfs2_inode *));
+	}
+
+	/* lock all inodes */
+	ghs = kcalloc(xc->xc_count, sizeof(struct gfs2_holder), GFP_NOFS);
+	if (!ghs)
+		goto iput_iarr;
+	for (i = 0; i < xc->xc_count; i++) {
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[i];
+		struct gfs2_inode *ip;
+
+		vp_read(&xc->xc_dirents, &ip, &x_vb_p->x_ip,
+			sizeof(struct gfs2_inode *));
+		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, ghs + i);
+	}
+
+	error = gfs2_glock_nq_m(xc->xc_count, ghs);
+	if (error) {
+		/*
+		 * Nothing is held, but every holder was initialised above
+		 * and has to be uninitialised before the array is freed.
+		 */
+		for (i = 0; i < xc->xc_count; i++)
+			gfs2_holder_uninit(&ghs[i]);
+		goto free_ghs;
+	}
+
+	/* From here on the glocks are held: leave through "unlock" */
+	error = gfs2_xrdir_collect_xstat(xc);
+	if (error)
+		goto unlock;
+
+	/* Sort the dents according to eattr blk order */
+	ctx_sort(xc, xc->xc_vb_dptrs, xc->xc_count, sizeof(void *),
+		 ctx_compare_dent_eablks, NULL);
+
+	error = gfs2_xrdir_collect_xattrs(xc);
+unlock:
+	for (i = 0; i < xc->xc_count; i++)
+		gfs2_glock_dq_uninit(&ghs[i]);
+free_ghs:
+	kfree(ghs);
+iput_iarr:
+	/* Drop the reference taken by each successful lookup */
+	for (i = 0; i < xc->xc_count; i++) {
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[i];
+		struct gfs2_inode *ip;
+
+		vp_read(&xc->xc_dirents, &ip, &x_vb_p->x_ip,
+			sizeof(struct gfs2_inode *));
+		if (ip)
+			iput(&ip->i_inode);
+	}
+	/* Sort the pointers back to dent order */
+	ctx_sort(xc, xc->xc_vb_dptrs, xc->xc_count, sizeof(void *),
+		 ctx_compare_dents, NULL);
+
+	if (error == -ENOMEM) {
+		/*
+		 * If at least one dent has been collected in full,
+		 * void -ENOMEM
+		 * We shuffled the order of dents multiple times while
+		 * retrieving stat and xattrs, so we have to ensure that
+		 * at least the first dent in the final ordering is valid
+		 * in order to be able to return at least 1 entry. This
+		 * is because we need to preserve the order (hash order)
+		 * when we return the dents to the user. XXX: OR DO WE??
+		 */
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[0];
+		u8 valid;
+		vp_read(&xc->xc_dirents, &valid, &x_vb_p->x_valid,
+			sizeof(x_vb_p->x_valid));
+
+		if (valid)
+			error = 0;
+		else {
+			u32 hash;
+			vp_read(&xc->xc_dirents, &hash, &x_vb_p->x_hash,
+				sizeof(hash));
+			xc->xc_offset = gfs2_disk_hash2offset(hash);
+		}
+	}
+	if (!error)
+		xc->xc_flags |= XC_FL_DATA_AVAIL;
+
+	return error;
+}
+
+/*
+ * Copy the xattr records of one entry to user space.
+ *
+ * @xc:       xreaddir context owning the key and value vbufs
+ * @x:        kernel copy of the dirent record (supplies x_xattr_count)
+ * @xdx_vb_p: first xattr record of this entry inside xc_xattr_keys
+ * @xx:       user-space destination for the first xattr
+ * @count:    total size of the user buffer
+ * @bytes:    running total of bytes accounted for so far; updated here
+ * @tempbuf:  kernel bounce buffer; must hold at least PAGE_SIZE bytes
+ *            (the caller in this file allocates exactly PAGE_SIZE)
+ *
+ * Returns 0 on success, -EOVERFLOW when the remaining user space is too
+ * small for the next xattr, -EINVAL when a copy to user space fails.
+ * NOTE(review): -EFAULT is the usual errno for a failed user copy; -EINVAL
+ * is kept because the sibling copy-out helpers return it too.
+ */
+static int gfs2_xrdir_to_user_xattrs(struct gfs2_xrdir_ctx *xc,
+				     struct gfs2_xdirent *x,
+				     struct gfs2_xd_xattr *xdx_vb_p,
+				     struct xdirent_xattr __user *xx,
+				     size_t count, size_t *bytes, char *tempbuf)
+{
+	struct gfs2_xd_xattr xdx;
+	int attrcount = 0, error = -EINVAL;
+
+	while (attrcount < x->x_xattr_count) {
+		unsigned long copied = 0;
+
+		/* Work on a kernel copy of the record header */
+		vp_read(&xc->xc_xattr_keys, &xdx, xdx_vb_p,
+			sizeof(struct gfs2_xd_xattr));
+
+		if ((count - *bytes) <
+		    (sizeof(struct xdirent_xattr) +
+		     xdx.xa_keylen + xdx.xa_value_len)) {
+			error = -EOVERFLOW;
+			goto out;
+		}
+
+		if (__put_user(xdx.xa_value_len, &xx->xa_value_len))
+			goto out;
+
+		/* Key first, followed by a terminating NUL */
+		vp_read(&xc->xc_xattr_keys, tempbuf, xdx_vb_p->xa_keyname,
+			xdx.xa_keylen);
+
+		if (copy_to_user(xx->xa_name_val, tempbuf, xdx.xa_keylen))
+			goto out;
+		if (__put_user(0, xx->xa_name_val + xdx.xa_keylen))
+			goto out;
+
+		if ((xc->xc_xattr_mask & XSTAT_XATTR_VALUES) &&
+		    xdx.xa_vb_value_ptr) {
+			/*
+			 * The value can be larger than the bounce buffer, so
+			 * move it across in pieces of at most PAGE_SIZE
+			 * instead of reading xa_value_len bytes in one go.
+			 */
+			while (copied < xdx.xa_value_len) {
+				unsigned long chunk =
+					min_t(unsigned long,
+					      xdx.xa_value_len - copied,
+					      PAGE_SIZE);
+
+				vp_read(&xc->xc_xattr_values, tempbuf,
+					xdx.xa_vb_value_ptr + copied, chunk);
+
+				if (copy_to_user(xx->xa_name_val +
+						 xdx.xa_keylen + 1 + copied,
+						 tempbuf, chunk))
+					goto out;
+				copied += chunk;
+			}
+		}
+
+		/* Step to the next user slot and the next vbuf record */
+		xx = (struct xdirent_xattr __user *)
+			((char *)xx + sizeof(xx->xa_value_len)
+			 + xdx.xa_keylen + 1 + xdx.xa_value_len);
+		xdx_vb_p = (void*) xdx_vb_p + xdx.xa_reclen;
+
+		*bytes += sizeof(struct xdirent_xattr) + xdx.xa_keylen +
+			xdx.xa_value_len;
+		attrcount++;
+	}
+	error = 0;
+out:
+	return error;
+}
+
+/*
+ * Copy the variable-length part of one entry (its name and, if requested,
+ * its xattrs) to user space.
+ *
+ * @xc:     xreaddir context
+ * @x:      kernel copy of the dirent record, as filled in by
+ *          gfs2_xrdir_to_user_fixed()
+ * @x_vb_p: the same record inside the xc_dirents vbuf
+ * @lxd:    user-space destination entry
+ * @count:  total size of the user buffer
+ * @bytes:  running total of bytes accounted for so far; updated here
+ *
+ * Returns 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
+ * -EOVERFLOW if the user buffer is too small, -EINVAL if a copy to user
+ * space fails, or the result of gfs2_xrdir_to_user_xattrs().
+ */
+static int gfs2_xrdir_to_user_vars(struct gfs2_xrdir_ctx *xc,
+				   struct gfs2_xdirent *x,
+				   struct gfs2_xdirent *x_vb_p,
+				   struct linux_xdirent __user *lxd,
+				   size_t count, size_t *bytes)
+{
+	int error = -EINVAL;
+	char *tempbuf = NULL;
+	struct xdirent_blob __user *xblob;
+	struct xdirent_xattr __user *xx;
+	struct gfs2_xd_xattr *xdx_vb_p;
+
+	/* Bounce buffer between the vbufs and user space */
+	tempbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!tempbuf) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	xblob = &lxd->xd_blob;
+
+	/* copy all the variable length fields */
+	if ((count - *bytes) < x->x_namelen) {
+		error = -EOVERFLOW;
+		goto free;
+	}
+
+	vp_read(&xc->xc_dirents, tempbuf, x_vb_p->x_name, x->x_namelen);
+
+	/*
+	 * NOTE(review): the terminating NUL is written but is not part of
+	 * the space check above nor of *bytes - presumably it is covered by
+	 * a byte already counted in struct linux_xdirent; confirm.
+	 */
+	if (copy_to_user(xblob->xb_blob, tempbuf, x->x_namelen))
+		goto free;
+	if (__put_user(0, xblob->xb_blob + x->x_namelen))
+		goto free;
+
+	*bytes += x->x_namelen;
+	error = 0;
+
+	/*
+	 * Decide from the kernel copy of the xattr count. lxd points into
+	 * user space and must not be dereferenced directly; the value
+	 * stored there came from x->x_xattr_count in the first place.
+	 */
+	if ((xc->xc_xattr_mask & XSTAT_XATTR_ALL) && x->x_xattr_count) {
+		xx = (struct xdirent_xattr __user *)
+			(xblob->xb_blob + x->x_namelen + 1);
+		xdx_vb_p = x->x_vb_xattr_arr_ptr;
+
+		error = gfs2_xrdir_to_user_xattrs(xc, x, xdx_vb_p, xx,
+						  count, bytes, tempbuf);
+	}
+free:
+	kfree(tempbuf);
+out:
+	return error;
+}
+
+/*
+ * Copy the fixed-size part of one entry to user space.
+ *
+ * The vbuf record at @x_vb_p is first read into the kernel copy @x, which
+ * the caller keeps using afterwards. Inode number, type, a zero offset,
+ * the stat data and the xattr count are then stored in @lxd, and *@bytes
+ * grows by sizeof(struct linux_xdirent).
+ *
+ * Returns 0 on success, -EOVERFLOW if the user buffer has less than
+ * sizeof(struct linux_xdirent) bytes left, -EINVAL if a store to user
+ * space fails, or the error reported by xstat_set_result().
+ */
+static int gfs2_xrdir_to_user_fixed(struct gfs2_xrdir_ctx *xc,
+				    struct gfs2_xdirent *x,
+				    struct gfs2_xdirent *x_vb_p,
+				    struct linux_xdirent __user *lxd,
+				    size_t count, size_t *bytes)
+{
+	int ret;
+
+	vp_read(&xc->xc_dirents, x, x_vb_p, sizeof(struct gfs2_xdirent));
+
+	if ((count - *bytes) < sizeof(struct linux_xdirent))
+		return -EOVERFLOW;
+
+	/* Stores are attempted in order; the first failure ends it */
+	if (__put_user(x->x_ino, &lxd->xd_ino) ||
+	    __put_user(x->x_type, &lxd->xd_type) ||
+	    __put_user(0, &lxd->xd_off))
+		return -EINVAL;
+
+	ret = xstat_set_result(&x->x_kstat, &lxd->xd_stat);
+	if (ret)
+		return ret;
+
+	if (__put_user(x->x_xattr_count, &lxd->xd_blob.xb_xattr_count))
+		return -EINVAL;
+
+	/* All fixed-size fields are across */
+	*bytes += sizeof(struct linux_xdirent);
+	return 0;
+}
+
+/*
+ * Copy collected entries to the user buffer, resuming at xc_next_dent.
+ *
+ * Entries are written in xc_vb_dptrs order until the array is exhausted,
+ * an entry that is not marked valid is met, or the user buffer overflows.
+ * xc_next_dent, xc_offset, xc_dent_cap and the XC_FL_* flags are updated
+ * so that the next call either continues copying out or starts a new
+ * collect run.
+ *
+ * Returns the number of bytes of complete entries written to @buf, or a
+ * negative errno stored in the size_t return value: -EINVAL when no data
+ * is available or a helper reports it, -EOVERFLOW when not even one entry
+ * could be written, -EFAULT when the record length cannot be stored.
+ */
+static size_t gfs2_xrdir_to_user(struct gfs2_xrdir_ctx *xc, void __user *buf,
+				 size_t count)
+{
+	size_t error = -EINVAL, bytes = 0, bytes_bef = 0;
+	int i, skip = 1, written = 0;
+	struct gfs2_xdirent x, *x_vb_p;
+	struct linux_xdirent __user *lxd = buf;
+	u8 valid = 0;
+
+	if (!(xc->xc_flags & XC_FL_DATA_AVAIL))
+		goto out;
+
+	for (i = 0; i < xc->xc_count; i++) {
+		u32 hash;
+		x_vb_p = xc->xc_vb_dptrs[i];
+		/* NOTE(review): hash is read here but not used in the loop */
+		vp_read(&xc->xc_dirents, &hash, &x_vb_p->x_hash, sizeof(hash));
+
+		/* Skip ahead to the entry we stopped at last time */
+		if (skip && xc->xc_vb_dptrs[i] != xc->xc_next_dent)
+			continue;
+		skip = 0;
+		vp_read(&xc->xc_dirents, &valid, &x_vb_p->x_valid,
+			sizeof(x_vb_p->x_valid));
+		if (!valid)
+			break;
+
+		/* This will fill up x from x_vb_p and subsequently lxd from x */
+		error = gfs2_xrdir_to_user_fixed(xc, &x, x_vb_p, lxd, count,
+						 &bytes);
+		if (error) {
+			if (error == -EOVERFLOW)
+				goto overflow;
+			goto out;
+		}
+
+		error = gfs2_xrdir_to_user_vars(xc, &x, x_vb_p, lxd, count,
+						&bytes);
+		if (error) {
+			u64 ino;
+			/* NOTE(review): ino is read here but never used */
+			vp_read(&xc->xc_dirents, &ino, &x_vb_p->x_ino, sizeof(ino));
+			if (error == -EOVERFLOW)
+				goto overflow;
+			goto out;
+		}
+
+		/*
+		 * error is 0 at this point, so a failed store must set it
+		 * explicitly or the fault would be reported as success.
+		 */
+		if (__put_user(bytes - bytes_bef, &lxd->xd_reclen)) {
+			error = -EFAULT;
+			goto out;
+		}
+
+		lxd = (void *)lxd + (bytes - bytes_bef);
+		/*
+		 * The pointer array holds xc_count slots; do not read past
+		 * it after the last entry has gone out.
+		 */
+		xc->xc_next_dent = (i + 1 < xc->xc_count) ?
+			xc->xc_vb_dptrs[i + 1] : NULL;
+		written++;
+		bytes_bef = bytes;
+	}
+overflow:
+	if (written) {
+		if (!valid) {
+			u32 hash;
+			x_vb_p = xc->xc_vb_dptrs[i];
+			vp_read(&xc->xc_dirents, &hash, &x_vb_p->x_hash,
+				sizeof(hash));
+			/*
+			 * Some of the entries we collected were incomplete,
+			 * so we only wrote the ones that were complete. For
+			 * next time, we'll only try to collect half the
+			 * number of entries. This will also invalidate the
+			 * assumption that we'll encounter hash-colliding
+			 * entries in the next pass
+			 */
+			xc->xc_offset = gfs2_disk_hash2offset(hash);
+			xc->xc_flags &= ~(XC_FL_GATHER_PART_INT |
+					  XC_FL_DATA_AVAIL |
+					  XC_FL_HASH_COLL |
+					  XC_FL_HASH_COLL_NXT);
+			xc->xc_hash_coll_off = 0;
+			xc->xc_dent_cap = DIV_ROUND_UP(xc->xc_count, 2);
+		} else {
+			/*
+			 * If we didn't overflow the user buffer, we
+			 * have written out all the collected dents to
+			 * the user buffer
+			 */
+			if (error != -EOVERFLOW) {
+				xc->xc_flags &= ~(XC_FL_GATHER_PART_INT |
+						  XC_FL_DATA_AVAIL);
+				xc->xc_dent_cap = 0;
+				if (!(xc->xc_flags & XC_FL_HASH_COLL))
+					xc->xc_offset++;
+			}
+		}
+	}
+	/* The resume point was found but nothing could be written */
+	if (!written && !skip) {
+		error = -EOVERFLOW;
+		goto out;
+	}
+	error = bytes_bef;
+out:
+	return error;
+}
+
+/**
+ * gfs2_xreaddir - GFS2's implementation of xreaddir functionality
+ * @file  : The directory to xreaddir
+ * @flags : flags used by xstat
+ * @mask  : field mask for xstat and xattrs
+ * @buf   : User buffer to fill data into
+ * @count : Size of the user buffer in bytes
+ *
+ * Collect extended information (xstat, xattrs) about the dents in the
+ * given directory and fill them into the user buf passed in.
+ *
+ * If a previous call left collected data behind (XC_FL_DATA_AVAIL), that
+ * data is copied out without reading the directory again. Otherwise the
+ * context is reset, the directory is read under a shared glock, the extra
+ * information is collected and as much as fits is copied to @buf.
+ * file->f_pos is updated from xc_offset whenever data is copied out.
+ *
+ * Returns: the result of gfs2_xrdir_to_user() when entries are copied out
+ *          (a byte count, or a negative errno stored in the size_t),
+ *          0 if no entries were collected,
+ *          otherwise the error of the failing step; -ENOMEM conditions
+ *          are replaced by the result of gfs2_xrdir_handle_oom()
+ *          (presumably the source of -EAGAIN - not visible here).
+ */
+
+size_t gfs2_xreaddir(struct file *file, unsigned int flags, unsigned int mask,
+			    void __user *buf, size_t count)
+{
+	struct gfs2_xrdir_ctx *xc = ((struct gfs2_file *)
+				     file->private_data)->f_xrctx;
+	size_t error = 0;
+	struct inode *dir = file->f_mapping->host;
+	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_holder d_gh;
+
+	/* Leftover data from an earlier call: just continue copying out */
+	if (xc->xc_flags & XC_FL_DATA_AVAIL) {
+		error = gfs2_xrdir_to_user(xc, buf, count);
+		file->f_pos = xc->xc_offset;
+		return error;
+	}
+
+	error = gfs2_xrdir_ctx_setup(file, xc, flags, mask);
+	if (error)
+		goto out;
+
+	/* The directory glock is held in shared mode until "uninit" */
+	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+	error = gfs2_glock_nq(&d_gh);
+	if (error) {
+		gfs2_holder_uninit(&d_gh);
+		goto out;
+	}
+
+	xc->xc_flags &= ~XC_FL_HASH_COLL;
+	/* No dir_context is passed: entries are collected into xc instead */
+	error = gfs2_dir_read(dir, NULL, xc, &file->f_ra);
+	if (error) {
+		if (xc->xc_flags & XC_FL_ERROR_OOM)
+			error = gfs2_xrdir_handle_oom(xc);
+		goto uninit;
+	}
+
+	/* Nothing collected: return with error still 0 */
+	if (xc->xc_count == 0)
+		goto uninit;
+	
+	/* Note whether this was a complete gather or the last of several parts */
+	if (!(xc->xc_flags & XC_FL_GATHER_PARTS))
+		xc->xc_flags |= XC_FL_GATHER_FULL;
+	else if (!(xc->xc_flags & XC_FL_GATHER_PART_INT))
+		xc->xc_flags |= XC_FL_GATHER_PART_END;
+
+	error = gfs2_xrdir_create_dptrs(xc);
+	if (error) {
+		if (error == -ENOMEM)
+			error = gfs2_xrdir_handle_oom(xc);
+		goto uninit;
+	}
+
+	error = gfs2_xrdir_collect_extra_info(xc, dip);
+	if (error) {
+		if (error == -ENOMEM)
+			error = gfs2_xrdir_handle_oom(xc);
+		goto uninit;
+	}
+
+	/* Copy-out starts with the first entry in dent order */
+	xc->xc_next_dent = xc->xc_vb_dptrs[0];
+	error = gfs2_xrdir_to_user(xc, buf, count);
+
+	file->f_pos = xc->xc_offset;
+uninit:
+	/* A hash-collision marker is only kept while data is still pending */
+	if (xc->xc_flags & XC_FL_HASH_COLL && !(xc->xc_flags & XC_FL_DATA_AVAIL))
+		xc->xc_flags &= ~XC_FL_HASH_COLL;
+
+	gfs2_glock_dq_uninit(&d_gh);
+out:
+	return error;
+}
diff --git a/fs/gfs2/xreaddir.h b/fs/gfs2/xreaddir.h
new file mode 100644
index 0000000..ea6c82c
--- /dev/null
+++ b/fs/gfs2/xreaddir.h
@@ -0,0 +1,84 @@
+#ifndef __XREADDIR_H__
+#define __XREADDIR_H__
+
+/*
+ * One extended-attribute record as stored in the xc_xattr_keys vbuf.
+ * Records are variable length and packed; xa_reclen leads from one
+ * record to the next.
+ */
+struct gfs2_xd_xattr {
+	unsigned int   xa_reclen;        /* bytes from this record to the next */
+	void          *xa_vb_value_ptr;  /* value inside xc_xattr_values, NULL if not stored */
+	unsigned long  xa_value_len;     /* value length in bytes, 0 if not stored */
+	unsigned int   xa_keylen;        /* length of prefix + name */
+	char           __pad[7];
+	char           xa_keyname[1];    /* prefix + name, stored without a NUL */
+};
+
+/*
+ * One collected directory entry as stored in the xc_dirents vbuf.
+ * Records are packed: the next one starts at x_name + x_namelen.
+ */
+struct gfs2_xdirent {
+	u32                      x_hash;              /* de_hash, CPU byte order */
+	u8                       x_valid;             /* 1 once xattr collection finished */
+	struct gfs2_inode       *x_ip;                /* inode, referenced only while collecting */
+	u64                      x_ino;               /* de_inum.no_addr */
+	u64                      x_eablk;             /* copy of the inode's i_eattr */
+	char                     x_type;              /* de_type */
+	struct kstat             x_kstat;             /* filled by gfs2_getattr_i() */
+	unsigned int             x_xattr_count;       /* # xattr records collected */
+	void                    *x_vb_xattr_arr_ptr;  /* first xattr record in xc_xattr_keys */
+	unsigned int             x_namelen;           /* de_name_len */
+	char                     x_name[1];           /* name, x_namelen bytes, no NUL */
+};
+
+/*
+ * Bits for gfs2_xrdir_ctx.xc_flags.
+ */
+#define XC_FL_ALLOCATED                 0x00000001
+#define XC_FL_GATHER_FULL               0x00000002 /* collection was not split into parts */
+#define XC_FL_GATHER_PARTS              0x00000004 /* a multi-part collection is in progress */
+#define XC_FL_GATHER_PART_INT           0x00000008
+#define XC_FL_GATHER_PART_END           0x00000010
+#define XC_FL_HASH_COLL                 0x00000020
+#define XC_FL_HASH_COLL_NXT             0x00000040 /* next run takes only the colliding entries */
+#define XC_FL_ERROR_OOM                 0x00000080 /* a vbuf operation returned -ENOMEM */
+#define XC_FL_ERROR                     0x00000100
+#define XC_FL_DATA_AVAIL                0x00000200 /* collected data waits to be copied out */
+#define XC_FL_PRINTOK                   0x10000000
+
+/*
+ * xreaddir context, reached through gfs2_file.f_xrctx of an open
+ * directory. It carries the collected entries and the resume state from
+ * one gfs2_xreaddir() call to the next.
+ */
+struct gfs2_xrdir_ctx {
+	u32                   xc_flags;           /* XC_FL_XXXX */
+	u64                   xc_dent_memcap;     /* mem limit per collect */
+	u32                   xc_dent_cap;        /* # dent limit per collect */
+	u32                   xc_dent_valid;      /* # valid dents collected */
+	u32                   xc_xattr_mask;      /* XSTAT_XATTR_XXX see stat.h*/
+	u32                   xc_xst_flags;       /* XSTAT_XXX see stat.h */
+	loff_t                xc_offset;          /* offset of next dent */
+	unsigned long         xc_count;           /* # dents collected */
+	loff_t                xc_hash_coll_off;   /* last hash collision offset */
+	void                 *xc_next_dent;       /* next dent to write out */
+	void                **xc_vb_dptrs;        /* ptrs to dents in xc_dirents */
+	struct vbuf           xc_dirents;         /* temp storage for dents */
+	struct vbuf           xc_xattr_keys;      /* xattr keys for dents */
+	struct vbuf           xc_xattr_values;    /* corresponding values */
+};
+
+/*
+ * Pairs an xreaddir context with the dirent record currently being
+ * processed. It is handed to ea_foreach() as the private argument when
+ * extended attributes are retrieved, so that the prototypes of the
+ * existing xattr handling functions do not have to grow an extra
+ * argument.
+ */
+struct gfs2_xdir_ctx_bndle {
+	struct gfs2_xrdir_ctx *xcb_xc;	/* the xreaddir context */
+	struct gfs2_xdirent   *xcb_xd;	/* dirent record (in xc_dirents) being filled */
+};
+
+extern size_t gfs2_xreaddir(struct file *file, unsigned int flags,
+			    unsigned int mask, void __user *buf,
+			    size_t count);
+extern int gfs2_xrdir_collect_dents(const struct gfs2_dirent *dent, loff_t off,
+				    struct gfs2_xrdir_ctx *xc);
+extern void gfs2_xrdir_partial_collect(struct gfs2_xrdir_ctx *xc);
+extern int gfs2_xrdir_collect_xattrs(struct gfs2_xrdir_ctx *xc);
+
+extern int gfs2_xrdir_ctx_init(struct gfs2_file *fp, struct gfs2_sbd *sdp);
+extern void gfs2_xrdir_ctx_uninit(struct gfs2_file *fp);
+
+#endif /* __XREADDIR_H__ */
-- 
1.8.1.4


WARNING: multiple messages have this Message-ID (diff)
From: Abhi Das <adas@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [RFC PATCH 5/5] gfs2: Add xreaddir file operation and supporting functions
Date: Fri, 25 Jul 2014 12:38:08 -0500	[thread overview]
Message-ID: <1406309888-10749-6-git-send-email-adas@redhat.com> (raw)
In-Reply-To: <1406309888-10749-1-git-send-email-adas@redhat.com>

This patch adds support in GFS2 for the xgetdents syscall by
implementing the xreaddir file operation.

GFS2 uses vbufs (buffer backed by a vector of pages) to store
intermediate data like dirents, stat info and extended attribute
keys/values to eventually bundle them into a container structure
to return to the user.

Signed-off-by: Abhi Das <adas@redhat.com>
---
 fs/gfs2/Makefile     |    3 +-
 fs/gfs2/dir.c        |   80 ++--
 fs/gfs2/dir.h        |   13 +-
 fs/gfs2/export.c     |    2 +-
 fs/gfs2/file.c       |   17 +-
 fs/gfs2/incore.h     |    6 +
 fs/gfs2/inode.c      |    3 +-
 fs/gfs2/inode.h      |    5 +
 fs/gfs2/ops_fstype.c |    4 +
 fs/gfs2/sys.c        |   26 +-
 fs/gfs2/util.c       |    9 +
 fs/gfs2/xattr.c      |   27 +-
 fs/gfs2/xattr.h      |   23 ++
 fs/gfs2/xreaddir.c   | 1024 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/xreaddir.h   |   84 +++++
 15 files changed, 1260 insertions(+), 66 deletions(-)
 create mode 100644 fs/gfs2/xreaddir.c
 create mode 100644 fs/gfs2/xreaddir.h

diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 8612820..da8253b 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -4,7 +4,8 @@ gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
 	glops.o log.o lops.o main.o meta_io.o \
 	aops.o dentry.o export.o file.o \
 	ops_fstype.o inode.o quota.o \
-	recovery.o rgrp.o super.o sys.o trans.o util.o
+	recovery.o rgrp.o super.o sys.o \
+	trans.o util.o xreaddir.o
 
 gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
 
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 1a349f9..21f5926 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -74,15 +74,13 @@
 #include "trans.h"
 #include "bmap.h"
 #include "util.h"
+#include "xreaddir.h"
 
 #define IS_LEAF     1 /* Hashed (leaf) directory */
 #define IS_DINODE   2 /* Linear (stuffed dinode block) directory */
 
 #define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
 
-#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
-#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
-
 struct qstr gfs2_qdot __read_mostly;
 struct qstr gfs2_qdotdot __read_mostly;
 
@@ -1185,17 +1183,13 @@ out_kfree:
  *   lt: returns -1
  *   eq: returns 0
  */
-
-static int compare_dents(const void *a, const void *b)
+int compare_dents_i(const struct gfs2_dirent *dent_a,
+		  const struct gfs2_dirent *dent_b)
 {
-	const struct gfs2_dirent *dent_a, *dent_b;
 	u32 hash_a, hash_b;
 	int ret = 0;
 
-	dent_a = *(const struct gfs2_dirent **)a;
 	hash_a = be32_to_cpu(dent_a->de_hash);
-
-	dent_b = *(const struct gfs2_dirent **)b;
 	hash_b = be32_to_cpu(dent_b->de_hash);
 
 	if (hash_a > hash_b)
@@ -1217,6 +1211,12 @@ static int compare_dents(const void *a, const void *b)
 	return ret;
 }
 
+/*
+ * Comparator in the form sort() expects: a and b point at array slots
+ * that hold dirent pointers, so dereference them and let
+ * compare_dents_i() do the actual comparison.
+ */
+int compare_dents(const void *a, const void *b)
+{
+	return compare_dents_i(*(const struct gfs2_dirent **)a,
+			       *(const struct gfs2_dirent **)b);
+}
+
 /**
  * do_filldir_main - read out directory entries
  * @dip: The GFS2 inode
@@ -1234,13 +1234,14 @@ static int compare_dents(const void *a, const void *b)
  */
 
 static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
-			   const struct gfs2_dirent **darr, u32 entries,
-			   int *copied)
+			   struct gfs2_xrdir_ctx *xc, const struct gfs2_dirent **darr,
+			   u32 entries, int *copied)
 {
 	const struct gfs2_dirent *dent, *dent_next;
 	u64 off, off_next;
+	u64 *dst_pos = xc ? &xc->xc_offset : &ctx->pos;
 	unsigned int x, y;
-	int run = 0;
+	int run = 0, error = 0;
 
 	sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
 
@@ -1256,29 +1257,39 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
 			dent_next = darr[y];
 			off_next = be32_to_cpu(dent_next->de_hash);
 			off_next = gfs2_disk_hash2offset(off_next);
-
-			if (off < ctx->pos)
+			if (off < *dst_pos)
 				continue;
-			ctx->pos = off;
+
+			*dst_pos = off;
 
 			if (off_next == off) {
-				if (*copied && !run)
+				if (*copied && !run) {
+					if (xc)
+						gfs2_xrdir_partial_collect(xc);
 					return 1;
+				}
 				run = 1;
 			} else
 				run = 0;
 		} else {
-			if (off < ctx->pos)
+			if (off < *dst_pos)
 				continue;
-			ctx->pos = off;
+			*dst_pos = off;
 		}
 
-		if (!dir_emit(ctx, (const char *)(dent + 1),
-				be16_to_cpu(dent->de_name_len),
-				be64_to_cpu(dent->de_inum.no_addr),
-				be16_to_cpu(dent->de_type)))
-			return 1;
-
+		if (xc) {
+			error = gfs2_xrdir_collect_dents(dent, off, xc);
+			if (error) {
+				gfs2_xrdir_partial_collect(xc);
+				return 1;
+			}
+		} else {
+			if (!dir_emit(ctx, (const char *)(dent + 1),
+				      be16_to_cpu(dent->de_name_len),
+				      be64_to_cpu(dent->de_inum.no_addr),
+				      be16_to_cpu(dent->de_type)))
+				return 1;
+		}
 		*copied = 1;
 	}
 
@@ -1286,8 +1297,7 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
 	   do_filldir fxn, we get the next entry instead of the last one in the
 	   current leaf */
 
-	ctx->pos++;
-
+	(*dst_pos)++;
 	return 0;
 }
 
@@ -1311,8 +1321,8 @@ static void gfs2_free_sort_buffer(void *ptr)
 }
 
 static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
-			      int *copied, unsigned *depth,
-			      u64 leaf_no)
+			      struct gfs2_xrdir_ctx *xc, int *copied,
+			      unsigned *depth, u64 leaf_no)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1389,7 +1399,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
 	} while(lfn);
 
 	BUG_ON(entries2 != entries);
-	error = do_filldir_main(ip, ctx, darr, entries, copied);
+	error = do_filldir_main(ip, ctx, xc, darr, entries, copied);
 out_free:
 	for(i = 0; i < leaf; i++)
 		brelse(larr[i]);
@@ -1454,7 +1464,7 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
  */
 
 static int dir_e_read(struct inode *inode, struct dir_context *ctx,
-		      struct file_ra_state *f_ra)
+		      struct gfs2_xrdir_ctx *xc, struct file_ra_state *f_ra)
 {
 	struct gfs2_inode *dip = GFS2_I(inode);
 	u32 hsize, len = 0;
@@ -1465,7 +1475,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
 	unsigned depth = 0;
 
 	hsize = 1 << dip->i_depth;
-	hash = gfs2_dir_offset2hash(ctx->pos);
+	hash = gfs2_dir_offset2hash(xc ? xc->xc_offset : ctx->pos);
 	index = hash >> (32 - dip->i_depth);
 
 	if (dip->i_hash_cache == NULL)
@@ -1477,7 +1487,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
 	gfs2_dir_readahead(inode, hsize, index, f_ra);
 
 	while (index < hsize) {
-		error = gfs2_dir_read_leaf(inode, ctx,
+		error = gfs2_dir_read_leaf(inode, ctx, xc,
 					   &copied, &depth,
 					   be64_to_cpu(lp[index]));
 		if (error)
@@ -1493,7 +1503,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
 }
 
 int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
-		  struct file_ra_state *f_ra)
+		  struct gfs2_xrdir_ctx *xc, struct file_ra_state *f_ra)
 {
 	struct gfs2_inode *dip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1507,7 +1517,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
 		return 0;
 
 	if (dip->i_diskflags & GFS2_DIF_EXHASH)
-		return dir_e_read(inode, ctx, f_ra);
+		return dir_e_read(inode, ctx, xc, f_ra);
 
 	if (!gfs2_is_stuffed(dip)) {
 		gfs2_consist_inode(dip);
@@ -1539,7 +1549,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
 			error = -EIO;
 			goto out;
 		}
-		error = do_filldir_main(dip, ctx, darr,
+		error = do_filldir_main(dip, ctx, xc, darr,
 					dip->i_entries, &copied);
 out:
 		kfree(darr);
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 126c65d..8d40590 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -12,6 +12,10 @@
 
 #include <linux/dcache.h>
 #include <linux/crc32.h>
+#include "util.h"
+
+#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
+#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
 
 struct inode;
 struct gfs2_inode;
@@ -25,6 +29,13 @@ struct gfs2_diradd {
 	struct buffer_head *bh;
 };
 
+typedef int (*process_dent_t)(const struct gfs2_dirent *, loff_t, void *, filldir_t);
+extern int compare_dents_i(const struct gfs2_dirent *dent_a,
+			   const struct gfs2_dirent *dent_b);
+extern int foreach_dent(u64 *offset, void *opaque, filldir_t filldir,
+			const struct gfs2_dirent **darr, u32 entries,
+			int *copied, process_dent_t pd_fn);
+ 
 extern struct inode *gfs2_dir_search(struct inode *dir,
 				     const struct qstr *filename,
 				     bool fail_on_exist);
@@ -40,7 +51,7 @@ static inline void gfs2_dir_no_add(struct gfs2_diradd *da)
 }
 extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
 extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
-			 struct file_ra_state *f_ra);
+			 struct gfs2_xrdir_ctx *xc, struct file_ra_state *f_ra);
 extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 			  const struct gfs2_inode *nip, unsigned int new_type);
 
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 8b9b377..1f5085d 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -114,7 +114,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
 	if (error)
 		return error;
 
-	error = gfs2_dir_read(dir, &gnfd.ctx, &f_ra);
+	error = gfs2_dir_read(dir, &gnfd.ctx, NULL, &f_ra);
 
 	gfs2_glock_dq_uninit(&gh);
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 26b3f95..d2d7561f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -16,6 +16,8 @@
 #include <linux/blkdev.h>
 #include <linux/mm.h>
 #include <linux/mount.h>
+#include <linux/stat.h>
+#include <linux/sort.h>
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/falloc.h>
@@ -40,6 +42,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "xreaddir.h"
 
 /**
  * gfs2_llseek - seek to a location in a file
@@ -100,7 +103,7 @@ static int gfs2_readdir(struct file *file, struct dir_context *ctx)
 	if (error)
 		return error;
 
-	error = gfs2_dir_read(dir, ctx, &file->f_ra);
+	error = gfs2_dir_read(dir, ctx, NULL, &file->f_ra);
 
 	gfs2_glock_dq_uninit(&d_gh);
 
@@ -562,8 +565,13 @@ int gfs2_open_common(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	mutex_init(&fp->f_fl_mutex);
-
 	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
+
+	if (S_ISDIR(inode->i_mode)) {
+		ret = gfs2_xrdir_ctx_init(fp, GFS2_SB(inode));
+		if (ret)
+			return ret;
+	}
 	file->private_data = fp;
 	return 0;
 }
@@ -617,6 +625,9 @@ static int gfs2_release(struct inode *inode, struct file *file)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 
+	if (S_ISDIR(ip->i_inode.i_mode))
+		gfs2_xrdir_ctx_uninit((struct gfs2_file *)file->private_data);
+
 	kfree(file->private_data);
 	file->private_data = NULL;
 
@@ -1075,6 +1086,7 @@ const struct file_operations gfs2_file_fops = {
 
 const struct file_operations gfs2_dir_fops = {
 	.iterate	= gfs2_readdir,
+	.xreaddir       = gfs2_xreaddir,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.open		= gfs2_open,
 	.release	= gfs2_release,
@@ -1105,6 +1117,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 
 const struct file_operations gfs2_dir_fops_nolock = {
 	.iterate	= gfs2_readdir,
+	.xreaddir       = gfs2_xreaddir,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.open		= gfs2_open,
 	.release	= gfs2_release,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 67d310c..f86b6d3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -414,6 +414,7 @@ static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode)
 struct gfs2_file {
 	struct mutex f_fl_mutex;
 	struct gfs2_holder f_fl_gh;
+	struct gfs2_xrdir_ctx *f_xrctx;
 };
 
 struct gfs2_revoke_replay {
@@ -570,6 +571,8 @@ struct gfs2_tune {
 	unsigned int gt_complain_secs;
 	unsigned int gt_statfs_quantum;
 	unsigned int gt_statfs_slow;
+	unsigned int gt_max_vb_pages; /* Max pages to utilize for vector-page buffers */
+	unsigned int gt_max_xrdir_dents; /* Maximum dents to process per collect cycle (conserves memory) */
 };
 
 enum {
@@ -812,6 +815,9 @@ struct gfs2_sbd {
 	struct dentry *debugfs_dentry_glocks;
 	struct dentry *debugfs_dentry_glstats;
 	struct dentry *debugfs_dentry_sbstats;
+
+	/* Vector Pages accounting */
+	atomic_t sd_vb_page_count;
 };
 
 static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e594..46c3602 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1833,7 +1833,8 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		}
 	}
 
-	generic_fillattr(inode, stat);
+	gfs2_getattr_i(ip, stat);
+
 	if (unlock)
 		gfs2_glock_dq_uninit(&gh);
 	else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index ba4d949..665f508 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -93,6 +93,11 @@ err:
 	return -EIO;
 }
 
+/*
+ * gfs2_getattr_i - fill @stat from the VFS inode embedded in @ip
+ *
+ * Thin wrapper around generic_fillattr() for callers that hold a
+ * struct gfs2_inode rather than a struct inode.
+ */
+static inline void gfs2_getattr_i(struct gfs2_inode *ip, struct kstat *stat)
+{
+	struct inode *inode = &ip->i_inode;
+
+	generic_fillattr(inode, stat);
+}
+
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
 				       u64 no_addr, u64 no_formal_ino,
 				       int non_block);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index bc564c0..2d541ba 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -60,6 +60,8 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
 	gt->gt_new_files_jdata = 0;
 	gt->gt_max_readahead = 1 << 18;
 	gt->gt_complain_secs = 10;
+	gt->gt_max_vb_pages = 65536;
+	gt->gt_max_xrdir_dents = 25000;
 }
 
 static struct gfs2_sbd *init_sbd(struct super_block *sb)
@@ -135,6 +137,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	atomic_set(&sdp->sd_frozen_root, 0);
 	init_waitqueue_head(&sdp->sd_frozen_root_wait);
 
+	atomic_set(&sdp->sd_vb_page_count, 0);
+
 	return sdp;
 }
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 3ab566b..279aa86 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -548,8 +548,8 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
 	return len;
 }
 
-static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
-			int check_zero, const char *buf, size_t len)
+static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, int check_zero,
+			unsigned int min, unsigned int max, const char *buf, size_t len)
 {
 	struct gfs2_tune *gt = &sdp->sd_tune;
 	unsigned int x;
@@ -562,6 +562,12 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
 	if (check_zero && !x)
 		return -EINVAL;
 
+	if (min && x < min)
+		return -EINVAL;
+
+	if (max && x > max)
+		return -EINVAL;
+
 	spin_lock(&gt->gt_spin);
 	*field = x;
 	spin_unlock(&gt->gt_spin);
@@ -578,13 +584,21 @@ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                   \
 }                                                                             \
 TUNE_ATTR_3(name, name##_show, store)
 
-#define TUNE_ATTR(name, check_zero)                                           \
+#define TUNE_ATTR(name, check_zero)                                                \
+static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)     \
+{                                                                                  \
+	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, 0, 0, buf, len); \
+}                                                                                  \
+TUNE_ATTR_2(name, name##_store)
+
+#define TUNE_ATTR_B(name, min, max)                                           \
 static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 {                                                                             \
-	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len);  \
+	return tune_set(sdp, &sdp->sd_tune.gt_##name, 0, min, max, buf, len); \
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
+
 TUNE_ATTR(quota_warn_period, 0);
 TUNE_ATTR(quota_quantum, 0);
 TUNE_ATTR(max_readahead, 0);
@@ -593,6 +607,8 @@ TUNE_ATTR(statfs_slow, 0);
 TUNE_ATTR(new_files_jdata, 0);
 TUNE_ATTR(statfs_quantum, 1);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
+TUNE_ATTR_B(max_vb_pages, 32, 8388608); /* total capacity can be 128K to 32G bytes */
+TUNE_ATTR(max_xrdir_dents, 0);
 
 static struct attribute *tune_attrs[] = {
 	&tune_attr_quota_warn_period.attr,
@@ -603,6 +619,8 @@ static struct attribute *tune_attrs[] = {
 	&tune_attr_statfs_quantum.attr,
 	&tune_attr_quota_scale.attr,
 	&tune_attr_new_files_jdata.attr,
+	&tune_attr_max_vb_pages.attr,
+	&tune_attr_max_xrdir_dents.attr,
 	NULL,
 };
 
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 2c1aee3..793f69e 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -301,6 +301,9 @@ static int vp_extend(struct vp_ctx *vpx, int size)
 {
 	struct gfs2_sbd *sdp = vpx->vp_sdp;
 
+	if ((gfs2_tune_get(sdp, gt_max_vb_pages)
+	     - atomic_read(&sdp->sd_vb_page_count)) < size)
+		goto out;
 	/* first make room for more pointers */
 	if (size <= 0)
 		return -EINVAL;
@@ -317,6 +320,7 @@ static int vp_extend(struct vp_ctx *vpx, int size)
 		goto out;
 
 	vpx->vp_size += size;
+	atomic_add(size, &sdp->sd_vb_page_count);
 	return 0;
 out:
 	return -ENOMEM;
@@ -328,6 +332,9 @@ int vp_init(struct gfs2_sbd *sdp, struct vbuf *vb, int init_cap)
 	struct vp_ctx *vpx;
 
 	cap = DIV_ROUND_UP(init_cap, PAGE_SIZE);
+	if ((gfs2_tune_get(sdp, gt_max_vb_pages)
+	     - atomic_read(&sdp->sd_vb_page_count)) < cap)
+		goto out;
 
 	vpx = kmalloc(sizeof(struct vp_ctx), GFP_KERNEL);
 	if (vpx == NULL)
@@ -344,6 +351,7 @@ int vp_init(struct gfs2_sbd *sdp, struct vbuf *vb, int init_cap)
 
 	vpx->vp_baseptr = vpx->vp_top = page_address(vpx->vp_pages[0]);
 	vpx->vp_sdp = sdp;
+	atomic_add(cap, &sdp->sd_vb_page_count);
 	vb->v_ptr = vpx->vp_baseptr;
 	vb->v_opaque = vpx;
 
@@ -373,6 +381,7 @@ void vp_uninit(struct vbuf *vb)
 
 	vp_free_pages(vpx);
 	kfree(vpx->vp_pages);
+	atomic_sub(vpx->vp_size, &vpx->vp_sdp->sd_vb_page_count);
 	kfree(vpx);
 	vb->v_ptr = vb->v_opaque = NULL;
 }
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f78..f156b21 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -11,6 +11,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/sort.h>
 #include <linux/xattr.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/posix_acl_xattr.h>
@@ -19,6 +20,7 @@
 #include "gfs2.h"
 #include "incore.h"
 #include "acl.h"
+#include "dir.h"
 #include "xattr.h"
 #include "glock.h"
 #include "inode.h"
@@ -27,6 +29,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "xreaddir.h"
 
 /**
  * ea_calc_size - returns the acutal number of bytes the request will take up
@@ -72,10 +75,6 @@ static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
 	return 0;
 }
 
-typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
-			  struct gfs2_ea_header *ea,
-			  struct gfs2_ea_header *prev, void *private);
-
 static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
 			ea_call_t ea_call, void *data)
 {
@@ -113,7 +112,7 @@ fail:
 	return -EIO;
 }
 
-static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
+int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 {
 	struct buffer_head *bh, *eabh;
 	__be64 *eablk, *end;
@@ -374,28 +373,14 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
 		return 0;
 
 	if (er->er_data_len) {
-		char *prefix = NULL;
+		char prefix[9];
 		unsigned int l = 0;
 		char c = 0;
 
 		if (ei->ei_size + ea_size > er->er_data_len)
 			return -ERANGE;
 
-		switch (ea->ea_type) {
-		case GFS2_EATYPE_USR:
-			prefix = "user.";
-			l = 5;
-			break;
-		case GFS2_EATYPE_SYS:
-			prefix = "system.";
-			l = 7;
-			break;
-		case GFS2_EATYPE_SECURITY:
-			prefix = "security.";
-			l = 9;
-			break;
-		}
-
+		l = ea_prefix(ea, prefix, 9);
 		BUG_ON(l == 0);
 
 		memcpy(er->er_data + ei->ei_size, prefix, l);
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index d392f83..c09f090 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -10,6 +10,8 @@
 #ifndef __EATTR_DOT_H__
 #define __EATTR_DOT_H__
 
+#include "dir.h"
+
 struct gfs2_inode;
 struct iattr;
 
@@ -53,9 +55,30 @@ struct gfs2_ea_location {
 	struct gfs2_ea_header *el_prev;
 };
 
+/*
+ * ea_prefix - copy the xattr namespace prefix for @ea into @buf
+ * @ea: extended attribute header; ea_type selects the prefix
+ * @buf: destination buffer
+ * @size: capacity of @buf in bytes; must be at least 9
+ *
+ * Copies "user.", "system." or "security." into @buf WITHOUT a
+ * terminating NUL (callers use the returned length), which is why
+ * memcpy() is used instead of a string-copy routine.
+ *
+ * Returns the number of bytes copied, or 0 when ea_type is none of
+ * the three types handled here.
+ */
+static inline int ea_prefix(struct gfs2_ea_header *ea, char *buf, int size)
+{
+	BUG_ON(size < 9);
+	switch (ea->ea_type) {
+	case GFS2_EATYPE_USR:
+		memcpy(buf, "user.", 5);
+		return 5;
+	case GFS2_EATYPE_SYS:
+		memcpy(buf, "system.", 7);
+		return 7;
+	case GFS2_EATYPE_SECURITY:
+		memcpy(buf, "security.", 9);
+		return 9;
+	default:
+		return 0;
+	}
+}
+
 extern int __gfs2_xattr_set(struct inode *inode, const char *name,
 			    const void *value, size_t size,
 			    int flags, int type);
+typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
+			  struct gfs2_ea_header *ea,
+			  struct gfs2_ea_header *prev, void *private);
+extern int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data);
 extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
 extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
 
diff --git a/fs/gfs2/xreaddir.c b/fs/gfs2/xreaddir.c
new file mode 100644
index 0000000..44e0232
--- /dev/null
+++ b/fs/gfs2/xreaddir.c
@@ -0,0 +1,1024 @@
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/uio.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/stat.h>
+#include <linux/sort.h>
+#include <linux/fs.h>
+#include <linux/gfs2_ondisk.h>
+#include <linux/falloc.h>
+#include <linux/swap.h>
+#include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <asm/uaccess.h>
+#include <linux/dlm.h>
+#include <linux/dlm_plock.h>
+
+#include "gfs2.h"
+#include "incore.h"
+#include "bmap.h"
+#include "dir.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+#include "util.h"
+#include "xattr.h"
+#include "xreaddir.h"
+
+/*
+ * Returns 1 if @dent is a directory entry named "." or "..", else 0.
+ * The name bytes are stored immediately after the dirent header.
+ */
+static int gfs2_dirent_dot_or_dotdot(const struct gfs2_dirent *dent)
+{
+	const char *name = (char *)(dent + 1);
+	unsigned int len = be16_to_cpu(dent->de_name_len);
+
+	if (be16_to_cpu(dent->de_type) != DT_DIR)
+		return 0;
+
+	switch (len) {
+	case 1:
+		return name[0] == '.';
+	case 2:
+		return strncmp(name, "..", 2) == 0;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Compare the inode blocks of two entries
+ *
+ * @opaque is the struct gfs2_xrdir_ctx that owns the dirent vbuf;
+ * @a and @b each point at an element of the dirent pointer array.
+ * Returns <0, 0 or >0 like a conventional comparator. Equal block
+ * numbers compare as 0 so that cmp(a, b) and cmp(b, a) agree.
+ */
+int ctx_compare_dent_iblks(void *opaque, const void *a, const void *b)
+{
+	struct gfs2_xrdir_ctx *xc = opaque;
+	const struct gfs2_xdirent *a_vb_p = *(const struct gfs2_xdirent * const *)a;
+	const struct gfs2_xdirent *b_vb_p = *(const struct gfs2_xdirent * const *)b;
+	u64 a_blkno, b_blkno;
+
+	/* the entries live in the vbuf, so fetch the keys through vp_read() */
+	vp_read(&xc->xc_dirents, &a_blkno, &a_vb_p->x_ino, sizeof(u64));
+	vp_read(&xc->xc_dirents, &b_blkno, &b_vb_p->x_ino, sizeof(u64));
+
+	if (a_blkno > b_blkno)
+		return 1;
+	if (a_blkno < b_blkno)
+		return -1;
+	return 0;
+}
+
+/*
+ * Compare the xattr blocks of two entries
+ *
+ * @opaque is the struct gfs2_xrdir_ctx that owns the dirent vbuf;
+ * @a and @b each point at an element of the dirent pointer array.
+ * Returns <0, 0 or >0 like a conventional comparator. Equal block
+ * numbers compare as 0 so that cmp(a, b) and cmp(b, a) agree.
+ */
+int ctx_compare_dent_eablks(void *opaque, const void *a, const void *b)
+{
+	struct gfs2_xrdir_ctx *xc = opaque;
+	const struct gfs2_xdirent *a_vb_p = *(const struct gfs2_xdirent * const *)a;
+	const struct gfs2_xdirent *b_vb_p = *(const struct gfs2_xdirent * const *)b;
+	u64 a_blkno, b_blkno;
+
+	/* the entries live in the vbuf, so fetch the keys through vp_read() */
+	vp_read(&xc->xc_dirents, &a_blkno, &a_vb_p->x_eablk, sizeof(u64));
+	vp_read(&xc->xc_dirents, &b_blkno, &b_vb_p->x_eablk, sizeof(u64));
+
+	if (a_blkno > b_blkno)
+		return 1;
+	if (a_blkno < b_blkno)
+		return -1;
+	return 0;
+}
+
+/*
+ * Compare two entries based on their hash value
+ *
+ * Ordering is by hash, then by name length, then by the name bytes.
+ * @opaque is the struct gfs2_xrdir_ctx that owns the dirent vbuf;
+ * @a and @b each point at an element of the dirent pointer array.
+ *
+ * The names are compared a chunk at a time through two small on-stack
+ * buffers, so the comparator never allocates and therefore cannot
+ * mis-report two different names as equal because an allocation failed.
+ *
+ * Returns <0, 0 or >0.
+ */
+int ctx_compare_dents(void *opaque, const void *a, const void *b)
+{
+	struct gfs2_xrdir_ctx *xc = opaque;
+	const struct gfs2_xdirent *a_vb_p = *(const struct gfs2_xdirent * const *)a;
+	const struct gfs2_xdirent *b_vb_p = *(const struct gfs2_xdirent * const *)b;
+	u32 a_hash, b_hash;
+	unsigned int len_a, len_b, off = 0;
+	char chunk_a[64], chunk_b[64];
+	int ret = 0;
+
+	vp_read(&xc->xc_dirents, &a_hash, &a_vb_p->x_hash, sizeof(u32));
+	vp_read(&xc->xc_dirents, &b_hash, &b_vb_p->x_hash, sizeof(u32));
+	if (a_hash != b_hash)
+		return a_hash > b_hash ? 1 : -1;
+
+	vp_read(&xc->xc_dirents, &len_a, &a_vb_p->x_namelen, sizeof(unsigned int));
+	vp_read(&xc->xc_dirents, &len_b, &b_vb_p->x_namelen, sizeof(unsigned int));
+	if (len_a != len_b)
+		return len_a > len_b ? 1 : -1;
+
+	/*
+	 * Same hash and same length: fall back to the name bytes. The
+	 * first differing byte decides, so comparing chunk by chunk
+	 * gives the same sign as one memcmp() over the whole name.
+	 */
+	while (off < len_a && ret == 0) {
+		unsigned int n = min_t(unsigned int, len_a - off,
+				       sizeof(chunk_a));
+
+		vp_read(&xc->xc_dirents, chunk_a, a_vb_p->x_name + off, n);
+		vp_read(&xc->xc_dirents, chunk_b, b_vb_p->x_name + off, n);
+		ret = memcmp(chunk_a, chunk_b, n);
+		off += n;
+	}
+	return ret;
+}
+
+/*
+ * Release the xreaddir context hanging off @fp, if any: the dirent
+ * pointer array, the three vbufs and the context itself. Afterwards
+ * fp->f_xrctx is NULL. A NULL @fp or a missing context is a no-op.
+ */
+void gfs2_xrdir_ctx_uninit(struct gfs2_file *fp)
+{
+	struct gfs2_xrdir_ctx *ctx = fp ? fp->f_xrctx : NULL;
+
+	if (ctx == NULL)
+		return;
+
+	kfree(ctx->xc_vb_dptrs);	/* kfree(NULL) is a no-op */
+	vp_uninit(&ctx->xc_xattr_values);
+	vp_uninit(&ctx->xc_xattr_keys);
+	vp_uninit(&ctx->xc_dirents);
+	kfree(ctx);
+	fp->f_xrctx = NULL;
+}
+
+/*
+ * gfs2_xrdir_ctx_init - allocate and attach an xreaddir context to @fp
+ * @fp: per-open gfs2 file state; must not already carry a context
+ * @sdp: superblock the vbuf pages are accounted against
+ *
+ * Allocates the context and its three vbufs (dirents, xattr keys,
+ * xattr values), each with an initial capacity request of 1.
+ *
+ * Returns 0 on success, -EINVAL if @fp is NULL, or -ENOMEM if any
+ * allocation fails. On failure nothing is attached to @fp and every
+ * vbuf that had been set up is torn down again.
+ */
+int gfs2_xrdir_ctx_init(struct gfs2_file *fp, struct gfs2_sbd *sdp)
+{
+	struct gfs2_xrdir_ctx *xc;
+
+	if (!fp)
+		return -EINVAL;
+
+	BUG_ON(fp->f_xrctx != NULL);
+
+	xc = kzalloc(sizeof(struct gfs2_xrdir_ctx), GFP_KERNEL);
+	if (xc == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Unwind by hand: xc is not attached to fp yet, so
+	 * gfs2_xrdir_ctx_uninit(fp) would find no context and free
+	 * nothing. Only vbufs whose vp_init() succeeded are uninited.
+	 */
+	if (vp_init(sdp, &xc->xc_dirents, 1))
+		goto fail_free;
+	if (vp_init(sdp, &xc->xc_xattr_keys, 1))
+		goto fail_dirents;
+	if (vp_init(sdp, &xc->xc_xattr_values, 1))
+		goto fail_keys;
+
+	xc->xc_flags |= XC_FL_ALLOCATED;
+	fp->f_xrctx = xc;
+
+	return 0;
+
+fail_keys:
+	vp_uninit(&xc->xc_xattr_keys);
+fail_dirents:
+	vp_uninit(&xc->xc_dirents);
+fail_free:
+	kfree(xc);
+	return -ENOMEM;
+}
+
+/*
+ * Entry collection stopped early. Unless the stop has already been
+ * accounted for (an internal part limit was hit, or an error was
+ * flagged), treat it as a hash collision and record that in the flags.
+ */
+void gfs2_xrdir_partial_collect(struct gfs2_xrdir_ctx *xc)
+{
+	if (xc->xc_flags & (XC_FL_GATHER_PART_INT | XC_FL_ERROR))
+		return;
+
+	/*
+	 * Hash collision: entries were read in hash order up to, but not
+	 * including, the colliding hashes. XC_FL_HASH_COLL records that;
+	 * XC_FL_HASH_COLL_NXT says the next collection run will contain
+	 * the colliding entries. The result is also flagged as a partial
+	 * gather, and the current offset is remembered as the collision
+	 * offset.
+	 */
+	xc->xc_flags |= XC_FL_HASH_COLL | XC_FL_HASH_COLL_NXT |
+			XC_FL_GATHER_PARTS | XC_FL_GATHER_PART_INT;
+	xc->xc_hash_coll_off = xc->xc_offset;
+}
+
+/*
+ * Memory ran out during collection and there is not a single entry to
+ * hand back. Halve (rounding up) the number of dents attempted last
+ * time, clear the state left behind by the failed run and return
+ * -EAGAIN so the operation can be retried.
+ */
+static int gfs2_xrdir_handle_oom(struct gfs2_xrdir_ctx *xc)
+{
+	/*
+	 * The retry re-reads fewer entries from the same collection and
+	 * may or may not reach the hash collision recorded earlier (if
+	 * any), so the collision flags are dropped together with the
+	 * error and internal-part flags.
+	 */
+	const unsigned int stale = XC_FL_ERROR_OOM | XC_FL_ERROR |
+				   XC_FL_GATHER_PART_INT |
+				   XC_FL_HASH_COLL | XC_FL_HASH_COLL_NXT;
+
+	xc->xc_dent_cap = DIV_ROUND_UP(xc->xc_count, 2);
+	xc->xc_flags &= ~stale;
+	xc->xc_hash_coll_off = 0;
+
+	return -EAGAIN;
+}
+
+/*
+ * Post-process the result of collecting one entry.
+ *
+ * A negative @error is flagged in xc_flags (additionally as OOM when
+ * it is -ENOMEM) and returned unchanged. Otherwise, if the entry
+ * count cap or the dirent memory cap has been reached, the run is
+ * flagged as partial and -EOVERFLOW is returned. Returns 0 when
+ * collection may continue.
+ */
+static int gfs2_xrdir_collect_errcheck(struct gfs2_xrdir_ctx *xc, int error)
+{
+	if (error < 0) {
+		xc->xc_flags |= (error == -ENOMEM) ?
+			(XC_FL_ERROR_OOM | XC_FL_ERROR) : XC_FL_ERROR;
+		return error;
+	}
+
+	if (xc->xc_dent_cap && xc->xc_count >= xc->xc_dent_cap)
+		goto limit_hit;
+	if (xc->xc_dent_memcap &&
+	    vp_get_size(&xc->xc_dirents) >= xc->xc_dent_memcap)
+		goto limit_hit;
+
+	return 0;
+
+limit_hit:
+	/* We hit one of our limits, flag and return */
+	xc->xc_flags |= XC_FL_GATHER_PARTS | XC_FL_GATHER_PART_INT;
+	return -EOVERFLOW;
+}
+
+/*
+ * To reduce disk-seeking, we collect all the info in stages.
+ * In each stage, we access relevant disk blocks in order
+ * by pre-sorting the entries correspondingly.
+ *
+ * 1. Collect entry info (name, ino, type, offset) etc for all the
+ *    entries. Obtained by reading the directory inode
+ * 2. Collect stat info for all the entries. Obtained by reading
+ *    the file inode blocks.
+ * 3. Collect xattr info for all the entries. Obtained by reading
+ *    the eattr block of each inode.
+ *
+ * With this scheme of collecting data, we don't know what the final
+ * size of a dirent would be ahead of time. gfs2_xrdir_estimate_dent_memcap()
+ * attempts to guess the size. Right now it statically computes and
+ * reserves a fixed percentage of available space for entry+stat info
+ * and xattr info based on what data is requested by the user.
+ *
+ * The available space is the gt_max_vb_pages tunable, minus the pages
+ * currently accounted in sd_vb_page_count, plus the pages this context
+ * already owns. 80% of that is returned, or 50% when both
+ * XSTAT_XATTR_ALL and XSTAT_XATTR_VALUES bits are set in the mask.
+ *
+ * Returns the cap in bytes.
+ *
+ * TODO: Make this dynamic. Analyse the directory being processed
+ * and use observed ratios to improve throughput.
+ */
+static u64 gfs2_xrdir_estimate_dent_memcap(struct gfs2_sbd *sdp,
+					   struct gfs2_xrdir_ctx *xc)
+{
+	u64 avail;
+	int perc = 80;
+	unsigned int mask = xc->xc_xattr_mask;
+
+	avail = gfs2_tune_get(sdp, gt_max_vb_pages) +
+		vp_get_page_count(&xc->xc_dirents) +
+		vp_get_page_count(&xc->xc_xattr_keys) +
+		vp_get_page_count(&xc->xc_xattr_values) -
+		atomic_read(&sdp->sd_vb_page_count);
+	/*
+	 * Convert pages to bytes in 64 bits: the tunable may be as large
+	 * as 8388608 pages, which does not fit in a 32-bit unsigned long
+	 * once multiplied by PAGE_SIZE.
+	 */
+	avail *= PAGE_SIZE;
+
+	if ((mask & XSTAT_XATTR_ALL) && (mask & XSTAT_XATTR_VALUES))
+		perc = 50;
+
+	/* div_u64() because a plain u64 '/' does not link on 32-bit */
+	return div_u64(avail * perc, 100);
+}
+
+/*
+ * Prepare the xreaddir context for a collect run; called before every
+ * run. Always returns 0.
+ */
+static int gfs2_xrdir_ctx_setup(struct file *file, struct gfs2_xrdir_ctx *xc,
+				unsigned int flags, unsigned int mask)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
+	const int continuing = xc->xc_flags & XC_FL_GATHER_PARTS;
+
+	/*
+	 * flags, mask and the starting offset are latched once per
+	 * readdirplus initiation; later parts of a multi-part gather
+	 * keep the values that were set at the start.
+	 */
+	if (!continuing) {
+		xc->xc_xst_flags = flags;
+		xc->xc_xattr_mask = mask;
+		xc->xc_offset = file->f_pos;
+	}
+
+	/*
+	 * Limits for this part: an entry cap carried over from a previous
+	 * run is kept, otherwise the sysfs tunable is used; the memory cap
+	 * is re-estimated every time.
+	 */
+	if (!xc->xc_dent_cap)
+		xc->xc_dent_cap = gfs2_tune_get(sdp, gt_max_xrdir_dents);
+	xc->xc_dent_memcap = gfs2_xrdir_estimate_dent_memcap(sdp, xc);
+
+	/* discard everything collected by the previous run */
+	kfree(xc->xc_vb_dptrs);
+	xc->xc_vb_dptrs = NULL;
+	xc->xc_next_dent = NULL;
+	xc->xc_count = 0;
+	xc->xc_dent_valid = 0;
+	vp_reset(&xc->xc_dirents);
+	vp_reset(&xc->xc_xattr_keys);
+	vp_reset(&xc->xc_xattr_values);
+
+	return 0;
+}
+
+/*
+ * Add a gfs2_dirent to the xreaddir context
+ *
+ * @dent: on-disk directory entry; its name bytes follow the header
+ * @off:  offset of this entry, compared against xc_hash_coll_off
+ * @xc:   xreaddir context receiving the entry
+ *
+ * "." and ".." are skipped (returns 0 without storing anything).
+ * Otherwise the entry is serialised field by field into the xc_dirents
+ * vbuf at xc_next_dent, xc_next_dent is advanced to the byte after the
+ * copied name, and xc_count is incremented.
+ *
+ * The return value comes from gfs2_xrdir_collect_errcheck(): 0 to keep
+ * going, -EOVERFLOW when a collection limit is reached, or the negative
+ * error passed to it.
+ */
+int gfs2_xrdir_collect_dents(const struct gfs2_dirent *dent, loff_t off,
+			     struct gfs2_xrdir_ctx *xc)
+{
+	struct gfs2_xdirent *x;
+	u64 x_ino;
+	u32 x_hash;
+	u8 x_valid = 0;
+	char x_type;
+	unsigned int x_xattr_count, x_namelen;
+	const void *nullptr = NULL;
+	int error = 0;
+
+	if (gfs2_dirent_dot_or_dotdot(dent))
+		return 0;
+
+	/* first entry of a run starts at the base of the dirent vbuf */
+	if (xc->xc_next_dent == NULL)
+		xc->xc_next_dent = xc->xc_dirents.v_ptr;
+	x = xc->xc_next_dent;
+	/* zero the fixed part of the slot, including fields not set below */
+	vp_memset(&xc->xc_dirents, x, 0, sizeof(struct gfs2_xdirent));
+
+	/*
+	 * If we know that we're encountering hash-colliding
+	 * entries this time around, we read only these in
+	 * and nothing else
+	 */
+	if (xc->xc_flags & XC_FL_HASH_COLL_NXT &&
+	    off != xc->xc_hash_coll_off) {
+		/*
+		 * setting dent_cap to how many we've read in
+		 * so we don't read anymore
+		 */
+		xc->xc_dent_cap = xc->xc_count;
+		xc->xc_flags &= ~XC_FL_HASH_COLL_NXT;
+		/*
+		 * xc_offset will get incremented to read
+		 * at the next offset when everything
+		 * is written out properly this cycle
+		 */
+		xc->xc_offset = xc->xc_hash_coll_off;
+		xc->xc_hash_coll_off = 0;
+		/* error is still 0 here; this entry is not stored */
+		goto err_check;
+	}
+
+	/* Copy the dirent contents */
+	x_ino = be64_to_cpu(dent->de_inum.no_addr);
+	x_hash = be32_to_cpu(dent->de_hash);
+	x_type = be16_to_cpu(dent->de_type);
+	x_xattr_count = 0;
+	x_namelen = be16_to_cpu(dent->de_name_len);
+
+	/*
+	 * Each vp_write() result is compared with the requested size; on
+	 * any mismatch the raw result is handed to the error check.
+	 * NOTE(review): gfs2_xrdir_collect_errcheck() only treats negative
+	 * values as errors, so a short positive result would look like
+	 * success while the entry is not counted - confirm vp_write()
+	 * cannot return a short positive count.
+	 */
+	error = vp_write(&xc->xc_dirents, &x->x_ino, &x_ino, sizeof(x->x_ino));
+	if (error != sizeof(x->x_ino)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_hash, &x_hash, sizeof(x->x_hash));
+	if (error != sizeof(x->x_hash)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_valid, &x_valid, sizeof(x->x_valid));
+	if (error != sizeof(x->x_valid)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_type, &x_type, sizeof(x->x_type));
+	if (error != sizeof(x->x_type)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_xattr_count, &x_xattr_count,
+			 sizeof(x->x_xattr_count));
+	if (error != sizeof(x->x_xattr_count)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_vb_xattr_arr_ptr, &nullptr,
+			 sizeof(x->x_vb_xattr_arr_ptr));
+	if (error != sizeof(x->x_vb_xattr_arr_ptr)) goto err_check;
+
+	error = vp_write(&xc->xc_dirents, &x->x_namelen, &x_namelen,
+			 sizeof(x->x_namelen));
+	if (error != sizeof(x->x_namelen)) goto err_check;
+
+	/* the name is copied without a terminating NUL */
+	error = vp_write(&xc->xc_dirents, &x->x_name, (char*)(dent + 1), x_namelen);
+	if (error != x_namelen) goto err_check;
+
+	/* the next entry starts right after this entry's name bytes */
+	xc->xc_next_dent = x->x_name + x_namelen;
+	xc->xc_count++;
+	error = 0;
+err_check:
+	return gfs2_xrdir_collect_errcheck(xc, error);
+}
+
+/*
+ * Create the array of pointers that point to all the
+ * collected entries within the xc_dirents vbuf.
+ *
+ * Must be called with at least one collected entry and with no
+ * pointer array allocated yet (both are BUG_ON conditions).
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+static int gfs2_xrdir_create_dptrs(struct gfs2_xrdir_ctx *xc)
+{
+	int i;
+	unsigned int namelen;
+	struct gfs2_xdirent *x = xc->xc_dirents.v_ptr;
+
+	BUG_ON(xc->xc_vb_dptrs || xc->xc_count == 0);
+
+	/* allocate the dirent pointers; kmalloc_array checks for overflow */
+	xc->xc_vb_dptrs = kmalloc_array(xc->xc_count,
+					sizeof(struct gfs2_xdirent *),
+					GFP_KERNEL);
+	if (xc->xc_vb_dptrs == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < xc->xc_count; i++) {
+		xc->xc_vb_dptrs[i] = x;
+		vp_read(&xc->xc_dirents, &namelen, &x->x_namelen,
+			sizeof(x->x_namelen));
+		/*
+		 * Entries are packed back to back: the next one starts
+		 * right after this entry's name bytes.
+		 * see struct gfs2_xdirent for more info
+		 */
+		x = (void *)x->x_name + namelen;
+	}
+	return 0;
+}
+
+/*
+ * For every collected entry, fetch the inode pointer stored in its
+ * vbuf record, fill a kstat from it and store both the kstat and the
+ * inode's i_eattr value back into the record. Always returns 0.
+ */
+static int gfs2_xrdir_collect_xstat(struct gfs2_xrdir_ctx *xc)
+{
+	struct kstat ks;
+	int n;
+
+	for (n = 0; n < xc->xc_count; n++) {
+		struct gfs2_xdirent *ent = xc->xc_vb_dptrs[n];
+		struct gfs2_inode *ip;
+
+		vp_read(&xc->xc_dirents, &ip, &ent->x_ip,
+			sizeof(struct gfs2_inode *));
+
+		gfs2_getattr_i(ip, &ks);
+		vp_write(&xc->xc_dirents, &ent->x_kstat, &ks,
+			 sizeof(struct kstat));
+
+		vp_write(&xc->xc_dirents, &ent->x_eablk, &ip->i_eattr,
+			 sizeof(ent->x_eablk));
+	}
+	return 0;
+}
+
+/*
+ * Returns 1 if the request @mask asks for xattrs of namespace @type
+ * (user, system or security), 0 otherwise - including for any other
+ * @type value.
+ */
+static inline int xattr_requested(char type, unsigned int mask)
+{
+	switch (type) {
+	case GFS2_EATYPE_USR:
+		return (mask & XSTAT_XATTR_USER) ? 1 : 0;
+	case GFS2_EATYPE_SYS:
+		return (mask & XSTAT_XATTR_SYSTEM) ? 1 : 0;
+	case GFS2_EATYPE_SECURITY:
+		return (mask & XSTAT_XATTR_SECURITY) ? 1 : 0;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * ea_foreach() callback: record one extended attribute of @ip.
+ *
+ * @private is a struct gfs2_xdir_ctx_bndle carrying the xreaddir
+ * context and the vbuf address of the dirent the attribute belongs to.
+ * For a requested attribute type, a gfs2_xd_xattr record (prefixed key
+ * name and lengths) is written at the top of the xc_xattr_keys vbuf;
+ * when XSTAT_XATTR_VALUES is set and the attribute has data, the data
+ * is appended to xc_xattr_values. The dirent's x_xattr_count is then
+ * incremented.
+ *
+ * Returns 0 on success or when the attribute is skipped, -ENOMEM (with
+ * XC_FL_ERROR_OOM set) when a vbuf could not take the data.
+ */
+static int gfs2_xrdir_xattr_list_i(struct gfs2_inode *ip, 
+				   struct buffer_head *bh,
+				   struct gfs2_ea_header *ea,
+				   struct gfs2_ea_header *prev, void *private)
+{
+	struct gfs2_xdir_ctx_bndle *bundle = private;
+	struct gfs2_xrdir_ctx *xc = bundle->xcb_xc;
+	struct gfs2_xdirent *x = bundle->xcb_xd;
+	struct gfs2_xd_xattr *xtr;
+	char prefix[9];
+	unsigned int l = 0, xtr_count, namlen, reclen;
+	void *p;
+
+	/* skip namespaces the caller's mask did not ask for */
+	if (!xattr_requested(ea->ea_type, xc->xc_xattr_mask))
+		return 0;
+
+	if (ea->ea_type == GFS2_EATYPE_UNUSED)
+		return 0;
+
+	/* l = length of the "user." / "system." / "security." prefix */
+	l = ea_prefix(ea, prefix, 9);
+	BUG_ON(l == 0);
+
+	/* the new record goes at the current top of the keys vbuf */
+	xtr = vp_get_top(&xc->xc_xattr_keys);
+	/*
+	 * Only certain vp_XXX ops can trip -ENOMEM where we might be extending
+	 * the vbuf. We ignore the error code of other ops.
+	 */
+	if (vp_memset(&xc->xc_xattr_keys, xtr, 0, 
+		      sizeof(struct gfs2_xd_xattr)) == -ENOMEM)
+		goto set_oom;
+
+	/* if mask says don't do values, skip the following lines */
+	if (GFS2_EA_DATA_LEN(ea) > 0 && (xc->xc_xattr_mask & XSTAT_XATTR_VALUES)) {
+		/* the value will land at the current top of the values vbuf */
+		void *valptr = vp_get_top(&xc->xc_xattr_values);
+		unsigned long len = GFS2_EA_DATA_LEN(ea);
+
+		vp_write(&xc->xc_xattr_keys, &xtr->xa_value_len,
+			 &len, sizeof(xtr->xa_value_len));
+		vp_write(&xc->xc_xattr_keys, &xtr->xa_vb_value_ptr, &valptr,
+			 sizeof(void*));
+		/* NOTE(review): p is read back but never used afterwards */
+		vp_read(&xc->xc_xattr_keys, &p, &xtr->xa_vb_value_ptr,
+			sizeof(void*));
+		/*
+		 * NOTE(review): presumably assumes the whole value is
+		 * stored inline at GFS2_EA2DATA(ea) - confirm for
+		 * attributes whose data is kept in separate blocks.
+		 */
+		if (vp_append(&xc->xc_xattr_values, GFS2_EA2DATA(ea), len)
+		    == -ENOMEM)
+			goto set_oom;
+	}
+
+	/* stored key length = prefix length + on-disk name length */
+	namlen = l + ea->ea_name_len;
+	vp_write(&xc->xc_xattr_keys, &xtr->xa_keylen, &namlen,
+		 sizeof(xtr->xa_keylen));
+	if (vp_write(&xc->xc_xattr_keys, xtr->xa_keyname, &prefix, l) == -ENOMEM)
+		goto set_oom;
+	/*
+	 * NOTE(review): this copies namlen (prefix + name) bytes from the
+	 * on-disk name, i.e. l bytes more than ea->ea_name_len, and the
+	 * reclen below likewise spans l + namlen bytes of key. Later
+	 * records start at the vbuf top, so the two are presumably meant
+	 * to agree - confirm whether the extra l bytes are intended
+	 * before changing either.
+	 */
+	if (vp_write(&xc->xc_xattr_keys, xtr->xa_keyname + l, 
+		     GFS2_EA2NAME(ea), namlen) == -ENOMEM)
+		goto set_oom;
+
+	/* gfs2_xd_xattr.xa_keyname[1] has an extra byte */
+	reclen = (xtr->xa_keyname + l + namlen) - (char *)xtr;
+	vp_write(&xc->xc_xattr_keys, &xtr->xa_reclen, &reclen,
+		 sizeof(xtr->xa_reclen));
+
+	/* bump the owning dirent's xattr count (read-modify-write in the vbuf) */
+	vp_read(&xc->xc_dirents, &xtr_count, &x->x_xattr_count,
+		sizeof(x->x_xattr_count));
+	xtr_count++;
+	vp_write(&xc->xc_dirents, &x->x_xattr_count, &xtr_count,
+		 sizeof(x->x_xattr_count));
+
+	return 0;
+set_oom:
+	xc->xc_flags |= XC_FL_ERROR_OOM;
+	return -ENOMEM;
+}
+
+/*
+ * Walk the collected entries and gather the xattrs of each one.
+ *
+ * Entries whose inode has no eattr block, or all entries when the mask
+ * has no XSTAT_XATTR_ALL bit set, are simply marked valid. For the
+ * others the current top of the keys vbuf is stored in the entry as
+ * the start of its xattr records and ea_foreach() fills them in.
+ * Processing stops at the first ea_foreach() error; the failing entry
+ * is not marked valid.
+ *
+ * Returns 0 or the error from ea_foreach().
+ */
+int gfs2_xrdir_collect_xattrs(struct gfs2_xrdir_ctx *xc)
+{
+	int idx, ret = 0;
+
+	for (idx = 0; idx < xc->xc_count; idx++) {
+		struct gfs2_xdirent *ent = xc->xc_vb_dptrs[idx];
+		struct gfs2_inode *ip;
+		u8 done = 1;
+
+		vp_read(&xc->xc_dirents, &ip, &ent->x_ip,
+			sizeof(struct gfs2_inode *));
+
+		if (ip->i_eattr && (xc->xc_xattr_mask & XSTAT_XATTR_ALL)) {
+			struct gfs2_xdir_ctx_bndle bundle;
+			struct gfs2_xdirent *keys_top;
+
+			bundle.xcb_xc = xc;
+			bundle.xcb_xd = ent;
+
+			keys_top = vp_get_top(&xc->xc_xattr_keys);
+			vp_write(&xc->xc_dirents, &ent->x_vb_xattr_arr_ptr,
+				 &keys_top, sizeof(struct gfs2_xd_xattr*));
+
+			ret = ea_foreach(ip, gfs2_xrdir_xattr_list_i, &bundle);
+			if (ret)
+				break;
+		}
+
+		/* Read the xattrs for this dent, so mark it as valid */
+		vp_write(&xc->xc_dirents, &ent->x_valid, &done,
+			 sizeof(ent->x_valid));
+		xc->xc_dent_valid++;
+	}
+	return ret;
+}
+
+/*
+ * gfs2_xrdir_collect_extra_info - gather stat and xattr data for all dents
+ * @xc: xreaddir context holding the collected entries
+ * @dip: the directory being read (used to find the superblock)
+ *
+ * Looks up the inode of every collected entry (in inode block order),
+ * takes a shared glock on each, collects stat info, then collects
+ * xattrs (in eattr block order). All glocks and inode references are
+ * dropped again and the pointer array is sorted back into dent (hash)
+ * order before returning.
+ *
+ * Returns 0 and sets XC_FL_DATA_AVAIL on success. -ENOMEM is turned
+ * into success when the first entry in dent order was fully collected;
+ * otherwise xc_offset is rewound to that entry's hash and -ENOMEM is
+ * returned. Any other error is returned as is.
+ */
+static int gfs2_xrdir_collect_extra_info(struct gfs2_xrdir_ctx *xc,
+					 struct gfs2_inode *dip)
+{
+	int error = -ENOMEM, i;
+	struct gfs2_holder *ghs;
+
+	/* First sort the dents according to inode blk order for stat */
+	ctx_sort(xc, xc->xc_vb_dptrs, xc->xc_count, sizeof(void *),
+		 ctx_compare_dent_iblks, NULL);
+
+	/* Lookup all the inodes for stat info */
+	for (i = 0; i < xc->xc_count; i++) {
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[i];
+		u64 ino;
+		struct inode *inode;
+		struct gfs2_inode *ip, *nullptr = NULL;
+
+		vp_read(&xc->xc_dirents, &ino, &x_vb_p->x_ino,
+			sizeof(x_vb_p->x_ino));
+
+		inode = gfs2_lookup_by_inum(GFS2_SB(&dip->i_inode), ino, NULL,
+					    GFS2_BLKST_DINODE);
+		if (IS_ERR(inode)) {
+			/*
+			 * Store NULL in the entry's x_ip slot inside the
+			 * vbuf (not in a stack local) so the iput pass
+			 * below skips it, and report the real error.
+			 */
+			vp_write(&xc->xc_dirents, &x_vb_p->x_ip, &nullptr,
+				 sizeof(struct gfs2_inode *));
+			error = PTR_ERR(inode);
+			goto iput_iarr;
+		}
+		ip = GFS2_I(inode);
+		vp_write(&xc->xc_dirents, &x_vb_p->x_ip, &ip,
+			 sizeof(struct gfs2_inode *));
+	}
+
+	/* lock all inodes */
+	ghs = kcalloc(xc->xc_count, sizeof(struct gfs2_holder), GFP_NOFS);
+	if (!ghs)
+		goto iput_iarr;	/* error is still -ENOMEM here */
+	for (i = 0; i < xc->xc_count; i++) {
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[i];
+		struct gfs2_inode *ip;
+
+		vp_read(&xc->xc_dirents, &ip, &x_vb_p->x_ip,
+			sizeof(struct gfs2_inode *));
+		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, ghs + i);
+	}
+
+	error = gfs2_glock_nq_m(xc->xc_count, ghs);
+	if (error)
+		goto uninit_ghs;
+
+	error = gfs2_xrdir_collect_xstat(xc);
+	if (error)
+		goto unlock_ghs;
+
+	/* Sort the dents according to eattr blk order */
+	ctx_sort(xc, xc->xc_vb_dptrs, xc->xc_count, sizeof(void *),
+		 ctx_compare_dent_eablks, NULL);
+
+	error = gfs2_xrdir_collect_xattrs(xc);
+
+unlock_ghs:
+	/* every holder is queued at this point: dequeue and uninit */
+	for (i = 0; i < xc->xc_count; i++)
+		gfs2_glock_dq_uninit(&ghs[i]);
+	goto free_ghs;
+uninit_ghs:
+	/*
+	 * Enqueueing failed, so no holder is queued any more, but each
+	 * one was initialised above and must still be uninitialised.
+	 */
+	for (i = 0; i < xc->xc_count; i++)
+		gfs2_holder_uninit(&ghs[i]);
+free_ghs:
+	kfree(ghs);
+iput_iarr:
+	/*
+	 * Drop the inode references. Slots that were never filled are
+	 * NULL because every entry is zeroed when it is collected.
+	 */
+	for (i = 0; i < xc->xc_count; i++) {
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[i];
+		struct gfs2_inode *ip;
+
+		vp_read(&xc->xc_dirents, &ip, &x_vb_p->x_ip,
+			sizeof(struct gfs2_inode *));
+		if (ip)
+			iput(&ip->i_inode);
+	}
+	/* Sort the pointers back to dent order */
+	ctx_sort(xc, xc->xc_vb_dptrs, xc->xc_count, sizeof(void *),
+		 ctx_compare_dents, NULL);
+
+	if (error == -ENOMEM) {
+		/*
+		 * If at least one dent has been collected in full,
+		 * void -ENOMEM
+		 * We shuffled the order of dents multiple times while
+		 * retrieving stat and xattrs, so we have to ensure that
+		 * at least the first dent in the final ordering is valid
+		 * in order to be able to return at least 1 entry. This
+		 * is because we need to preserve the order (hash order)
+		 * when we return the dents to the user. XXX: OR DO WE??
+		 */
+		struct gfs2_xdirent *x_vb_p = xc->xc_vb_dptrs[0];
+		u8 valid;
+		vp_read(&xc->xc_dirents, &valid, &x_vb_p->x_valid,
+			sizeof(x_vb_p->x_valid));
+
+		if (valid)
+			error = 0;
+		else {
+			u32 hash;
+			vp_read(&xc->xc_dirents, &hash, &x_vb_p->x_hash,
+				sizeof(hash));
+			xc->xc_offset = gfs2_disk_hash2offset(hash);
+		}
+	}
+	if (!error)
+		xc->xc_flags |= XC_FL_DATA_AVAIL;
+
+	return error;
+}
+
+/*
+ * gfs2_xrdir_to_user_xattrs - copy one dirent's xattr records to userspace
+ * @xc: xreaddir context holding the xattr key and value vbufs
+ * @x: kernel copy of the dirent; x_xattr_count records are emitted
+ * @xdx_vb_p: vbuf address of the dirent's first gfs2_xd_xattr record
+ * @xx: user address of the first output record
+ * @count: total size of the user buffer
+ * @bytes: running total of bytes consumed, advanced per record
+ * @tempbuf: bounce buffer of PAGE_SIZE bytes supplied by the caller
+ *
+ * Each output record is the value length, the key name, a NUL byte
+ * and, when values were requested and collected, the value bytes.
+ * Values are bounced through @tempbuf at most PAGE_SIZE bytes at a
+ * time, so a value larger than the bounce buffer cannot overrun it.
+ *
+ * Returns 0 on success, -EOVERFLOW when the remaining user space is
+ * too small for the next record, or -EINVAL when a copy to userspace
+ * fails.
+ */
+static int gfs2_xrdir_to_user_xattrs(struct gfs2_xrdir_ctx *xc,
+				     struct gfs2_xdirent *x,
+				     struct gfs2_xd_xattr *xdx_vb_p,
+				     struct xdirent_xattr __user *xx,
+				     size_t count, size_t *bytes, char *tempbuf)
+{
+	struct gfs2_xd_xattr xdx;
+	int attrcount = 0, error = -EINVAL;
+
+	while (attrcount < x->x_xattr_count) {
+		/* pull the fixed part of the record out of the vbuf */
+		vp_read(&xc->xc_xattr_keys, &xdx, xdx_vb_p,
+			sizeof(struct gfs2_xd_xattr));
+
+		if ((count - *bytes) <
+		    (sizeof(struct xdirent_xattr) +
+		     xdx.xa_keylen + xdx.xa_value_len)) {
+			error = -EOVERFLOW;
+			goto out;
+		}
+
+		if (__put_user(xdx.xa_value_len, &xx->xa_value_len))
+			goto out;
+
+		vp_read(&xc->xc_xattr_keys, tempbuf, xdx_vb_p->xa_keyname,
+			xdx.xa_keylen);
+
+		if (copy_to_user(xx->xa_name_val, tempbuf, xdx.xa_keylen))
+			goto out;
+		if (__put_user(0, xx->xa_name_val + xdx.xa_keylen))
+			goto out;
+
+		if ((xc->xc_xattr_mask & XSTAT_XATTR_VALUES) &&
+		    xdx.xa_vb_value_ptr) {
+			size_t done = 0;
+
+			/* never move more than tempbuf can hold in one go */
+			while (done < xdx.xa_value_len) {
+				size_t chunk = min_t(size_t, PAGE_SIZE,
+						     xdx.xa_value_len - done);
+
+				vp_read(&xc->xc_xattr_values, tempbuf,
+					(char *)xdx.xa_vb_value_ptr + done,
+					chunk);
+				if (copy_to_user(xx->xa_name_val +
+						 xdx.xa_keylen + 1 + done,
+						 tempbuf, chunk))
+					goto out;
+				done += chunk;
+			}
+		}
+
+		/* step to the next user record and the next vbuf record */
+		xx = (struct xdirent_xattr __user *)
+			((char *)xx + sizeof(xx->xa_value_len)
+			 + xdx.xa_keylen + 1 + xdx.xa_value_len);
+		xdx_vb_p = (void*) xdx_vb_p + xdx.xa_reclen;
+
+		*bytes += sizeof(struct xdirent_xattr) + xdx.xa_keylen +
+			xdx.xa_value_len;
+		attrcount++;
+	}
+	error = 0;
+out:
+	return error;
+}
+
+/*
+ * gfs2_xrdir_to_user_vars - copy the variable-length part of one entry
+ * @xc: xreaddir context owning the dirent and xattr vbufs
+ * @x: kernel copy of the entry (filled by gfs2_xrdir_to_user_fixed())
+ * @x_vb_p: vbuf address of the same entry inside xc->xc_dirents
+ * @lxd: user record being filled
+ * @count: total size of the user buffer in bytes
+ * @bytes: running count of user buffer bytes consumed, advanced here
+ *
+ * Writes the NUL-terminated name into lxd->xd_blob.xb_blob and, when xattrs
+ * were requested and the entry has any, the xattr records right after it.
+ *
+ * Returns: 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
+ * -EOVERFLOW if the user buffer is too small, -EFAULT if a store to user
+ * space fails.
+ */
+static int gfs2_xrdir_to_user_vars(struct gfs2_xrdir_ctx *xc,
+				   struct gfs2_xdirent *x,
+				   struct gfs2_xdirent *x_vb_p,
+				   struct linux_xdirent __user *lxd,
+				   size_t count, size_t *bytes)
+{
+	struct xdirent_blob __user *xblob = &lxd->xd_blob;
+	struct xdirent_xattr __user *xx;
+	char *tempbuf;
+	int error;
+
+	tempbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!tempbuf)
+		return -ENOMEM;
+
+	/* copy all the variable length fields */
+	if ((count - *bytes) < x->x_namelen) {
+		error = -EOVERFLOW;
+		goto free;
+	}
+
+	vp_read(&xc->xc_dirents, tempbuf, x_vb_p->x_name, x->x_namelen);
+
+	error = -EFAULT;
+	if (copy_to_user(xblob->xb_blob, tempbuf, x->x_namelen))
+		goto free;
+	if (__put_user(0, xblob->xb_blob + x->x_namelen))
+		goto free;
+
+	*bytes += x->x_namelen;
+	error = 0;
+
+	/*
+	 * Test the kernel copy of the xattr count. lxd points into user
+	 * space and must not be dereferenced directly; the value stored
+	 * there came from x->x_xattr_count in the first place.
+	 */
+	if ((xc->xc_xattr_mask & XSTAT_XATTR_ALL) && x->x_xattr_count) {
+		/* xattr records start right after the name's NUL */
+		xx = (struct xdirent_xattr __user *)
+			(xblob->xb_blob + x->x_namelen + 1);
+
+		error = gfs2_xrdir_to_user_xattrs(xc, x, x->x_vb_xattr_arr_ptr,
+						  xx, count, bytes, tempbuf);
+	}
+free:
+	kfree(tempbuf);
+	return error;
+}
+
+/*
+ * gfs2_xrdir_to_user_fixed - copy the fixed-size part of one entry
+ * @xc: xreaddir context owning the dirent vbuf
+ * @x: kernel buffer that receives a copy of the entry
+ * @x_vb_p: vbuf address of the entry inside xc->xc_dirents
+ * @lxd: user record being filled
+ * @count: total size of the user buffer in bytes
+ * @bytes: running count of user buffer bytes consumed, advanced here
+ *
+ * Loads the entry into @x, then stores the inode number, type, a zero
+ * xd_off, the stat result and the xattr count into @lxd. On success *@bytes
+ * grows by sizeof(struct linux_xdirent).
+ *
+ * Returns: 0 on success, -EOVERFLOW if fewer than
+ * sizeof(struct linux_xdirent) bytes remain in the user buffer, -EFAULT if
+ * a store to user space fails, or the error from xstat_set_result().
+ */
+static int gfs2_xrdir_to_user_fixed(struct gfs2_xrdir_ctx *xc,
+				    struct gfs2_xdirent *x,
+				    struct gfs2_xdirent *x_vb_p,
+				    struct linux_xdirent __user *lxd,
+				    size_t count, size_t *bytes)
+{
+	struct xdirent_blob __user *xblob = &lxd->xd_blob;
+	int error;
+
+	vp_read(&xc->xc_dirents, x, x_vb_p, sizeof(struct gfs2_xdirent));
+
+	if ((count - *bytes) < sizeof(struct linux_xdirent))
+		return -EOVERFLOW;
+
+	/* A failing user store is a bad address, not a bad argument */
+	if (__put_user(x->x_ino, &lxd->xd_ino))
+		return -EFAULT;
+	if (__put_user(x->x_type, &lxd->xd_type))
+		return -EFAULT;
+	if (__put_user(0, &lxd->xd_off))
+		return -EFAULT;
+
+	error = xstat_set_result(&x->x_kstat, &lxd->xd_stat);
+	if (error)
+		return error;
+
+	if (__put_user(x->x_xattr_count, &xblob->xb_xattr_count))
+		return -EFAULT;
+
+	/* copied all the fixed size fields */
+	*bytes += sizeof(struct linux_xdirent);
+	return 0;
+}
+
+/*
+ * gfs2_xrdir_to_user - write collected entries out to the user buffer
+ * @xc: xreaddir context holding the collected entries
+ * @buf: user buffer that receives the linux_xdirent records
+ * @count: size of @buf in bytes
+ *
+ * Walks xc->xc_vb_dptrs, skipping ahead to xc->xc_next_dent, and emits one
+ * record per entry until an entry that is not marked valid is met, the user
+ * buffer is full, or all xc->xc_count entries have been written. The
+ * context flags, offset and dent cap are then updated for the next call.
+ *
+ * Returns: the number of bytes of complete records written to @buf, or a
+ * negative errno carried in the size_t return value: -EINVAL if no data is
+ * available, -EOVERFLOW if an entry was reached but nothing could be
+ * written, -EFAULT if a store to user space fails, or whatever error the
+ * per-entry helpers report.
+ */
+static size_t gfs2_xrdir_to_user(struct gfs2_xrdir_ctx *xc, void __user *buf,
+				 size_t count)
+{
+	size_t error = -EINVAL, bytes = 0, bytes_bef = 0;
+	int i, skip = 1, written = 0;
+	struct gfs2_xdirent x, *x_vb_p;
+	struct linux_xdirent __user *lxd = buf;
+	u8 valid = 0;
+
+	if (!(xc->xc_flags & XC_FL_DATA_AVAIL))
+		goto out;
+
+	for (i = 0; i < xc->xc_count; i++) {
+		x_vb_p = xc->xc_vb_dptrs[i];
+
+		/* Fast-forward to the entry the previous call stopped at */
+		if (skip && xc->xc_vb_dptrs[i] != xc->xc_next_dent)
+			continue;
+		skip = 0;
+		vp_read(&xc->xc_dirents, &valid, &x_vb_p->x_valid,
+			sizeof(x_vb_p->x_valid));
+		if (!valid)
+			break;
+
+		/* This will fill up x from x_vb_p and subsequently lxd from x */
+		error = gfs2_xrdir_to_user_fixed(xc, &x, x_vb_p, lxd, count,
+						 &bytes);
+		if (error) {
+			if (error == -EOVERFLOW)
+				goto overflow;
+			goto out;
+		}
+
+		error = gfs2_xrdir_to_user_vars(xc, &x, x_vb_p, lxd, count,
+						&bytes);
+		if (error) {
+			if (error == -EOVERFLOW)
+				goto overflow;
+			goto out;
+		}
+
+		/*
+		 * error is 0 at this point, so a faulting store must set it
+		 * explicitly or the failure would be returned as success.
+		 */
+		if (__put_user(bytes - bytes_bef, &lxd->xd_reclen)) {
+			error = -EFAULT;
+			goto out;
+		}
+
+		lxd = (void __user *)lxd + (bytes - bytes_bef);
+		/* Do not read past the last collected pointer */
+		xc->xc_next_dent = (i + 1 < xc->xc_count) ?
+			xc->xc_vb_dptrs[i + 1] : NULL;
+		written++;
+		bytes_bef = bytes;
+	}
+overflow:
+	if (written) {
+		if (!valid) {
+			u32 hash;
+			x_vb_p = xc->xc_vb_dptrs[i];
+			vp_read(&xc->xc_dirents, &hash, &x_vb_p->x_hash,
+				sizeof(hash));
+			/*
+			 * Some of the entries we collected were incomplete,
+			 * so we only wrote the ones that were complete. For
+			 * next time, we'll only try to collect half the
+			 * number of entries. This will also invalidate the
+			 * assumption that we'll encounter hash-colliding
+			 * entries in the next pass
+			 */
+			xc->xc_offset = gfs2_disk_hash2offset(hash);
+			xc->xc_flags &= ~(XC_FL_GATHER_PART_INT |
+					  XC_FL_DATA_AVAIL |
+					  XC_FL_HASH_COLL |
+					  XC_FL_HASH_COLL_NXT);
+			xc->xc_hash_coll_off = 0;
+			xc->xc_dent_cap = DIV_ROUND_UP(xc->xc_count, 2);
+		} else {
+			/*
+			 * If we didn't overflow the user buffer, we
+			 * have written out all the collected dents to
+			 * the user buffer
+			 */
+			if (error != -EOVERFLOW) {
+				xc->xc_flags &= ~(XC_FL_GATHER_PART_INT |
+						  XC_FL_DATA_AVAIL);
+				xc->xc_dent_cap = 0;
+				if (!(xc->xc_flags & XC_FL_HASH_COLL))
+					xc->xc_offset++;
+			}
+		}
+	}
+	/* An entry was reached but not a single record could be written */
+	if (!written && !skip) {
+		error = -EOVERFLOW;
+		goto out;
+	}
+	error = bytes_bef;
+out:
+	return error;
+}
+
+/**
+ * gfs2_xreaddir - GFS2's implementation of xreaddir functionality
+ * @file  : The directory to xreaddir
+ * @flags : flags used by xstat
+ * @mask  : field mask for xstat and xattrs
+ * @buf   : User buffer to fill data into
+ * @count : Size of the user buffer in bytes
+ *
+ * Hand out dents left over from an earlier call (XC_FL_DATA_AVAIL) or read
+ * the directory under a shared glock, collect the extended information
+ * (xstat, xattrs) for its dents and copy as many as fit into @buf.
+ * Returns: the result of gfs2_xrdir_to_user() (bytes copied to @buf), 0 if
+ *          no dents were collected, or a -ve errno carried in the size_t.
+ *          NOTE(review): -EAGAIN (retry) is not generated here - confirm.
+ */
+
+size_t gfs2_xreaddir(struct file *file, unsigned int flags, unsigned int mask,
+			    void __user *buf, size_t count)
+{
+	struct gfs2_xrdir_ctx *xc = ((struct gfs2_file *)
+				     file->private_data)->f_xrctx;
+	size_t error = 0;	/* doubles as the byte count on success */
+	struct inode *dir = file->f_mapping->host;
+	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_holder d_gh;
+
+	if (xc->xc_flags & XC_FL_DATA_AVAIL) {	/* leftovers: no glock taken */
+		error = gfs2_xrdir_to_user(xc, buf, count);
+		file->f_pos = xc->xc_offset;
+		return error;
+	}
+
+	error = gfs2_xrdir_ctx_setup(file, xc, flags, mask);
+	if (error)
+		goto out;
+	/* Hold the directory glock shared while reading and collecting */
+	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+	error = gfs2_glock_nq(&d_gh);
+	if (error) {
+		gfs2_holder_uninit(&d_gh);
+		goto out;
+	}
+
+	xc->xc_flags &= ~XC_FL_HASH_COLL;
+	error = gfs2_dir_read(dir, NULL, xc, &file->f_ra);
+	if (error) {
+		if (xc->xc_flags & XC_FL_ERROR_OOM)
+			error = gfs2_xrdir_handle_oom(xc);
+		goto uninit;
+	}
+	/* Nothing was collected: return 0, error is still 0 here */
+	if (xc->xc_count == 0)
+		goto uninit;
+	/* No GATHER_PARTS: tag as FULL; else, unless PART_INT, as PART_END */
+	if (!(xc->xc_flags & XC_FL_GATHER_PARTS))
+		xc->xc_flags |= XC_FL_GATHER_FULL;
+	else if (!(xc->xc_flags & XC_FL_GATHER_PART_INT))
+		xc->xc_flags |= XC_FL_GATHER_PART_END;
+
+	error = gfs2_xrdir_create_dptrs(xc);
+	if (error) {
+		if (error == -ENOMEM)
+			error = gfs2_xrdir_handle_oom(xc);
+		goto uninit;
+	}
+
+	error = gfs2_xrdir_collect_extra_info(xc, dip);
+	if (error) {
+		if (error == -ENOMEM)
+			error = gfs2_xrdir_handle_oom(xc);
+		goto uninit;
+	}
+	/* Write out starting from the first collected dent */
+	xc->xc_next_dent = xc->xc_vb_dptrs[0];
+	error = gfs2_xrdir_to_user(xc, buf, count);
+
+	file->f_pos = xc->xc_offset;
+uninit:
+	if (xc->xc_flags & XC_FL_HASH_COLL && !(xc->xc_flags & XC_FL_DATA_AVAIL))
+		xc->xc_flags &= ~XC_FL_HASH_COLL;
+
+	gfs2_glock_dq_uninit(&d_gh);
+out:
+	return error;
+}
diff --git a/fs/gfs2/xreaddir.h b/fs/gfs2/xreaddir.h
new file mode 100644
index 0000000..ea6c82c
--- /dev/null
+++ b/fs/gfs2/xreaddir.h
@@ -0,0 +1,84 @@
+#ifndef __XREADDIR_H__
+#define __XREADDIR_H__
+
+struct gfs2_xd_xattr {	/* one xattr record stored in xc_xattr_keys */
+	unsigned int   xa_reclen;	/* offset from this record to the next */
+	void          *xa_vb_value_ptr;	/* value in xc_xattr_values, or NULL */
+	unsigned long  xa_value_len;	/* value length in bytes */
+	unsigned int   xa_keylen;	/* key length in bytes */
+	char           __pad[7];
+	char           xa_keyname[1];	/* start of the key bytes */
+};
+
+struct gfs2_xdirent {	/* one collected dent stored in xc_dirents */
+	u32                      x_hash;	/* gfs2_disk_hash2offset() input */
+	u8                       x_valid;	/* non-zero: may be written out */
+	struct gfs2_inode       *x_ip;	/* iput() after collection if set */
+	u64                      x_ino;	/* copied to xd_ino */
+	u64                      x_eablk;	/* presumably the EA block - confirm */
+	char                     x_type;	/* copied to xd_type */
+	struct kstat             x_kstat;	/* fed to xstat_set_result() */
+	unsigned int             x_xattr_count;	/* # gfs2_xd_xattr records */
+	void                    *x_vb_xattr_arr_ptr; /* 1st one, in xc_xattr_keys */
+	unsigned int             x_namelen;	/* name length in bytes */
+	char                     x_name[1];	/* start of the name bytes */
+};
+
+#define XC_FL_ALLOCATED                 0x00000001
+#define XC_FL_GATHER_FULL               0x00000002 /* set if !GATHER_PARTS */
+#define XC_FL_GATHER_PARTS              0x00000004
+#define XC_FL_GATHER_PART_INT           0x00000008
+#define XC_FL_GATHER_PART_END           0x00000010 /* PARTS && !PART_INT */
+#define XC_FL_HASH_COLL                 0x00000020 /* cleared before dir read */
+#define XC_FL_HASH_COLL_NXT             0x00000040
+#define XC_FL_ERROR_OOM                 0x00000080 /* run OOM handler if set */
+#define XC_FL_ERROR                     0x00000100
+#define XC_FL_DATA_AVAIL                0x00000200 /* dents ready to copy out */
+#define XC_FL_PRINTOK                   0x10000000
+
+/*
+ * xreaddir context, reached through gfs2_file->f_xrctx of the open dir
+ */
+struct gfs2_xrdir_ctx {
+	u32                   xc_flags;           /* XC_FL_XXXX */
+	u64                   xc_dent_memcap;     /* mem limit per collect */
+	u32                   xc_dent_cap;        /* # dent limit per collect */
+	u32                   xc_dent_valid;      /* # valid dents collected */
+	u32                   xc_xattr_mask;      /* XSTAT_XATTR_XXX see stat.h */
+	u32                   xc_xst_flags;       /* XSTAT_XXX see stat.h */
+	loff_t                xc_offset;          /* offset of next dent */
+	unsigned long         xc_count;           /* # dents collected */
+	loff_t                xc_hash_coll_off;   /* last hash collision offset */
+	void                 *xc_next_dent;       /* next dent to write out */
+	void                **xc_vb_dptrs;        /* ptrs to dents in xc_dirents */
+	struct vbuf           xc_dirents;         /* temp storage for dents */
+	struct vbuf           xc_xattr_keys;      /* xattr keys for dents */
+	struct vbuf           xc_xattr_values;    /* corresponding values */
+};
+
+/*
+ * Bundles an xreaddir context with one of its
+ * dents so that both can be handed over through
+ * a single pointer. Only used in one place, to
+ * retrieve extended attributes, so that the
+ * prototypes of all the existing xattr handling
+ * functions do not have to change to accept an
+ * extra arg.
+ */
+struct gfs2_xdir_ctx_bndle {
+	struct gfs2_xrdir_ctx *xcb_xc;	/* the xreaddir context */
+	struct gfs2_xdirent   *xcb_xd;	/* the dent bundled with it */
+};
+
+extern size_t gfs2_xreaddir(struct file *file, unsigned int flags,
+			    unsigned int mask, void __user *buf,
+			    size_t count);
+extern int gfs2_xrdir_collect_dents(const struct gfs2_dirent *dent, loff_t off,
+				    struct gfs2_xrdir_ctx *xc);
+extern void gfs2_xrdir_partial_collect(struct gfs2_xrdir_ctx *xc);
+extern int gfs2_xrdir_collect_xattrs(struct gfs2_xrdir_ctx *xc);
+
+extern int gfs2_xrdir_ctx_init(struct gfs2_file *fp, struct gfs2_sbd *sdp);
+extern void gfs2_xrdir_ctx_uninit(struct gfs2_file *fp);
+
+#endif /* __XREADDIR_H__ */
-- 
1.8.1.4



  parent reply	other threads:[~2014-07-25 17:38 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-25 17:38 [RFC PATCH 0/5] xgetdents system call Abhi Das
2014-07-25 17:38 ` [Cluster-devel] " Abhi Das
2014-07-25 17:38 ` [RFC PATCH 1/5] fs: xstat system call VFS bits Abhi Das
2014-07-25 17:38   ` [Cluster-devel] " Abhi Das
2014-07-25 17:38   ` Abhi Das
2014-07-25 18:17   ` [Cluster-devel] " Bob Peterson
2014-07-25 18:17     ` Bob Peterson
2014-07-25 17:38 ` [RFC PATCH 2/5] fs: Add xgetdents system call and xreaddir file operation Abhi Das
2014-07-25 17:38   ` [Cluster-devel] " Abhi Das
2014-07-25 17:38   ` Abhi Das
2014-07-29  8:20   ` Michael Kerrisk
2014-07-29  8:20     ` [Cluster-devel] " Michael Kerrisk
2014-07-29  8:20     ` Michael Kerrisk
2014-07-25 17:38 ` [RFC PATCH 3/5] gfs2: Add a dynamic buffer backed by a vector of pages Abhi Das
2014-07-25 17:38   ` [Cluster-devel] " Abhi Das
2014-07-25 17:38   ` Abhi Das
2014-07-25 18:42   ` [Cluster-devel] " Bob Peterson
2014-07-25 18:42     ` Bob Peterson
2014-07-25 17:38 ` [RFC PATCH 4/5] gfs2: Add sort functionality with extra parameter Abhi Das
2014-07-25 17:38   ` [Cluster-devel] " Abhi Das
2014-07-25 17:38 ` Abhi Das [this message]
2014-07-25 17:38   ` [Cluster-devel] [RFC PATCH 5/5] gfs2: Add xreaddir file operation and supporting functions Abhi Das
2014-07-29 18:58   ` Jonathan Corbet
2014-07-29 18:58     ` [Cluster-devel] " Jonathan Corbet
2014-07-29 22:25     ` Abhijith Das
2014-07-29 22:25       ` [Cluster-devel] " Abhijith Das
2014-07-30  9:06       ` Steven Whitehouse
2014-07-30 13:57       ` Jonathan Corbet
2014-07-30 13:57         ` [Cluster-devel] " Jonathan Corbet
2014-07-29  8:18 ` [RFC PATCH 0/5] xgetdents system call Michael Kerrisk
2014-07-29  8:18   ` [Cluster-devel] " Michael Kerrisk
2014-07-29  8:18   ` Michael Kerrisk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1406309888-10749-6-git-send-email-adas@redhat.com \
    --to=adas@redhat.com \
    --cc=cluster-devel@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.