linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* GFS2: Pre-pull patch posting (merge window)
@ 2012-07-23  8:00 Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 01/16] GFS2: Extend the life of the reservations Steven Whitehouse
                   ` (15 more replies)
  0 siblings, 16 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

As usual, here is the content of the GFS2 tree prior to sending
a merge request. Not a huge number of patches this time, but some
interesting features nonetheless.

A number of the earlier patches are aimed at cleaning up the resource
group code for the later patch which implements block reservations.
In addition to that, there are a few patches aimed at improving
the time taken to dump (the potentially rather large) glock debugfs
file. Beyond that there are a couple of bug fixes and thats about it
this time,

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH 01/16] GFS2: Extend the life of the reservations
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 02/16] GFS2: Fold quota data into the reservations struct Steven Whitehouse
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Bob Peterson, Steven Whitehouse

From: Bob Peterson <rpeterso@redhat.com>

This patch lengthens the lifespan of the reservations structure for
inodes. Before, they were allocated and deallocated for every write
operation. With this patch, they are allocated when the first write
occurs, and deallocated when the last process closes the file.
It's more efficient to do it this way because it saves GFS2 a lot of
unnecessary allocates and frees. It also gives us more flexibility
for the future: (1) we can now fold the qadata structure back into
the structure and save those alloc/frees, (2) we can use this for
multi-block reservations.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index e80a464..aba77b5 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	brelse(dibh);
 failed:
 	gfs2_trans_end(sdp);
-	if (ip->i_res)
+	if (gfs2_mb_reserved(ip))
 		gfs2_inplace_release(ip);
 	if (qa) {
 		gfs2_quota_unlock(ip);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 31b199f..3790617 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 */
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
+	ret = gfs2_rs_alloc(ip);
+	if (ret)
+		return ret;
+
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret)
@@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file)
 {
 	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
 	struct gfs2_file *fp;
+	struct gfs2_inode *ip = GFS2_I(inode);
 
 	fp = file->private_data;
 	file->private_data = NULL;
 
+	if ((file->f_mode & FMODE_WRITE) && ip->i_res &&
+	    (atomic_read(&inode->i_writecount) == 1))
+		gfs2_rs_delete(ip);
+
 	if (gfs2_assert_warn(sdp, fp))
 		return -EIO;
 
@@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
+	struct dentry *dentry = file->f_dentry;
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	int ret;
+
+	ret = gfs2_rs_alloc(ip);
+	if (ret)
+		return ret;
 
 	if (file->f_flags & O_APPEND) {
-		struct dentry *dentry = file->f_dentry;
-		struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
 		struct gfs2_holder gh;
-		int ret;
 
 		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 		if (ret)
@@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (bytes == 0)
 		bytes = sdp->sd_sb.sb_bsize;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		return error;
+
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
 	error = gfs2_glock_nq(&ip->i_gh);
 	if (unlikely(error))
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a9ba244..2a1b4b5 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
+	error = gfs2_rs_alloc(dip);
+	if (error)
+		return error;
+
 	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	if (error)
 		goto fail;
@@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_gunlock2;
 
+	/* the new inode needs a reservation so it can allocate xattrs. */
+	error = gfs2_rs_alloc(GFS2_I(inode));
+	if (error)
+		goto fail_gunlock2;
+
 	error = gfs2_acl_create(dip, inode);
 	if (error)
 		goto fail_gunlock2;
@@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	gfs2_trans_end(sdp);
 	/* Check if we reserved space in the rgrp. Function link_dinode may
 	   not, depending on whether alloc is required. */
-	if (dip->i_res)
+	if (gfs2_mb_reserved(dip))
 		gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
 	gfs2_qadata_put(dip);
@@ -819,6 +828,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
+	error = gfs2_rs_alloc(dip);
+	if (error)
+		return error;
+
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
@@ -1234,6 +1247,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (error)
 		return error;
 
+	error = gfs2_rs_alloc(ndip);
+	if (error)
+		return error;
+
 	if (odip != ndip) {
 		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
 					   0, &r_gh);
@@ -1644,6 +1661,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct gfs2_holder i_gh;
 	int error;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		return error;
+
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
 		return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b97178e..197cc2d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -764,6 +764,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	unsigned int nalloc = 0, blocks;
 	int error;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		return error;
+
 	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
 			      &data_blocks, &ind_blocks);
 
@@ -1549,10 +1553,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 	if (error)
 		return error;
 
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		goto out_put;
+
 	mutex_lock(&ip->i_inode.i_mutex);
 	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
 	if (error)
-		goto out_put;
+		goto out_unlockput;
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
 		goto out_q;
@@ -1609,8 +1617,9 @@ out_i:
 	gfs2_glock_dq_uninit(&i_gh);
 out_q:
 	gfs2_glock_dq_uninit(&q_gh);
-out_put:
+out_unlockput:
 	mutex_unlock(&ip->i_inode.i_mutex);
+out_put:
 	qd_put(qd);
 	return error;
 }
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f74fb9b..e944fef 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 	}
 }
 
+/**
+ * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
+ * @ip: the inode for this reservation
+ */
+int gfs2_rs_alloc(struct gfs2_inode *ip)
+{
+	int error = 0;
+
+	down_write(&ip->i_rw_mutex);
+	if (!ip->i_res) {
+		ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+		if (!ip->i_res)
+			error = -ENOMEM;
+	}
+	up_write(&ip->i_rw_mutex);
+	return error;
+}
+
+/**
+ * gfs2_rs_delete - delete a reservation
+ * @ip: The inode for this reservation
+ *
+ */
+void gfs2_rs_delete(struct gfs2_inode *ip)
+{
+	down_write(&ip->i_rw_mutex);
+	if (ip->i_res) {
+		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
+		ip->i_res = NULL;
+	}
+	up_write(&ip->i_rw_mutex);
+}
+
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 {
 	struct rb_node *n;
@@ -993,22 +1026,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
 }
 
 /**
- * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode
- * @ip: the incore GFS2 inode structure
- *
- * Returns: the struct gfs2_qadata
- */
-
-static int gfs2_blkrsv_get(struct gfs2_inode *ip)
-{
-	BUG_ON(ip->i_res != NULL);
-	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
-	if (!ip->i_res)
-		return -ENOMEM;
-	return 0;
-}
-
-/**
  * try_rgrp_fit - See if a given reservation will fit in a given RG
  * @rgd: the RG data
  * @ip: the inode
@@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 	return -ENOSPC;
 }
 
-static void gfs2_blkrsv_put(struct gfs2_inode *ip)
-{
-	BUG_ON(ip->i_res == NULL);
-	kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
-	ip->i_res = NULL;
-}
-
 /**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
@@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_blkreserv *rs;
-	int error;
+	int error = 0;
 	u64 last_unlinked = NO_BLOCK;
 	int tries = 0;
 
-	error = gfs2_blkrsv_get(ip);
-	if (error)
-		return error;
-
 	rs = ip->i_res;
 	rs->rs_requested = requested;
 	if (gfs2_assert_warn(sdp, requested)) {
@@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 
 out:
 	if (error)
-		gfs2_blkrsv_put(ip);
+		rs->rs_requested = 0;
 	return error;
 }
 
@@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 
 	if (rs->rs_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
-	gfs2_blkrsv_put(ip);
+	rs->rs_requested = 0;
 }
 
 /**
@@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	/* Only happens if there is a bug in gfs2, return something distinctive
 	 * to ensure that it is noticed.
 	 */
-	if (ip->i_res == NULL)
+	if (ip->i_res->rs_requested == 0)
 		return -ECANCELED;
 
 	rgd = ip->i_rgd;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b4b10f4..d9eda5f 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -43,6 +43,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
+extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
@@ -68,4 +70,12 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
+/* This is how to tell if a reservation is "inplace" reserved: */
+static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
+{
+	if (ip->i_res && ip->i_res->rs_requested)
+		return 1;
+	return 0;
+}
+
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 713e621..65578df 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1554,6 +1554,7 @@ out_unlock:
 out:
 	/* Case 3 starts here */
 	truncate_inode_pages(&inode->i_data, 0);
+	gfs2_rs_delete(ip);
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
 	ip->i_gl->gl_object = NULL;
@@ -1576,6 +1577,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 		ip->i_flags = 0;
 		ip->i_gl = NULL;
 		ip->i_rgd = NULL;
+		ip->i_res = NULL;
 	}
 	return &ip->i_inode;
 }
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 125d457..41f42cd 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -31,7 +31,7 @@ struct gfs2_glock;
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
 {
 	const struct gfs2_blkreserv *rs = ip->i_res;
-	if (rs->rs_requested < ip->i_rgd->rd_length)
+	if (rs && rs->rs_requested < ip->i_rgd->rd_length)
 		return rs->rs_requested + 1;
 	return ip->i_rgd->rd_length;
 }
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 02/16] GFS2: Fold quota data into the reservations struct
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 01/16] GFS2: Extend the life of the reservations Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 03/16] GFS2: Add "top dir" flag support Steven Whitehouse
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Bob Peterson, Steven Whitehouse

From: Bob Peterson <rpeterso@redhat.com>

This patch moves the ancillary quota data structures into the
block reservations structure. This saves GFS2 some time and
effort in allocating and deallocating the qadata structure.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index aba77b5..d652634 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -614,7 +614,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
 	int alloc_required;
 	int error = 0;
-	struct gfs2_qadata *qa = NULL;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 	struct page *page;
@@ -638,15 +637,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
 
 	if (alloc_required) {
-		qa = gfs2_qadata_get(ip);
-		if (!qa) {
-			error = -ENOMEM;
-			goto out_unlock;
-		}
-
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto out_alloc_put;
+			goto out_unlock;
 
 		error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
 		if (error)
@@ -708,8 +701,6 @@ out_trans_fail:
 		gfs2_inplace_release(ip);
 out_qunlock:
 		gfs2_quota_unlock(ip);
-out_alloc_put:
-		gfs2_qadata_put(ip);
 	}
 out_unlock:
 	if (&ip->i_inode == sdp->sd_rindex) {
@@ -846,7 +837,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	struct buffer_head *dibh;
-	struct gfs2_qadata *qa = ip->i_qadata;
 	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
 	unsigned int to = from + len;
 	int ret;
@@ -880,10 +870,8 @@ failed:
 	gfs2_trans_end(sdp);
 	if (gfs2_mb_reserved(ip))
 		gfs2_inplace_release(ip);
-	if (qa) {
+	if (ip->i_res->rs_qa_qd_num)
 		gfs2_quota_unlock(ip);
-		gfs2_qadata_put(ip);
-	}
 	if (inode == sdp->sd_rindex) {
 		gfs2_glock_dq(&m_ip->i_gh);
 		gfs2_holder_uninit(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index dab5409..6d957a8 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1045,12 +1045,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
 		lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
 
 	find_metapath(sdp, lblock, &mp, ip->i_height);
-	if (!gfs2_qadata_get(ip))
-		return -ENOMEM;
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
 
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out;
+		return error;
 
 	while (height--) {
 		struct strip_mine sm;
@@ -1064,8 +1065,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
 
 	gfs2_quota_unhold(ip);
 
-out:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
@@ -1167,19 +1166,14 @@ static int do_grow(struct inode *inode, u64 size)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *dibh;
-	struct gfs2_qadata *qa = NULL;
 	int error;
 	int unstuff = 0;
 
 	if (gfs2_is_stuffed(ip) &&
 	    (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
-		qa = gfs2_qadata_get(ip);
-		if (qa == NULL)
-			return -ENOMEM;
-
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto do_grow_alloc_put;
+			return error;
 
 		error = gfs2_inplace_reserve(ip, 1);
 		if (error)
@@ -1214,8 +1208,6 @@ do_grow_release:
 		gfs2_inplace_release(ip);
 do_grow_qunlock:
 		gfs2_quota_unlock(ip);
-do_grow_alloc_put:
-		gfs2_qadata_put(ip);
 	}
 	return error;
 }
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8aaeb07..259b088 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1854,14 +1854,9 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 	if (!ht)
 		return -ENOMEM;
 
-	if (!gfs2_qadata_get(dip)) {
-		error = -ENOMEM;
-		goto out;
-	}
-
 	error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out_put;
+		goto out;
 
 	/*  Count the number of leaves  */
 	bh = leaf_bh;
@@ -1942,8 +1937,6 @@ out_rg_gunlock:
 out_rlist:
 	gfs2_rlist_free(&rlist);
 	gfs2_quota_unhold(dip);
-out_put:
-	gfs2_qadata_put(dip);
 out:
 	kfree(ht);
 	return error;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3790617..26e2905 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -366,7 +366,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	u64 pos = page->index << PAGE_CACHE_SHIFT;
 	unsigned int data_blocks, ind_blocks, rblocks;
 	struct gfs2_holder gh;
-	struct gfs2_qadata *qa;
 	loff_t size;
 	int ret;
 
@@ -397,14 +396,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		goto out_unlock;
 	}
 
-	ret = -ENOMEM;
-	qa = gfs2_qadata_get(ip);
-	if (qa == NULL)
+	ret = gfs2_rindex_update(sdp);
+	if (ret)
 		goto out_unlock;
 
 	ret = gfs2_quota_lock_check(ip);
 	if (ret)
-		goto out_alloc_put;
+		goto out_unlock;
 	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
 	ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
 	if (ret)
@@ -451,8 +449,6 @@ out_trans_fail:
 	gfs2_inplace_release(ip);
 out_quota_unlock:
 	gfs2_quota_unlock(ip);
-out_alloc_put:
-	gfs2_qadata_put(ip);
 out_unlock:
 	gfs2_glock_dq(&gh);
 out:
@@ -764,7 +760,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
 	loff_t bytes, max_bytes;
-	struct gfs2_qadata *qa;
 	int error;
 	const loff_t pos = offset;
 	const loff_t count = len;
@@ -804,15 +799,13 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 			offset += bytes;
 			continue;
 		}
-		qa = gfs2_qadata_get(ip);
-		if (!qa) {
-			error = -ENOMEM;
+		error = gfs2_rindex_update(sdp);
+		if (error)
 			goto out_unlock;
-		}
 
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto out_alloc_put;
+			goto out_unlock;
 
 retry:
 		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
@@ -852,7 +845,6 @@ retry:
 		offset += max_bytes;
 		gfs2_inplace_release(ip);
 		gfs2_quota_unlock(ip);
-		gfs2_qadata_put(ip);
 	}
 
 	if (error == 0)
@@ -863,8 +855,6 @@ out_trans_fail:
 	gfs2_inplace_release(ip);
 out_qunlock:
 	gfs2_quota_unlock(ip);
-out_alloc_put:
-	gfs2_qadata_put(ip);
 out_unlock:
 	gfs2_glock_dq(&ip->i_gh);
 out_uninit:
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 67fd6be..5cda51a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -289,16 +289,14 @@ struct gfs2_glock {
 
 #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
 
-struct gfs2_qadata { /* quota allocation data */
-	/* Quota stuff */
-	struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
-	struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
-	unsigned int qa_qd_num;
-};
-
 struct gfs2_blkreserv {
 	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
 	struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
+
+	/* ancillary quota stuff */
+	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
+	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
+	unsigned int rs_qa_qd_num;
 };
 
 enum {
@@ -319,7 +317,6 @@ struct gfs2_inode {
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
 	struct gfs2_holder i_gh; /* for prepare/commit_write only */
-	struct gfs2_qadata *i_qadata; /* quota allocation data */
 	struct gfs2_blkreserv *i_res; /* resource group block reservation */
 	struct gfs2_rgrpd *i_rgd;
 	u64 i_goal;	/* goal block for allocations */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2a1b4b5..2b035e0 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,12 +521,10 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	int error;
 
 	munge_mode_uid_gid(dip, &mode, &uid, &gid);
-	if (!gfs2_qadata_get(dip))
-		return -ENOMEM;
 
 	error = gfs2_quota_lock(dip, uid, gid);
 	if (error)
-		goto out;
+		return error;
 
 	error = gfs2_quota_check(dip, uid, gid);
 	if (error)
@@ -542,8 +540,6 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 out_quota:
 	gfs2_quota_unlock(dip);
-out:
-	gfs2_qadata_put(dip);
 	return error;
 }
 
@@ -551,15 +547,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 		       struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_qadata *qa;
 	int alloc_required;
 	struct buffer_head *dibh;
 	int error;
 
-	qa = gfs2_qadata_get(dip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto fail;
@@ -611,7 +602,6 @@ fail_quota_locks:
 	gfs2_quota_unlock(dip);
 
 fail:
-	gfs2_qadata_put(dip);
 	return error;
 }
 
@@ -734,7 +724,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (gfs2_mb_reserved(dip))
 		gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
-	gfs2_qadata_put(dip);
 	mark_inode_dirty(inode);
 	gfs2_glock_dq_uninit_m(2, ghs);
 	d_instantiate(dentry, inode);
@@ -883,16 +872,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	error = 0;
 
 	if (alloc_required) {
-		struct gfs2_qadata *qa = gfs2_qadata_get(dip);
-
-		if (!qa) {
-			error = -ENOMEM;
-			goto out_gunlock;
-		}
-
 		error = gfs2_quota_lock_check(dip);
 		if (error)
-			goto out_alloc;
+			goto out_gunlock;
 
 		error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
 		if (error)
@@ -935,9 +917,6 @@ out_ipres:
 out_gunlock_q:
 	if (alloc_required)
 		gfs2_quota_unlock(dip);
-out_alloc:
-	if (alloc_required)
-		gfs2_qadata_put(dip);
 out_gunlock:
 	gfs2_glock_dq(ghs + 1);
 out_child:
@@ -1374,16 +1353,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		goto out_gunlock;
 
 	if (alloc_required) {
-		struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
-
-		if (!qa) {
-			error = -ENOMEM;
-			goto out_gunlock;
-		}
-
 		error = gfs2_quota_lock_check(ndip);
 		if (error)
-			goto out_alloc;
+			goto out_gunlock;
 
 		error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
 		if (error)
@@ -1444,9 +1416,6 @@ out_ipreserv:
 out_gunlock_q:
 	if (alloc_required)
 		gfs2_quota_unlock(ndip);
-out_alloc:
-	if (alloc_required)
-		gfs2_qadata_put(ndip);
 out_gunlock:
 	while (x--) {
 		gfs2_glock_dq(ghs + x);
@@ -1607,12 +1576,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
 		ogid = ngid = NO_QUOTA_CHANGE;
 
-	if (!gfs2_qadata_get(ip))
-		return -ENOMEM;
-
 	error = gfs2_quota_lock(ip, nuid, ngid);
 	if (error)
-		goto out_alloc;
+		return error;
 
 	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
 		error = gfs2_quota_check(ip, nuid, ngid);
@@ -1638,8 +1604,6 @@ out_end_trans:
 	gfs2_trans_end(sdp);
 out_gunlock_q:
 	gfs2_quota_unlock(ip);
-out_alloc:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 6cdb0f2..e04d0e0 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -43,7 +43,6 @@ static void gfs2_init_inode_once(void *foo)
 	inode_init_once(&ip->i_inode);
 	init_rwsem(&ip->i_rw_mutex);
 	INIT_LIST_HEAD(&ip->i_trunc_list);
-	ip->i_qadata = NULL;
 	ip->i_res = NULL;
 	ip->i_hash_cache = NULL;
 }
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 197cc2d..7d1ede7 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -494,11 +494,15 @@ static void qdsb_put(struct gfs2_quota_data *qd)
 int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
-	struct gfs2_quota_data **qd = qa->qa_qd;
+	struct gfs2_quota_data **qd;
 	int error;
 
-	if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
+	if (ip->i_res == NULL)
+		gfs2_rs_alloc(ip);
+
+	qd = ip->i_res->rs_qa_qd;
+
+	if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
 	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
 		return -EIO;
 
@@ -508,20 +512,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 	error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
 	if (error)
 		goto out;
-	qa->qa_qd_num++;
+	ip->i_res->rs_qa_qd_num++;
 	qd++;
 
 	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
 	if (error)
 		goto out;
-	qa->qa_qd_num++;
+	ip->i_res->rs_qa_qd_num++;
 	qd++;
 
 	if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
 		error = qdsb_get(sdp, QUOTA_USER, uid, qd);
 		if (error)
 			goto out;
-		qa->qa_qd_num++;
+		ip->i_res->rs_qa_qd_num++;
 		qd++;
 	}
 
@@ -529,7 +533,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 		error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
 		if (error)
 			goto out;
-		qa->qa_qd_num++;
+		ip->i_res->rs_qa_qd_num++;
 		qd++;
 	}
 
@@ -542,16 +546,17 @@ out:
 void gfs2_quota_unhold(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
 	unsigned int x;
 
+	if (ip->i_res == NULL)
+		return;
 	gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
-		qdsb_put(qa->qa_qd[x]);
-		qa->qa_qd[x] = NULL;
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+		qdsb_put(ip->i_res->rs_qa_qd[x]);
+		ip->i_res->rs_qa_qd[x] = NULL;
 	}
-	qa->qa_qd_num = 0;
+	ip->i_res->rs_qa_qd_num = 0;
 }
 
 static int sort_qd(const void *a, const void *b)
@@ -919,7 +924,6 @@ fail:
 int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qd;
 	unsigned int x;
 	int error = 0;
@@ -932,15 +936,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
 		return 0;
 
-	sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
-	     sort_qd, NULL);
+	sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
+	     sizeof(struct gfs2_quota_data *), sort_qd, NULL);
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
 		int force = NO_FORCE;
-		qd = qa->qa_qd[x];
+		qd = ip->i_res->rs_qa_qd[x];
 		if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
 			force = FORCE;
-		error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
+		error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
 		if (error)
 			break;
 	}
@@ -949,7 +953,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 		set_bit(GIF_QD_LOCKED, &ip->i_flags);
 	else {
 		while (x--)
-			gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+			gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
 		gfs2_quota_unhold(ip);
 	}
 
@@ -994,7 +998,6 @@ static int need_sync(struct gfs2_quota_data *qd)
 
 void gfs2_quota_unlock(struct gfs2_inode *ip)
 {
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qda[4];
 	unsigned int count = 0;
 	unsigned int x;
@@ -1002,14 +1005,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
 	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
 		goto out;
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
 		struct gfs2_quota_data *qd;
 		int sync;
 
-		qd = qa->qa_qd[x];
+		qd = ip->i_res->rs_qa_qd[x];
 		sync = need_sync(qd);
 
-		gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+		gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
 
 		if (sync && qd_trylock(qd))
 			qda[count++] = qd;
@@ -1042,7 +1045,6 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
 int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qd;
 	s64 value;
 	unsigned int x;
@@ -1054,8 +1056,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
         if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                 return 0;
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
-		qd = qa->qa_qd[x];
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+		qd = ip->i_res->rs_qa_qd[x];
 
 		if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
 		      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1093,7 +1095,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 		       u32 uid, u32 gid)
 {
-	struct gfs2_qadata *qa = ip->i_qadata;
 	struct gfs2_quota_data *qd;
 	unsigned int x;
 
@@ -1102,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 	if (ip->i_diskflags & GFS2_DIF_SYSTEM)
 		return;
 
-	for (x = 0; x < qa->qa_qd_num; x++) {
-		qd = qa->qa_qd[x];
+	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+		qd = ip->i_res->rs_qa_qd[x];
 
 		if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
 		    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e944fef..9eca6a9 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1007,25 +1007,6 @@ out:
 }
 
 /**
- * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
- * @ip: the incore GFS2 inode structure
- *
- * Returns: the struct gfs2_qadata
- */
-
-struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	int error;
-	BUG_ON(ip->i_qadata != NULL);
-	ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
-	error = gfs2_rindex_update(sdp);
-	if (error)
-		fs_warn(sdp, "rindex update returns %d\n", error);
-	return ip->i_qadata;
-}
-
-/**
  * try_rgrp_fit - See if a given reservation will fit in a given RG
  * @rgd: the RG data
  * @ip: the inode
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index d9eda5f..5d8314d 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -29,14 +29,6 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 
-extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip);
-static inline void gfs2_qadata_put(struct gfs2_inode *ip)
-{
-	BUG_ON(ip->i_qadata == NULL);
-	kfree(ip->i_qadata);
-	ip->i_qadata = NULL;
-}
-
 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 65578df..81fc762 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1399,7 +1399,6 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
 static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_qadata *qa;
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_holder gh;
 	int error;
@@ -1409,13 +1408,9 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out;
+		return error;
 
 	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
 	if (!rgd) {
@@ -1443,8 +1438,6 @@ out_rg_gunlock:
 	gfs2_glock_dq_uninit(&gh);
 out_qs:
 	gfs2_quota_unhold(ip);
-out:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 927f4df..523c0de 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -325,13 +325,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 			       struct gfs2_ea_header *ea,
 			       struct gfs2_ea_header *prev, int leave)
 {
-	struct gfs2_qadata *qa;
 	int error;
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto out_alloc;
@@ -340,7 +335,6 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	gfs2_quota_unhold(ip);
 out_alloc:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
@@ -713,17 +707,12 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 			     unsigned int blks,
 			     ea_skeleton_call_t skeleton_call, void *private)
 {
-	struct gfs2_qadata *qa;
 	struct buffer_head *dibh;
 	int error;
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_lock_check(ip);
 	if (error)
-		goto out;
+		return error;
 
 	error = gfs2_inplace_reserve(ip, blks);
 	if (error)
@@ -753,8 +742,6 @@ out_ipres:
 	gfs2_inplace_release(ip);
 out_gunlock_q:
 	gfs2_quota_unlock(ip);
-out:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
@@ -1494,16 +1481,11 @@ out_gunlock:
 
 int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
-	struct gfs2_qadata *qa;
 	int error;
 
-	qa = gfs2_qadata_get(ip);
-	if (!qa)
-		return -ENOMEM;
-
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
-		goto out_alloc;
+		return error;
 
 	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
 	if (error)
@@ -1519,8 +1501,6 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 
 out_quota:
 	gfs2_quota_unhold(ip);
-out_alloc:
-	gfs2_qadata_put(ip);
 	return error;
 }
 
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 03/16] GFS2: Add "top dir" flag support
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 01/16] GFS2: Extend the life of the reservations Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 02/16] GFS2: Fold quota data into the reservations struct Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 04/16] GFS2: Fix error handling when reading an invalid block from the journal Steven Whitehouse
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Steven Whitehouse, Steven Whitehouse

From: Steven Whitehouse <steve@chygwyn.com>

This patch adds support for the "top dir" flag. Currently this is unused
but a subsequent patch is planned which will add support for the
Orlov allocation policy when allocating subdirectories in a parent
with this flag set.

In order to ensure backward compatible behaviour, mkfs.gfs2 does
not currently tag the root directory with this flag, it must always be
set manually.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 26e2905..6fbf3cb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -142,6 +142,7 @@ static const u32 fsflags_to_gfs2[32] = {
 	[7] = GFS2_DIF_NOATIME,
 	[12] = GFS2_DIF_EXHASH,
 	[14] = GFS2_DIF_INHERIT_JDATA,
+	[17] = GFS2_DIF_TOPDIR,
 };
 
 static const u32 gfs2_to_fsflags[32] = {
@@ -150,6 +151,7 @@ static const u32 gfs2_to_fsflags[32] = {
 	[gfs2fl_AppendOnly] = FS_APPEND_FL,
 	[gfs2fl_NoAtime] = FS_NOATIME_FL,
 	[gfs2fl_ExHash] = FS_INDEX_FL,
+	[gfs2fl_TopLevel] = FS_TOPDIR_FL,
 	[gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
 };
 
@@ -203,6 +205,7 @@ void gfs2_set_inode_flags(struct inode *inode)
 			     GFS2_DIF_NOATIME|			\
 			     GFS2_DIF_SYNC|			\
 			     GFS2_DIF_SYSTEM|			\
+			     GFS2_DIF_TOPDIR|			\
 			     GFS2_DIF_INHERIT_JDATA)
 
 /**
@@ -298,6 +301,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
 
 	gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
 	if (!S_ISDIR(inode->i_mode)) {
+		gfsflags &= ~GFS2_DIF_TOPDIR;
 		if (gfsflags & GFS2_DIF_INHERIT_JDATA)
 			gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
 		return do_gfs2_set_flags(filp, gfsflags, ~0);
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index fa98bdb..e8ccf6f 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -214,6 +214,7 @@ enum {
 	gfs2fl_NoAtime		= 7,
 	gfs2fl_Sync		= 8,
 	gfs2fl_System		= 9,
+	gfs2fl_TopLevel		= 10,
 	gfs2fl_TruncInProg	= 29,
 	gfs2fl_InheritDirectio	= 30,
 	gfs2fl_InheritJdata	= 31,
@@ -230,8 +231,9 @@ enum {
 #define GFS2_DIF_NOATIME		0x00000080
 #define GFS2_DIF_SYNC			0x00000100
 #define GFS2_DIF_SYSTEM			0x00000200 /* New in gfs2 */
+#define GFS2_DIF_TOPDIR			0x00000400 /* New in gfs2 */
 #define GFS2_DIF_TRUNC_IN_PROG		0x20000000 /* New in gfs2 */
-#define GFS2_DIF_INHERIT_DIRECTIO	0x40000000
+#define GFS2_DIF_INHERIT_DIRECTIO	0x40000000 /* only in gfs1 */
 #define GFS2_DIF_INHERIT_JDATA		0x80000000
 
 struct gfs2_dinode {
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 04/16] GFS2: Fix error handling when reading an invalid block from the journal
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (2 preceding siblings ...)
  2012-07-23  8:00 ` [PATCH 03/16] GFS2: Add "top dir" flag support Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 05/16] GFS2: Increase buffer size for glocks and glstats debugfs files Steven Whitehouse
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Steven Whitehouse, Steven Whitehouse

From: Steven Whitehouse <steve@chygwyn.com>

When we read an invalid block from the journal, we should not call
withdraw, but simply print a message and return an error. It is
up to the caller to then handle that error. In the case of mount
that means a failed mount, rather than a withdraw (requiring a
reboot). In the case of recovering another nodes journal then
we return an error via the uevent.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 852c1be..8ff95a2 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -401,9 +401,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 		goto out;
 	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
 	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
-	gfs2_meta_check(sdp, bd->bd_bh);
-	gfs2_pin(sdp, bd->bd_bh);
 	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+	if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
+		printk(KERN_ERR
+		       "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
+		       (unsigned long long)bd->bd_bh->b_blocknr);
+		BUG();
+	}
+	gfs2_pin(sdp, bd->bd_bh);
 	mh->__pad0 = cpu_to_be64(0);
 	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 	sdp->sd_log_num_buf++;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 3586b0d..8053573 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -79,23 +79,19 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 		       const char *type, const char *function,
 		       char *file, unsigned int line);
 
-static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
-				    struct buffer_head *bh,
-				    const char *function,
-				    char *file, unsigned int line)
+static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
+				    struct buffer_head *bh)
 {
 	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
 	u32 magic = be32_to_cpu(mh->mh_magic);
-	if (unlikely(magic != GFS2_MAGIC))
-		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
-					  file, line);
+	if (unlikely(magic != GFS2_MAGIC)) {
+		printk(KERN_ERR "GFS2: Magic number missing at %llu\n",
+		       (unsigned long long)bh->b_blocknr);
+		return -EIO;
+	}
 	return 0;
 }
 
-#define gfs2_meta_check(sdp, bh) \
-gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
-
-
 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 			   u16 type, u16 t,
 			   const char *function,
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 05/16] GFS2: Increase buffer size for glocks and glstats debugfs files
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (3 preceding siblings ...)
  2012-07-23  8:00 ` [PATCH 04/16] GFS2: Fix error handling when reading an invalid block from the journal Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 06/16] GFS2: Cache last hash bucket for glock seq_files Steven Whitehouse
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Steven Whitehouse, Al Viro

As per Al Viro's suggestion, this increases the buffer size used
for these two files. This provides a speed up of slightly less than
8x (i.e. proportional to the buffer size) for cases when we have
large numbers of glocks.

Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dab2526..1c4cddf 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1972,6 +1972,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
+		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		if (seq->buf)
+			seq->size = 8*PAGE_SIZE;
 	}
 	return ret;
 }
@@ -1984,6 +1987,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
+		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		if (seq->buf)
+			seq->size = 8*PAGE_SIZE;
 	}
 	return ret;
 }
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 06/16] GFS2: Cache last hash bucket for glock seq_files
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (4 preceding siblings ...)
  2012-07-23  8:00 ` [PATCH 05/16] GFS2: Increase buffer size for glocks and glstats debugfs files Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 07/16] GFS2: Use lvbs for storing rgrp information with mount option Steven Whitehouse
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Steven Whitehouse, Eric Dumazet

For the glocks and glstats seq_files, which are exposed via debugfs
we should cache the most recent hash bucket, along with the offset
into that bucket. This allows us to restart from that point, rather
than having to begin at the beginning each time.

This is an idea from Eric Dumazet, however I've slightly extended it
so that if the position from which we are due to start is at any
point beyond the last cached point, we start from the last cached
point, plus whatever is the appropriate offset. I don't really expect
people to be lseeking around these files, but if they did so with only
positive offsets, then we'd still get some of the benefit of using a
cached offset.

With my simple test of around 200k entries in the file, I'm seeing
an approx 10x speed up.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1c4cddf..3ad8cb3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -46,10 +46,12 @@
 #include "trace_gfs2.h"
 
 struct gfs2_glock_iter {
-	int hash;			/* hash bucket index         */
-	struct gfs2_sbd *sdp;		/* incore superblock         */
-	struct gfs2_glock *gl;		/* current glock struct      */
-	char string[512];		/* scratch space             */
+	int hash;			/* hash bucket index           */
+	unsigned nhash;			/* Index within current bucket */
+	struct gfs2_sbd *sdp;		/* incore superblock           */
+	struct gfs2_glock *gl;		/* current glock struct        */
+	loff_t last_pos;		/* last position               */
+	char string[512];		/* scratch space               */
 };
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
@@ -950,7 +952,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 	if (seq) {
 		struct gfs2_glock_iter *gi = seq->private;
 		vsprintf(gi->string, fmt, args);
-		seq_printf(seq, gi->string);
+		seq_puts(seq, gi->string);
 	} else {
 		vaf.fmt = fmt;
 		vaf.va = &args;
@@ -1854,8 +1856,14 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 		gl = gi->gl;
 		if (gl) {
 			gi->gl = glock_hash_next(gl);
+			gi->nhash++;
 		} else {
+			if (gi->hash >= GFS2_GL_HASH_SIZE) {
+				rcu_read_unlock();
+				return 1;
+			}
 			gi->gl = glock_hash_chain(gi->hash);
+			gi->nhash = 0;
 		}
 		while (gi->gl == NULL) {
 			gi->hash++;
@@ -1864,6 +1872,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 				return 1;
 			}
 			gi->gl = glock_hash_chain(gi->hash);
+			gi->nhash = 0;
 		}
 	/* Skip entries for other sb and dead entries */
 	} while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
@@ -1876,7 +1885,12 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 	struct gfs2_glock_iter *gi = seq->private;
 	loff_t n = *pos;
 
-	gi->hash = 0;
+	if (gi->last_pos <= *pos)
+		n = gi->nhash + (*pos - gi->last_pos);
+	else
+		gi->hash = 0;
+
+	gi->nhash = 0;
 	rcu_read_lock();
 
 	do {
@@ -1884,6 +1898,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 			return NULL;
 	} while (n--);
 
+	gi->last_pos = *pos;
 	return gi->gl;
 }
 
@@ -1893,7 +1908,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 	struct gfs2_glock_iter *gi = seq->private;
 
 	(*pos)++;
-
+	gi->last_pos = *pos;
 	if (gfs2_glock_iter_next(gi))
 		return NULL;
 
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 07/16] GFS2: Use lvbs for storing rgrp information with mount option
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (5 preceding siblings ...)
  2012-07-23  8:00 ` [PATCH 06/16] GFS2: Cache last hash bucket for glock seq_files Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 08/16] seq_file: Add seq_vprintf function and export it Steven Whitehouse
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Benjamin Marzinski, Steven Whitehouse

From: Benjamin Marzinski <bmarzins@redhat.com>

Instead of reading in the resource groups when gfs2 is checking
for free space to allocate from, gfs2 can store the necessary infromation
in the resource group's lvb.  Also, instead of searching for unlinked
inodes in every resource group that's checked for free space, gfs2 can
store the number of unlinked but inodes in the lvb, and only check for
unlinked inodes if it will find some.

The first time a resource group is locked, the lvb must initialized.
Since this involves counting the unlinked inodes in the resource group,
this takes a little extra time.  But after that, if the resource group
is locked with GL_SKIP, the buffer head won't be read in unless it's
actually needed.

Enabling the resource groups lvbs is done via the rgrplvb mount option.  If
this option isn't set, the lvbs will still be set and updated, but they won't
be verfied or used by the filesystem.  To safely turn on this option, all of
the nodes mounting the filesystem must be running code with this patch, and
the filesystem must have been completely unmounted since they were updated.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3ad8cb3..10ae164 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
 	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
 	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
+	memset(gl->gl_lvb, 0, 32 * sizeof(char));
 	gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
 	gl->gl_tchange = jiffies;
 	gl->gl_object = NULL;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 5cda51a..dc73070 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -89,6 +89,7 @@ struct gfs2_rgrpd {
 	u64 rd_igeneration;
 	struct gfs2_bitmap *rd_bits;
 	struct gfs2_sbd *rd_sbd;
+	struct gfs2_rgrp_lvb *rd_rgl;
 	u32 rd_last_alloc;
 	u32 rd_flags;
 #define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
@@ -470,6 +471,7 @@ struct gfs2_args {
 	unsigned int ar_discard:1;		/* discard requests */
 	unsigned int ar_errors:2;               /* errors=withdraw | panic */
 	unsigned int ar_nobarrier:1;            /* do not send barriers */
+	unsigned int ar_rgrplvb:1;		/* use lvbs for rgrp info */
 	int ar_commit;				/* Commit interval */
 	int ar_statfs_quantum;			/* The fast statfs interval */
 	int ar_quota_quantum;			/* The quota interval */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9eca6a9..3c6f7ed 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -660,6 +660,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 		goto fail;
 
 	rgd->rd_gl->gl_object = rgd;
+	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb;
 	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
 	if (rgd->rd_data > sdp->sd_max_rg_data)
 		sdp->sd_max_rg_data = rgd->rd_data;
@@ -769,9 +770,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
 
+static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
+	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+
+	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
+	    rgl->rl_dinodes != str->rg_dinodes ||
+	    rgl->rl_igeneration != str->rg_igeneration)
+		return 0;
+	return 1;
+}
+
+static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
+{
+	const struct gfs2_rgrp *str = buf;
+
+	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
+	rgl->rl_flags = str->rg_flags;
+	rgl->rl_free = str->rg_free;
+	rgl->rl_dinodes = str->rg_dinodes;
+	rgl->rl_igeneration = str->rg_igeneration;
+	rgl->__pad = 0UL;
+}
+
+static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
+{
+	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
+	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
+	rgl->rl_unlinked = cpu_to_be32(unlinked);
+}
+
+static u32 count_unlinked(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_bitmap *bi;
+	const u32 length = rgd->rd_length;
+	const u8 *buffer = NULL;
+	u32 i, goal, count = 0;
+
+	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
+		goal = 0;
+		buffer = bi->bi_bh->b_data + bi->bi_offset;
+		WARN_ON(!buffer_uptodate(bi->bi_bh));
+		while (goal < bi->bi_len * GFS2_NBBY) {
+			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
+					   GFS2_BLKST_UNLINKED);
+			if (goal == BFITNOENT)
+				break;
+			count++;
+			goal++;
+		}
+	}
+
+	return count;
+}
+
+
 /**
- * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
- * @gh: The glock holder for the resource group
+ * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
+ * @rgd: the struct gfs2_rgrpd describing the RG to read in
  *
  * Read in all of a Resource Group's header and bitmap blocks.
  * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
@@ -779,9 +836,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
  * Returns: errno
  */
 
-int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
-	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_glock *gl = rgd->rd_gl;
 	unsigned int length = rgd->rd_length;
@@ -789,6 +845,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
 	unsigned int x, y;
 	int error;
 
+	if (rgd->rd_bits[0].bi_bh != NULL)
+		return 0;
+
 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
 		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
@@ -815,7 +874,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 		rgd->rd_free_clone = rgd->rd_free;
 	}
-
+	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
+		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
+				     rgd->rd_bits[0].bi_bh->b_data);
+	}
+	else if (sdp->sd_args.ar_rgrplvb) {
+		if (!gfs2_rgrp_lvb_valid(rgd)){
+			gfs2_consist_rgrpd(rgd);
+			error = -EIO;
+			goto fail;
+		}
+		if (rgd->rd_rgl->rl_unlinked == 0)
+			rgd->rd_flags &= ~GFS2_RDF_CHECK;
+	}
 	return 0;
 
 fail:
@@ -829,6 +901,39 @@ fail:
 	return error;
 }
 
+int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
+{
+	u32 rl_flags;
+
+	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
+		return 0;
+
+	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+		return gfs2_rgrp_bh_get(rgd);
+
+	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
+	rl_flags &= ~GFS2_RDF_MASK;
+	rgd->rd_flags &= GFS2_RDF_MASK;
+	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
+	if (rgd->rd_rgl->rl_unlinked == 0)
+		rgd->rd_flags &= ~GFS2_RDF_CHECK;
+	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
+	rgd->rd_free_clone = rgd->rd_free;
+	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
+	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
+	return 0;
+}
+
+int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+{
+	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
+	struct gfs2_sbd *sdp = rgd->rd_sbd;
+
+	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
+		return 0;
+	return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object);
+}
+
 /**
  * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
  * @gh: The glock holder for the resource group
@@ -842,8 +947,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
 
 	for (x = 0; x < length; x++) {
 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
-		brelse(bi->bi_bh);
-		bi->bi_bh = NULL;
+		if (bi->bi_bh) {
+			brelse(bi->bi_bh);
+			bi->bi_bh = NULL;
+		}
 	}
 
 }
@@ -987,6 +1094,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 				rgd->rd_flags |= GFS2_RGF_TRIMMED;
 				gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
 				gfs2_rgrp_out(rgd, bh->b_data);
+				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
 				gfs2_trans_end(sdp);
 			}
 		}
@@ -1116,6 +1224,9 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 	int error, rg_locked, flags = LM_FLAG_TRY;
 	int loops = 0;
 
+	if (sdp->sd_args.ar_rgrplvb)
+		flags |= GL_SKIP;
+
 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
 		rgd = begin = ip->i_rgd;
 	else
@@ -1133,22 +1244,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 		} else {
 			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
 						   flags, &rs->rs_rgd_gh);
+			if (!error && sdp->sd_args.ar_rgrplvb) {
+				error = update_rgrp_lvb(rgd);
+				if (error) {
+					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+					return error;
+				}
+			}
 		}
 		switch (error) {
 		case 0:
 			if (try_rgrp_fit(rgd, ip)) {
+				if (sdp->sd_args.ar_rgrplvb)
+					gfs2_rgrp_bh_get(rgd);
 				ip->i_rgd = rgd;
 				return 0;
 			}
-			if (rgd->rd_flags & GFS2_RDF_CHECK)
+			if (rgd->rd_flags & GFS2_RDF_CHECK) {
+				if (sdp->sd_args.ar_rgrplvb)
+					gfs2_rgrp_bh_get(rgd);
 				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
+			}
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = gfs2_rgrpd_get_next(rgd);
 			if (rgd == begin) {
-				flags = 0;
+				flags &= ~LM_FLAG_TRY;
 				loops++;
 			}
 			break;
@@ -1529,6 +1652,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 
 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
 	if (dinode)
@@ -1575,6 +1699,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 
 	/* Directories keep their data in the metadata address space */
 	if (meta || ip->i_depth)
@@ -1611,6 +1736,8 @@ void gfs2_unlink_di(struct inode *inode)
 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+	update_rgrp_lvb_unlinked(rgd, 1);
 }
 
 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
@@ -1630,6 +1757,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+	update_rgrp_lvb_unlinked(rgd, -1);
 
 	gfs2_statfs_change(sdp, 0, +1, -1);
 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 81fc762..7880687 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -78,6 +78,8 @@ enum {
 	Opt_quota_quantum,
 	Opt_barrier,
 	Opt_nobarrier,
+	Opt_rgrplvb,
+	Opt_norgrplvb,
 	Opt_error,
 };
 
@@ -115,6 +117,8 @@ static const match_table_t tokens = {
 	{Opt_quota_quantum, "quota_quantum=%d"},
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
+	{Opt_rgrplvb, "rgrplvb"},
+	{Opt_norgrplvb, "norgrplvb"},
 	{Opt_error, NULL}
 };
 
@@ -267,6 +271,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
 		case Opt_nobarrier:
 			args->ar_nobarrier = 1;
 			break;
+		case Opt_rgrplvb:
+			args->ar_rgrplvb = 1;
+			break;
+		case Opt_norgrplvb:
+			args->ar_rgrplvb = 0;
+			break;
 		case Opt_error:
 		default:
 			printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o);
@@ -1379,6 +1389,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
 		seq_printf(s, ",nobarrier");
 	if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
 		seq_printf(s, ",demote_interface_used");
+	if (args->ar_rgrplvb)
+		seq_printf(s, ",rgrplvb");
 	return 0;
 }
 
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index e8ccf6f..b2de1f9 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -170,6 +170,16 @@ struct gfs2_rindex {
 #define GFS2_RGF_NOALLOC	0x00000008
 #define GFS2_RGF_TRIMMED	0x00000010
 
+struct gfs2_rgrp_lvb {
+	__be32 rl_magic;
+	__be32 rl_flags;
+	__be32 rl_free;
+	__be32 rl_dinodes;
+	__be64 rl_igeneration;
+	__be32 rl_unlinked;
+	__be32 __pad;
+};
+
 struct gfs2_rgrp {
 	struct gfs2_meta_header rg_header;
 
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 08/16] seq_file: Add seq_vprintf function and export it
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (6 preceding siblings ...)
  2012-07-23  8:00 ` [PATCH 07/16] GFS2: Use lvbs for storing rgrp information with mount option Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:00 ` [PATCH 09/16] GFS2: Use seq_vprintf for glocks debugfs file Steven Whitehouse
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Steven Whitehouse

The existing seq_printf function is rewritten in terms of the new
seq_vprintf which is also exported to modules. This allows GFS2
(and potentially other seq_file users) to have a vprintf based
interface and to avoid an extra copy into a temporary buffer in
some cases.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0cbd049..14cf9de 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -385,15 +385,12 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
 }
 EXPORT_SYMBOL(seq_escape);
 
-int seq_printf(struct seq_file *m, const char *f, ...)
+int seq_vprintf(struct seq_file *m, const char *f, va_list args)
 {
-	va_list args;
 	int len;
 
 	if (m->count < m->size) {
-		va_start(args, f);
 		len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
-		va_end(args);
 		if (m->count + len < m->size) {
 			m->count += len;
 			return 0;
@@ -402,6 +399,19 @@ int seq_printf(struct seq_file *m, const char *f, ...)
 	seq_set_overflow(m);
 	return -1;
 }
+EXPORT_SYMBOL(seq_vprintf);
+
+int seq_printf(struct seq_file *m, const char *f, ...)
+{
+	int ret;
+	va_list args;
+
+	va_start(args, f);
+	ret = seq_vprintf(m, f, args);
+	va_end(args);
+
+	return ret;
+}
 EXPORT_SYMBOL(seq_printf);
 
 /**
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index fc61854..83c44ee 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -86,6 +86,7 @@ int seq_puts(struct seq_file *m, const char *s);
 int seq_write(struct seq_file *seq, const void *data, size_t len);
 
 __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...);
+__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args);
 
 int seq_path(struct seq_file *, const struct path *, const char *);
 int seq_dentry(struct seq_file *, struct dentry *, const char *);
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 09/16] GFS2: Use seq_vprintf for glocks debugfs file
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (7 preceding siblings ...)
  2012-07-23  8:00 ` [PATCH 08/16] seq_file: Add seq_vprintf function and export it Steven Whitehouse
@ 2012-07-23  8:00 ` Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 10/16] GFS2: Size seq_file buffer more carefully Steven Whitehouse
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Steven Whitehouse

Make use of the newly added seq_vprintf() function.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 10ae164..4d5d63d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -51,7 +51,6 @@ struct gfs2_glock_iter {
 	struct gfs2_sbd *sdp;		/* incore superblock           */
 	struct gfs2_glock *gl;		/* current glock struct        */
 	loff_t last_pos;		/* last position               */
-	char string[512];		/* scratch space               */
 };
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
@@ -951,9 +950,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 	va_start(args, fmt);
 
 	if (seq) {
-		struct gfs2_glock_iter *gi = seq->private;
-		vsprintf(gi->string, fmt, args);
-		seq_puts(seq, gi->string);
+		seq_vprintf(seq, fmt, args);
 	} else {
 		vaf.fmt = fmt;
 		vaf.va = &args;
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 10/16] GFS2: Size seq_file buffer more carefully
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (8 preceding siblings ...)
  2012-07-23  8:00 ` [PATCH 09/16] GFS2: Use seq_vprintf for glocks debugfs file Steven Whitehouse
@ 2012-07-23  8:01 ` Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 11/16] GFS2: Add kobject release method Steven Whitehouse
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:01 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Steven Whitehouse

This places a limit on the buffer size for archs with larger
PAGE_SIZE.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 4d5d63d..1ed81f4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1977,6 +1977,8 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
 	.show  = gfs2_sbstats_seq_show,
 };
 
+#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
+
 static int gfs2_glocks_open(struct inode *inode, struct file *file)
 {
 	int ret = seq_open_private(file, &gfs2_glock_seq_ops,
@@ -1985,9 +1987,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
-		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
 		if (seq->buf)
-			seq->size = 8*PAGE_SIZE;
+			seq->size = GFS2_SEQ_GOODSIZE;
 	}
 	return ret;
 }
@@ -2000,9 +2002,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
 		struct seq_file *seq = file->private_data;
 		struct gfs2_glock_iter *gi = seq->private;
 		gi->sdp = inode->i_private;
-		seq->buf = kmalloc(8*PAGE_SIZE, GFP_KERNEL | __GFP_NOWARN);
+		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
 		if (seq->buf)
-			seq->size = 8*PAGE_SIZE;
+			seq->size = GFS2_SEQ_GOODSIZE;
 	}
 	return ret;
 }
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 11/16] GFS2: Add kobject release method
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (9 preceding siblings ...)
  2012-07-23  8:01 ` [PATCH 10/16] GFS2: Size seq_file buffer more carefully Steven Whitehouse
@ 2012-07-23  8:01 ` Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 12/16] GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve Steven Whitehouse
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:01 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Bob Peterson, Steven Whitehouse

From: Bob Peterson <rpeterso@redhat.com>

This patch adds a kobject release function that properly maintains
the kobject use count, so that accesses to the sysfs files do not
cause an access to freed kernel memory after an unmount.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b8c250f..9b23897 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1118,20 +1118,33 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	}
 
 	error = init_names(sdp, silent);
-	if (error)
-		goto fail;
+	if (error) {
+		/* In this case, we haven't initialized sysfs, so we have to
+		   manually free the sdp. */
+		free_percpu(sdp->sd_lkstats);
+		kfree(sdp);
+		sb->s_fs_info = NULL;
+		return error;
+	}
 
 	snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
 
-	gfs2_create_debugfs_file(sdp);
-
 	error = gfs2_sys_fs_add(sdp);
+	/*
+	 * If we hit an error here, gfs2_sys_fs_add will have called function
+	 * kobject_put which causes the sysfs usage count to go to zero, which
+	 * causes sysfs to call function gfs2_sbd_release, which frees sdp.
+	 * Subsequent error paths here will call gfs2_sys_fs_del, which also
+	 * kobject_put to free sdp.
+	 */
 	if (error)
-		goto fail;
+		return error;
+
+	gfs2_create_debugfs_file(sdp);
 
 	error = gfs2_lm_mount(sdp, silent);
 	if (error)
-		goto fail_sys;
+		goto fail_debug;
 
 	error = init_locking(sdp, &mount_gh, DO);
 	if (error)
@@ -1215,12 +1228,12 @@ fail_locking:
 fail_lm:
 	gfs2_gl_hash_clear(sdp);
 	gfs2_lm_unmount(sdp);
-fail_sys:
-	gfs2_sys_fs_del(sdp);
-fail:
+fail_debug:
 	gfs2_delete_debugfs_file(sdp);
 	free_percpu(sdp->sd_lkstats);
-	kfree(sdp);
+	/* gfs2_sys_fs_del must be the last thing we do, since it causes
+	 * sysfs to call function gfs2_sbd_release, which frees sdp. */
+	gfs2_sys_fs_del(sdp);
 	sb->s_fs_info = NULL;
 	return error;
 }
@@ -1390,10 +1403,9 @@ static void gfs2_kill_sb(struct super_block *sb)
 	sdp->sd_root_dir = NULL;
 	sdp->sd_master_dir = NULL;
 	shrink_dcache_sb(sb);
-	kill_block_super(sb);
 	gfs2_delete_debugfs_file(sdp);
 	free_percpu(sdp->sd_lkstats);
-	kfree(sdp);
+	kill_block_super(sb);
 }
 
 struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9c2592b..e4bee4b 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -276,7 +276,15 @@ static struct attribute *gfs2_attrs[] = {
 	NULL,
 };
 
+static void gfs2_sbd_release(struct kobject *kobj)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+
+	kfree(sdp);
+}
+
 static struct kobj_type gfs2_ktype = {
+	.release = gfs2_sbd_release,
 	.default_attrs = gfs2_attrs,
 	.sysfs_ops     = &gfs2_attr_ops,
 };
@@ -583,6 +591,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 	char ro[20];
 	char spectator[20];
 	char *envp[] = { ro, spectator, NULL };
+	int sysfs_frees_sdp = 0;
 
 	sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
 	sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
@@ -591,8 +600,10 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 	error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
 				     "%s", sdp->sd_table_name);
 	if (error)
-		goto fail;
+		goto fail_reg;
 
+	sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling
+				function gfs2_sbd_release. */
 	error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
 	if (error)
 		goto fail_reg;
@@ -615,9 +626,13 @@ fail_lock_module:
 fail_tune:
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 fail_reg:
-	kobject_put(&sdp->sd_kobj);
-fail:
+	free_percpu(sdp->sd_lkstats);
 	fs_err(sdp, "error %d adding sysfs files", error);
+	if (sysfs_frees_sdp)
+		kobject_put(&sdp->sd_kobj);
+	else
+		kfree(sdp);
+	sb->s_fs_info = NULL;
 	return error;
 }
 
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 12/16] GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (10 preceding siblings ...)
  2012-07-23  8:01 ` [PATCH 11/16] GFS2: Add kobject release method Steven Whitehouse
@ 2012-07-23  8:01 ` Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 13/16] GFS2: Fixing double brelse'ing bh allocated in gfs2_meta_read when EIO occurs Steven Whitehouse
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:01 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Bob Peterson, Steven Whitehouse

From: Bob Peterson <rpeterso@redhat.com>

This function combines rgrp functions get_local_rgrp and
gfs2_inplace_reserve so that the double retry loop is gone.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3c6f7ed..e53d0a1 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1207,25 +1207,30 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 }
 
 /**
- * get_local_rgrp - Choose and lock a rgrp for allocation
+ * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
- * @last_unlinked: the last unlinked block
- *
- * Try to acquire rgrp in way which avoids contending with others.
+ * @requested: the number of blocks to be reserved
  *
  * Returns: errno
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd, *begin = NULL;
 	struct gfs2_blkreserv *rs = ip->i_res;
-	int error, rg_locked, flags = LM_FLAG_TRY;
+	int error = 0, rg_locked, flags = LM_FLAG_TRY;
+	u64 last_unlinked = NO_BLOCK;
 	int loops = 0;
 
 	if (sdp->sd_args.ar_rgrplvb)
 		flags |= GL_SKIP;
+	rs = ip->i_res;
+	rs->rs_requested = requested;
+	if (gfs2_assert_warn(sdp, requested)) {
+		error = -EINVAL;
+		goto out;
+	}
 
 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
 		rgd = begin = ip->i_rgd;
@@ -1263,63 +1268,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 			if (rgd->rd_flags & GFS2_RDF_CHECK) {
 				if (sdp->sd_args.ar_rgrplvb)
 					gfs2_rgrp_bh_get(rgd);
-				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
+				try_rgrp_unlink(rgd, &last_unlinked,
+						ip->i_no_addr);
 			}
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = gfs2_rgrpd_get_next(rgd);
-			if (rgd == begin) {
-				flags &= ~LM_FLAG_TRY;
-				loops++;
-			}
+			if (rgd != begin) /* If we didn't wrap */
+				break;
+
+			flags &= ~LM_FLAG_TRY;
+			loops++;
+			/* Check that fs hasn't grown if writing to rindex */
+			if (ip == GFS2_I(sdp->sd_rindex) &&
+			    !sdp->sd_rindex_uptodate) {
+				error = gfs2_ri_update(ip);
+				if (error)
+					goto out;
+			} else if (loops == 2)
+				/* Flushing the log may release space */
+				gfs2_log_flush(sdp, NULL);
 			break;
 		default:
-			return error;
+			goto out;
 		}
 	}
-
-	return -ENOSPC;
-}
-
-/**
- * gfs2_inplace_reserve - Reserve space in the filesystem
- * @ip: the inode to reserve space for
- * @requested: the number of blocks to be reserved
- *
- * Returns: errno
- */
-
-int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_blkreserv *rs;
-	int error = 0;
-	u64 last_unlinked = NO_BLOCK;
-	int tries = 0;
-
-	rs = ip->i_res;
-	rs->rs_requested = requested;
-	if (gfs2_assert_warn(sdp, requested)) {
-		error = -EINVAL;
-		goto out;
-	}
-
-	do {
-		error = get_local_rgrp(ip, &last_unlinked);
-		if (error != -ENOSPC)
-			break;
-		/* Check that fs hasn't grown if writing to rindex */
-		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
-			error = gfs2_ri_update(ip);
-			if (error)
-				break;
-			continue;
-		}
-		/* Flushing the log may release space */
-		gfs2_log_flush(sdp, NULL);
-	} while (tries++ < 3);
+	error = -ENOSPC;
 
 out:
 	if (error)
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 13/16] GFS2: Fixing double brelse'ing bh allocated in gfs2_meta_read when EIO occurs
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (11 preceding siblings ...)
  2012-07-23  8:01 ` [PATCH 12/16] GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve Steven Whitehouse
@ 2012-07-23  8:01 ` Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 14/16] GFS2: kernel panic with small gfs2 filesystems - 1 RG Steven Whitehouse
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:01 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Masatake YAMATO, Steven Whitehouse

From: Masatake YAMATO <yamato@redhat.com>

This patch fixes buffer_head double free in following code path:

gfs2_block_map
=> gfs2_meta_inode_buffer
 => gfs2_meta_indirect_buffer
  => gfs2_meta_read
=> release_metapath

gfs2_block_map calls gfs2_meta_inode_buffer with &mp.mp_bh[0]
as an argument. mp.mp_bh are filled with zero at the beginning
of gfs2_block_map.

If gfs2_meta_inode_buffer returns non-zero value, gfs2_block_map
calls release_metapath to free buffers chained to mp.mp_bh.
release_metapath checks each slot of mp.mp_bh[i] and
free(with brelse) unless the slot is filled with NULL.

&mp.mp_bh[0] passed to gfs2_meta_inode_buffer is filled at
gfs2_meta_read. gfs2_meta_read is filled a buffer allocated with
gfs2_getbuf even if EIO occurs. When EIO occurs, the allocated buffer
is brelse'ed though the pointer(wrong poiner) points the brelse'ed is
passed back to caller via an argument bhp.

gfs2_meta_indirect_buffer, the caller also pass the wrong pointer
to its caller with EIO. Finally gfs2_block_map gets both EIO and
&mp.mp_bh[0] filled with the wrong pointer. release_metapath
calls brelse again on the wrong pointer.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 6c1e5d1..3a56c8d 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -213,8 +213,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct buffer_head *bh;
 
-	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+		*bhp = NULL;
 		return -EIO;
+	}
 
 	*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
 
@@ -235,6 +237,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 		if (tr && tr->tr_touched)
 			gfs2_io_error_bh(sdp, bh);
 		brelse(bh);
+		*bhp = NULL;
 		return -EIO;
 	}
 
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 14/16] GFS2: kernel panic with small gfs2 filesystems - 1 RG
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (12 preceding siblings ...)
  2012-07-23  8:01 ` [PATCH 13/16] GFS2: Fixing double brelse'ing bh allocated in gfs2_meta_read when EIO occurs Steven Whitehouse
@ 2012-07-23  8:01 ` Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 15/16] GFS2: Reduce file fragmentation Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 16/16] GFS2: Eliminate 64-bit divides Steven Whitehouse
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:01 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Abhijith Das, Steven Whitehouse

From: Abhijith Das <adas@redhat.com>

In the unlikely setup where there's only one resource group in the gfs2
filesystem, gfs2_rgrpd_get_next() returns a NULL rgd that is not dealt with
properly, causing a kernel NULL ptr dereference. This patch fixes this issue.

Signed-off-by: Abhi Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e53d0a1..fb70792 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1276,6 +1276,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = gfs2_rgrpd_get_next(rgd);
+			rgd = rgd ? : begin; /* if NULL, wrap */
 			if (rgd != begin) /* If we didn't wrap */
 				break;
 
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 15/16] GFS2: Reduce file fragmentation
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (13 preceding siblings ...)
  2012-07-23  8:01 ` [PATCH 14/16] GFS2: kernel panic with small gfs2 filesystems - 1 RG Steven Whitehouse
@ 2012-07-23  8:01 ` Steven Whitehouse
  2012-07-23  8:01 ` [PATCH 16/16] GFS2: Eliminate 64-bit divides Steven Whitehouse
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:01 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Bob Peterson, Steven Whitehouse

From: Bob Peterson <rpeterso@redhat.com>

This patch reduces GFS2 file fragmentation by pre-reserving blocks. The
resulting improved on disk layout greatly speeds up operations in cases
which would have resulted in interlaced allocation of blocks previously.
A typical example of this is 10 parallel dd processes, each writing to a
file in a common dirctory.

The implementation uses an rbtree of reservations attached to each
resource group (and each inode).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6d957a8..49cd7dd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	if (error)
 		goto out_rlist;
 
+	if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
+		gfs2_rs_deltree(ip->i_res);
+
 	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
 				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
 				 revokes);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6fbf3cb..9f94832 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (ret)
 		return ret;
 
+	atomic_set(&ip->i_res->rs_sizehint,
+		   PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);
+
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret)
@@ -571,22 +574,15 @@ fail:
 
 static int gfs2_release(struct inode *inode, struct file *file)
 {
-	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-	struct gfs2_file *fp;
 	struct gfs2_inode *ip = GFS2_I(inode);
 
-	fp = file->private_data;
+	kfree(file->private_data);
 	file->private_data = NULL;
 
-	if ((file->f_mode & FMODE_WRITE) && ip->i_res &&
+	if ((file->f_mode & FMODE_WRITE) &&
 	    (atomic_read(&inode->i_writecount) == 1))
 		gfs2_rs_delete(ip);
 
-	if (gfs2_assert_warn(sdp, fp))
-		return -EIO;
-
-	kfree(fp);
-
 	return 0;
 }
 
@@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
+	size_t writesize = iov_length(iov, nr_segs);
 	struct dentry *dentry = file->f_dentry;
 	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct gfs2_sbd *sdp;
 	int ret;
 
+	sdp = GFS2_SB(file->f_mapping->host);
 	ret = gfs2_rs_alloc(ip);
 	if (ret)
 		return ret;
 
+	atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize);
 	if (file->f_flags & O_APPEND) {
 		struct gfs2_holder gh;
 
@@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (unlikely(error))
 		goto out_uninit;
 
+	atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize);
+
 	while (len > 0) {
 		if (len < bytes)
 			bytes = len;
@@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 			offset += bytes;
 			continue;
 		}
-		error = gfs2_rindex_update(sdp);
-		if (error)
-			goto out_unlock;
-
 		error = gfs2_quota_lock_check(ip);
 		if (error)
 			goto out_unlock;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index dc73070..aaecc80 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -84,6 +84,7 @@ struct gfs2_rgrpd {
 	u32 rd_data;			/* num of data blocks in rgrp */
 	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	u32 rd_free;
+	u32 rd_reserved;                /* number of blocks reserved */
 	u32 rd_free_clone;
 	u32 rd_dinodes;
 	u64 rd_igeneration;
@@ -96,6 +97,9 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
 #define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
+	spinlock_t rd_rsspin;           /* protects reservation related vars */
+	struct rb_root rd_rstree;       /* multi-block reservation tree */
+	u32 rd_rs_cnt;                  /* count of current reservations */
 };
 
 enum gfs2_state_bits {
@@ -233,6 +237,38 @@ struct gfs2_holder {
 	unsigned long gh_ip;
 };
 
+/* Resource group multi-block reservation, in order of appearance:
+
+   Step 1. Function prepares to write, allocates a mb, sets the size hint.
+   Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
+   Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
+   Step 4. Bits are assigned from the rgrp based on either the reservation
+           or wherever it can.
+*/
+
+struct gfs2_blkreserv {
+	/* components used during write (step 1): */
+	atomic_t rs_sizehint;         /* hint of the write size */
+
+	/* components used during inplace_reserve (step 2): */
+	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
+
+	/* components used during get_local_rgrp (step 3): */
+	struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
+	struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
+	struct rb_node rs_node;       /* link to other block reservations */
+
+	/* components used during block searches and assignments (step 4): */
+	struct gfs2_bitmap *rs_bi;    /* bitmap for the current allocation */
+	u32 rs_biblk;                 /* start block relative to the bi */
+	u32 rs_free;                  /* how many blocks are still free */
+
+	/* ancillary quota stuff */
+	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
+	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
+	unsigned int rs_qa_qd_num;
+};
+
 enum {
 	GLF_LOCK			= 1,
 	GLF_DEMOTE			= 3,
@@ -290,16 +326,6 @@ struct gfs2_glock {
 
 #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
 
-struct gfs2_blkreserv {
-	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-	struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
-
-	/* ancillary quota stuff */
-	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
-	struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
-	unsigned int rs_qa_qd_num;
-};
-
 enum {
 	GIF_INVALID		= 0,
 	GIF_QD_LOCKED		= 1,
@@ -307,7 +333,6 @@ enum {
 	GIF_SW_PAGED		= 3,
 };
 
-
 struct gfs2_inode {
 	struct inode i_inode;
 	u64 i_no_addr;
@@ -318,7 +343,7 @@ struct gfs2_inode {
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
 	struct gfs2_holder i_gh; /* for prepare/commit_write only */
-	struct gfs2_blkreserv *i_res; /* resource group block reservation */
+	struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
 	struct gfs2_rgrpd *i_rgd;
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b035e0..c53c67e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,6 +521,9 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	int error;
 
 	munge_mode_uid_gid(dip, &mode, &uid, &gid);
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
 
 	error = gfs2_quota_lock(dip, uid, gid);
 	if (error)
@@ -551,6 +554,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 	struct buffer_head *dibh;
 	int error;
 
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
+
 	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto fail;
@@ -596,7 +603,8 @@ fail_end_trans:
 	gfs2_trans_end(sdp);
 
 fail_ipreserv:
-	gfs2_inplace_release(dip);
+	if (alloc_required)
+		gfs2_inplace_release(dip);
 
 fail_quota_locks:
 	gfs2_quota_unlock(dip);
@@ -647,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	const struct qstr *name = &dentry->d_name;
 	struct gfs2_holder ghs[2];
 	struct inode *inode = NULL;
-	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
 	int error;
@@ -657,6 +665,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
+	/* We need a reservation to allocate the new dinode block. The
+	   directory ip temporarily points to the reservation, but this is
+	   being done to get a set of contiguous blocks for the new dinode.
+	   Since this is a create, we don't have a sizehint yet, so it will
+	   have to use the minimum reservation size. */
 	error = gfs2_rs_alloc(dip);
 	if (error)
 		return error;
@@ -694,24 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(inode))
 		goto fail_gunlock2;
 
-	error = gfs2_inode_refresh(GFS2_I(inode));
+	ip = GFS2_I(inode);
+	error = gfs2_inode_refresh(ip);
 	if (error)
 		goto fail_gunlock2;
 
-	/* the new inode needs a reservation so it can allocate xattrs. */
-	error = gfs2_rs_alloc(GFS2_I(inode));
-	if (error)
-		goto fail_gunlock2;
+	/* The newly created inode needs a reservation so it can allocate
+	   xattrs. At the same time, we want new blocks allocated to the new
+	   dinode to be as contiguous as possible. Since we allocated the
+	   dinode block under the directory's reservation, we transfer
+	   ownership of that reservation to the new inode. The directory
+	   doesn't need a reservation unless it needs a new allocation. */
+	ip->i_res = dip->i_res;
+	dip->i_res = NULL;
 
 	error = gfs2_acl_create(dip, inode);
 	if (error)
 		goto fail_gunlock2;
 
-	error = gfs2_security_init(dip, GFS2_I(inode), name);
+	error = gfs2_security_init(dip, ip, name);
 	if (error)
 		goto fail_gunlock2;
 
-	error = link_dinode(dip, name, GFS2_I(inode));
+	error = link_dinode(dip, name, ip);
 	if (error)
 		goto fail_gunlock2;
 
@@ -738,6 +756,7 @@ fail_gunlock:
 		iput(inode);
 	}
 fail:
+	gfs2_rs_delete(dip);
 	if (bh)
 		brelse(bh);
 	return error;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fb70792..4d34887 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,6 +35,9 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+#define RSRV_CONTENTION_FACTOR 4
+#define RGRP_RSRV_MAX_CONTENDERS 2
+
 #if BITS_PER_LONG == 32
 #define LBITMASK   (0x55555555UL)
 #define LBITSKIP55 (0x55555555UL)
@@ -178,6 +181,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 }
 
 /**
+ * rs_cmp - multi-block reservation range compare
+ * @blk: absolute file system block number of the new reservation
+ * @len: number of blocks in the new reservation
+ * @rs: existing reservation to compare against
+ *
+ * returns: 1 if the block range is beyond the reach of the reservation
+ *         -1 if the block range is before the start of the reservation
+ *          0 if the block range overlaps with the reservation
+ */
+static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+{
+	u64 startblk = gfs2_rs_startblk(rs);
+
+	if (blk >= startblk + rs->rs_free)
+		return 1;
+	if (blk + len - 1 < startblk)
+		return -1;
+	return 0;
+}
+
+/**
+ * rs_find - Find a rgrp multi-block reservation that contains a given block
+ * @rgd: The rgrp
+ * @rgblk: The block we're looking for, relative to the rgrp
+ */
+static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
+{
+	struct rb_node **newn;
+	int rc;
+	u64 fsblk = rgblk + rgd->rd_data0;
+
+	spin_lock(&rgd->rd_rsspin);
+	newn = &rgd->rd_rstree.rb_node;
+	while (*newn) {
+		struct gfs2_blkreserv *cur =
+			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+		rc = rs_cmp(fsblk, 1, cur);
+		if (rc < 0)
+			newn = &((*newn)->rb_left);
+		else if (rc > 0)
+			newn = &((*newn)->rb_right);
+		else {
+			spin_unlock(&rgd->rd_rsspin);
+			return cur;
+		}
+	}
+	spin_unlock(&rgd->rd_rsspin);
+	return NULL;
+}
+
+/**
  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
  *       a block in a given allocation state.
  * @buf: the buffer that holds the bitmaps
@@ -424,19 +478,93 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 int gfs2_rs_alloc(struct gfs2_inode *ip)
 {
 	int error = 0;
+	struct gfs2_blkreserv *res;
+
+	if (ip->i_res)
+		return 0;
+
+	res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+	if (!res)
+		error = -ENOMEM;
 
 	down_write(&ip->i_rw_mutex);
-	if (!ip->i_res) {
-		ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
-		if (!ip->i_res)
-			error = -ENOMEM;
-	}
+	if (ip->i_res)
+		kmem_cache_free(gfs2_rsrv_cachep, res);
+	else
+		ip->i_res = res;
 	up_write(&ip->i_rw_mutex);
 	return error;
 }
 
+static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+{
+	gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
+		       rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
+		       rs->rs_free);
+}
+
 /**
- * gfs2_rs_delete - delete a reservation
+ * __rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ */
+static void __rs_deltree(struct gfs2_blkreserv *rs)
+{
+	struct gfs2_rgrpd *rgd;
+
+	if (!gfs2_rs_active(rs))
+		return;
+
+	rgd = rs->rs_rgd;
+	/* We can't do this: The reason is that when the rgrp is invalidated,
+	   it's in the "middle" of acquiring the glock, but the HOLDER bit
+	   isn't set yet:
+	   BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
+	trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
+
+	if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
+		rb_erase(&rs->rs_node, &rgd->rd_rstree);
+	BUG_ON(!rgd->rd_rs_cnt);
+	rgd->rd_rs_cnt--;
+
+	if (rs->rs_free) {
+		/* return reserved blocks to the rgrp and the ip */
+		BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
+		rs->rs_rgd->rd_reserved -= rs->rs_free;
+		rs->rs_free = 0;
+		clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
+		smp_mb__after_clear_bit();
+	}
+	/* We can't change any of the step 1 or step 2 components of the rs.
+	   E.g. We can't set rs_rgd to NULL because the rgd glock is held and
+	   dequeued through this pointer.
+	   Can't: atomic_set(&rs->rs_sizehint, 0);
+	   Can't: rs->rs_requested = 0;
+	   Can't: rs->rs_rgd = NULL;*/
+	rs->rs_bi = NULL;
+	rs->rs_biblk = 0;
+}
+
+/**
+ * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ */
+void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
+{
+	struct gfs2_rgrpd *rgd;
+
+	if (!gfs2_rs_active(rs))
+		return;
+
+	rgd = rs->rs_rgd;
+	spin_lock(&rgd->rd_rsspin);
+	__rs_deltree(rs);
+	spin_unlock(&rgd->rd_rsspin);
+}
+
+/**
+ * gfs2_rs_delete - delete a multi-block reservation
  * @ip: The inode for this reservation
  *
  */
@@ -444,12 +572,36 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
 {
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_res) {
+		gfs2_rs_deltree(ip->i_res);
+		trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
+		BUG_ON(ip->i_res->rs_free);
 		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
 		ip->i_res = NULL;
 	}
 	up_write(&ip->i_rw_mutex);
 }
 
+/**
+ * return_all_reservations - return all reserved blocks back to the rgrp.
+ * @rgd: the rgrp that needs its space back
+ *
+ * We previously reserved a bunch of blocks for allocation. Now we need to
+ * give them back. This leave the reservation structures in tact, but removes
+ * all of their corresponding "no-fly zones".
+ */
+static void return_all_reservations(struct gfs2_rgrpd *rgd)
+{
+	struct rb_node *n;
+	struct gfs2_blkreserv *rs;
+
+	spin_lock(&rgd->rd_rsspin);
+	while ((n = rb_first(&rgd->rd_rstree))) {
+		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+		__rs_deltree(rs);
+	}
+	spin_unlock(&rgd->rd_rsspin);
+}
+
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 {
 	struct rb_node *n;
@@ -472,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 
 		gfs2_free_clones(rgd);
 		kfree(rgd->rd_bits);
+		return_all_reservations(rgd);
 		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 	}
 }
@@ -649,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 	rgd->rd_data = be32_to_cpu(buf.ri_data);
 	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
+	spin_lock_init(&rgd->rd_rsspin);
 
 	error = compute_bitstructs(rgd);
 	if (error)
@@ -1115,29 +1269,212 @@ out:
 }
 
 /**
+ * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
+ * @bi: the bitmap with the blocks
+ * @ip: the inode structure
+ * @biblk: the 32-bit block number relative to the start of the bitmap
+ * @amount: the number of blocks to reserve
+ *
+ * Returns: NULL - reservation was already taken, so not inserted
+ *          pointer to the inserted reservation
+ */
+static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
+				       struct gfs2_inode *ip, u32 biblk,
+				       int amount)
+{
+	struct rb_node **newn, *parent = NULL;
+	int rc;
+	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_rgrpd *rgd = rs->rs_rgd;
+	u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
+
+	spin_lock(&rgd->rd_rsspin);
+	newn = &rgd->rd_rstree.rb_node;
+	BUG_ON(!ip->i_res);
+	BUG_ON(gfs2_rs_active(rs));
+	/* Figure out where to put new node */
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+	while (*newn) {
+		struct gfs2_blkreserv *cur =
+			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+
+		parent = *newn;
+		rc = rs_cmp(fsblock, amount, cur);
+		if (rc > 0)
+			newn = &((*newn)->rb_right);
+		else if (rc < 0)
+			newn = &((*newn)->rb_left);
+		else {
+			spin_unlock(&rgd->rd_rsspin);
+			return NULL; /* reservation already in use */
+		}
+	}
+
+	/* Do our reservation work */
+	rs = ip->i_res;
+	rs->rs_free = amount;
+	rs->rs_biblk = biblk;
+	rs->rs_bi = bi;
+	rb_link_node(&rs->rs_node, parent, newn);
+	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
+
+	/* Do our inode accounting for the reservation */
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+
+	/* Do our rgrp accounting for the reservation */
+	rgd->rd_reserved += amount; /* blocks reserved */
+	rgd->rd_rs_cnt++; /* number of in-tree reservations */
+	spin_unlock(&rgd->rd_rsspin);
+	trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
+	return rs;
+}
+
+/**
+ * unclaimed_blocks - return number of blocks that aren't spoken for
+ */
+static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
+{
+	return rgd->rd_free_clone - rgd->rd_reserved;
+}
+
+/**
+ * rg_mblk_search - find a group of multiple free blocks
+ * @rgd: the resource group descriptor
+ * @rs: the block reservation
+ * @ip: pointer to the inode for which we're reserving blocks
+ *
+ * This is very similar to rgblk_search, except we're looking for whole
+ * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
+ * on aligned dwords for speed's sake.
+ *
+ * Returns: 0 if successful or BFITNOENT if there isn't enough free space
+ */
+
+static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+{
+	struct gfs2_bitmap *bi = rgd->rd_bits;
+	const u32 length = rgd->rd_length;
+	u32 blk;
+	unsigned int buf, x, search_bytes;
+	u8 *buffer = NULL;
+	u8 *ptr, *end, *nonzero;
+	u32 goal, rsv_bytes;
+	struct gfs2_blkreserv *rs;
+	u32 best_rs_bytes, unclaimed;
+	int best_rs_blocks;
+
+	/* Find bitmap block that contains bits for goal block */
+	if (rgrp_contains_block(rgd, ip->i_goal))
+		goal = ip->i_goal - rgd->rd_data0;
+	else
+		goal = rgd->rd_last_alloc;
+	for (buf = 0; buf < length; buf++) {
+		bi = rgd->rd_bits + buf;
+		/* Convert scope of "goal" from rgrp-wide to within
+		   found bit block */
+		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
+			goal -= bi->bi_start * GFS2_NBBY;
+			goto do_search;
+		}
+	}
+	buf = 0;
+	goal = 0;
+
+do_search:
+	best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
+			       (RGRP_RSRV_MINBLKS * rgd->rd_length));
+	best_rs_bytes = (best_rs_blocks *
+			 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
+		GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
+	best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
+	unclaimed = unclaimed_blocks(rgd);
+	if (best_rs_bytes * GFS2_NBBY > unclaimed)
+		best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
+
+	for (x = 0; x <= length; x++) {
+		bi = rgd->rd_bits + buf;
+
+		if (test_bit(GBF_FULL, &bi->bi_flags))
+			goto skip;
+
+		WARN_ON(!buffer_uptodate(bi->bi_bh));
+		if (bi->bi_clone)
+			buffer = bi->bi_clone + bi->bi_offset;
+		else
+			buffer = bi->bi_bh->b_data + bi->bi_offset;
+
+		/* We have to keep the reservations aligned on u64 boundaries
+		   otherwise we could get situations where a byte can't be
+		   used because it's after a reservation, but a free bit still
+		   is within the reservation's area. */
+		ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
+		end = (buffer + bi->bi_len);
+		while (ptr < end) {
+			rsv_bytes = 0;
+			if ((ptr + best_rs_bytes) <= end)
+				search_bytes = best_rs_bytes;
+			else
+				search_bytes = end - ptr;
+			BUG_ON(!search_bytes);
+			nonzero = memchr_inv(ptr, 0, search_bytes);
+			/* If the lot is all zeroes, reserve the whole size. If
+			   there's enough zeroes to satisfy the request, use
+			   what we can. If there's not enough, keep looking. */
+			if (nonzero == NULL)
+				rsv_bytes = search_bytes;
+			else if ((nonzero - ptr) * GFS2_NBBY >=
+				 ip->i_res->rs_requested)
+				rsv_bytes = (nonzero - ptr);
+
+			if (rsv_bytes) {
+				blk = ((ptr - buffer) * GFS2_NBBY);
+				BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
+				rs = rs_insert(bi, ip, blk,
+					       rsv_bytes * GFS2_NBBY);
+				if (IS_ERR(rs))
+					return PTR_ERR(rs);
+				if (rs)
+					return 0;
+			}
+			ptr += ALIGN(search_bytes, sizeof(u64));
+		}
+skip:
+		/* Try next bitmap block (wrap back to rgrp header
+		   if at end) */
+		buf++;
+		buf %= length;
+		goal = 0;
+	}
+
+	return BFITNOENT;
+}
+
+/**
  * try_rgrp_fit - See if a given reservation will fit in a given RG
  * @rgd: the RG data
  * @ip: the inode
  *
  * If there's room for the requested blocks to be allocated from the RG:
+ * This will try to get a multi-block reservation first, and if that doesn't
+ * fit, it will take what it can.
  *
  * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
  */
 
-static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
+static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-	const struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_blkreserv *rs = ip->i_res;
 
 	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
 		return 0;
-	if (rgd->rd_free_clone >= rs->rs_requested)
+	/* Look for a multi-block reservation. */
+	if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
+	    rg_mblk_search(rgd, ip) != BFITNOENT)
+		return 1;
+	if (unclaimed_blocks(rgd) >= rs->rs_requested)
 		return 1;
-	return 0;
-}
 
-static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
-{
-	return (bi->bi_start * GFS2_NBBY) + blk;
+	return 0;
 }
 
 /**
@@ -1217,7 +1554,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_rgrpd *rgd, *begin = NULL;
+	struct gfs2_rgrpd *begin = NULL;
 	struct gfs2_blkreserv *rs = ip->i_res;
 	int error = 0, rg_locked, flags = LM_FLAG_TRY;
 	u64 last_unlinked = NO_BLOCK;
@@ -1225,32 +1562,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 
 	if (sdp->sd_args.ar_rgrplvb)
 		flags |= GL_SKIP;
-	rs = ip->i_res;
 	rs->rs_requested = requested;
 	if (gfs2_assert_warn(sdp, requested)) {
 		error = -EINVAL;
 		goto out;
 	}
-
-	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
-		rgd = begin = ip->i_rgd;
-	else
-		rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
-
-	if (rgd == NULL)
+	if (gfs2_rs_active(rs)) {
+		begin = rs->rs_rgd;
+		flags = 0; /* Yoda: Do or do not. There is no try */
+	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
+		rs->rs_rgd = begin = ip->i_rgd;
+	} else {
+		rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+	}
+	if (rs->rs_rgd == NULL)
 		return -EBADSLT;
 
 	while (loops < 3) {
 		rg_locked = 0;
 
-		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
+		if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
 			rg_locked = 1;
 			error = 0;
+		} else if (!loops && !gfs2_rs_active(rs) &&
+			   rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
+			/* If the rgrp already is maxed out for contenders,
+			   we can eliminate it as a "first pass" without even
+			   requesting the rgrp glock. */
+			error = GLR_TRYFAILED;
 		} else {
-			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
-						   flags, &rs->rs_rgd_gh);
+			error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
+						   LM_ST_EXCLUSIVE, flags,
+						   &rs->rs_rgd_gh);
 			if (!error && sdp->sd_args.ar_rgrplvb) {
-				error = update_rgrp_lvb(rgd);
+				error = update_rgrp_lvb(rs->rs_rgd);
 				if (error) {
 					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 					return error;
@@ -1259,25 +1604,37 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 		}
 		switch (error) {
 		case 0:
-			if (try_rgrp_fit(rgd, ip)) {
+			if (gfs2_rs_active(rs)) {
+				if (unclaimed_blocks(rs->rs_rgd) +
+				    rs->rs_free >= rs->rs_requested) {
+					ip->i_rgd = rs->rs_rgd;
+					return 0;
+				}
+				/* We have a multi-block reservation, but the
+				   rgrp doesn't have enough free blocks to
+				   satisfy the request. Free the reservation
+				   and look for a suitable rgrp. */
+				gfs2_rs_deltree(rs);
+			}
+			if (try_rgrp_fit(rs->rs_rgd, ip)) {
 				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rgd);
-				ip->i_rgd = rgd;
+					gfs2_rgrp_bh_get(rs->rs_rgd);
+				ip->i_rgd = rs->rs_rgd;
 				return 0;
 			}
-			if (rgd->rd_flags & GFS2_RDF_CHECK) {
+			if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
 				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rgd);
-				try_rgrp_unlink(rgd, &last_unlinked,
+					gfs2_rgrp_bh_get(rs->rs_rgd);
+				try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
 						ip->i_no_addr);
 			}
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 			/* fall through */
 		case GLR_TRYFAILED:
-			rgd = gfs2_rgrpd_get_next(rgd);
-			rgd = rgd ? : begin; /* if NULL, wrap */
-			if (rgd != begin) /* If we didn't wrap */
+			rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
+			rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
+			if (rs->rs_rgd != begin) /* If we didn't wrap */
 				break;
 
 			flags &= ~LM_FLAG_TRY;
@@ -1315,6 +1672,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 {
 	struct gfs2_blkreserv *rs = ip->i_res;
 
+	if (!rs)
+		return;
+
+	if (!rs->rs_free)
+		gfs2_rs_deltree(rs);
+
 	if (rs->rs_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 	rs->rs_requested = 0;
@@ -1413,7 +1776,27 @@ do_search:
 		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
 			buffer = bi->bi_clone + bi->bi_offset;
 
-		biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+		while (1) {
+			struct gfs2_blkreserv *rs;
+			u32 rgblk;
+
+			biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+			if (biblk == BFITNOENT)
+				break;
+			/* Check if this block is reserved() */
+			rgblk = gfs2_bi2rgd_blk(bi, biblk);
+			rs = rs_find(rgd, rgblk);
+			if (rs == NULL)
+				break;
+
+			BUG_ON(rs->rs_bi != bi);
+			biblk = BFITNOENT;
+			/* This should jump to the first block after the
+			   reservation. */
+			goal = rs->rs_biblk + rs->rs_free;
+			if (goal >= bi->bi_len * GFS2_NBBY)
+				break;
+		}
 		if (biblk != BFITNOENT)
 			break;
 
@@ -1449,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
 			     u32 blk, bool dinode, unsigned int *n)
 {
 	const unsigned int elen = *n;
-	u32 goal;
+	u32 goal, rgblk;
 	const u8 *buffer = NULL;
+	struct gfs2_blkreserv *rs;
 
 	*n = 0;
 	buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1463,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
 		goal++;
 		if (goal >= (bi->bi_len * GFS2_NBBY))
 			break;
+		rgblk = gfs2_bi2rgd_blk(bi, goal);
+		rs = rs_find(rgd, rgblk);
+		if (rs) /* Oops, we bumped into someone's reservation */
+			break;
 		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
 		    GFS2_BLKST_FREE)
 			break;
@@ -1538,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 
 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 {
-	const struct gfs2_rgrpd *rgd = gl->gl_object;
+	struct gfs2_rgrpd *rgd = gl->gl_object;
+	struct gfs2_blkreserv *trs;
+	const struct rb_node *n;
+
 	if (rgd == NULL)
 		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
+		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
+		       rgd->rd_reserved);
+	spin_lock(&rgd->rd_rsspin);
+	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
+		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+		dump_rs(seq, trs);
+	}
+	spin_unlock(&rgd->rd_rsspin);
 	return 0;
 }
 
@@ -1558,10 +1956,63 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 }
 
 /**
+ * claim_reserved_blks - Claim previously reserved blocks
+ * @ip: the inode that's claiming the reservation
+ * @dinode: 1 if this block is a dinode block, otherwise data block
+ * @nblocks: desired extent length
+ *
+ * Lay claim to previously allocated block reservation blocks.
+ * Returns: Starting block number of the blocks claimed.
+ * Sets *nblocks to the actual extent length allocated.
+ */
+static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
+			       unsigned int *nblocks)
+{
+	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_rgrpd *rgd = rs->rs_rgd;
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct gfs2_bitmap *bi;
+	u64 start_block = gfs2_rs_startblk(rs);
+	const unsigned int elen = *nblocks;
+
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+	gfs2_assert_withdraw(sdp, rgd);
+	/*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+	bi = rs->rs_bi;
+	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+
+	for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
+		/* Make sure the bitmap hasn't changed */
+		gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
+			    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+		rs->rs_biblk++;
+		rs->rs_free--;
+
+		BUG_ON(!rgd->rd_reserved);
+		rgd->rd_reserved--;
+		dinode = false;
+		trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+	}
+
+	if (!rs->rs_free) {
+		struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
+
+		gfs2_rs_deltree(rs);
+		/* -nblocks because we haven't returned to do the math yet.
+		   I'm doing the math backwards to prevent negative numbers,
+		   but think of it as:
+		   if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
+		if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
+			rg_mblk_search(rgd, ip);
+	}
+	return start_block;
+}
+
+/**
  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
  * @ip: the inode to allocate the block for
  * @bn: Used to return the starting block number
- * @ndata: requested number of blocks/extent length (value/result)
+ * @nblocks: requested number of blocks/extent length (value/result)
  * @dinode: 1 if we're allocating a dinode block, else 0
  * @generation: the generation number of the inode
  *
@@ -1586,20 +2037,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	if (ip->i_res->rs_requested == 0)
 		return -ECANCELED;
 
-	rgd = ip->i_rgd;
-
-	if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
-		goal = ip->i_goal - rgd->rd_data0;
-	else
-		goal = rgd->rd_last_alloc;
-
-	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+	/* Check if we have a multi-block reservation, and if so, claim the
+	   next free block from it. */
+	if (gfs2_rs_active(ip->i_res)) {
+		BUG_ON(!ip->i_res->rs_free);
+		rgd = ip->i_res->rs_rgd;
+		block = claim_reserved_blks(ip, dinode, nblocks);
+	} else {
+		rgd = ip->i_rgd;
 
-	/* Since all blocks are reserved in advance, this shouldn't happen */
-	if (blk == BFITNOENT)
-		goto rgrp_error;
+		if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
+			goal = ip->i_goal - rgd->rd_data0;
+		else
+			goal = rgd->rd_last_alloc;
+
+		blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+
+		/* Since all blocks are reserved in advance, this shouldn't
+		   happen */
+		if (blk == BFITNOENT) {
+			printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
+			       *nblocks);
+			printk(KERN_WARNING "FULL=%d\n",
+			       test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
+			goto rgrp_error;
+		}
 
-	block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+		block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+	}
 	ndata = *nblocks;
 	if (dinode)
 		ndata--;
@@ -1616,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 			brelse(dibh);
 		}
 	}
-	if (rgd->rd_free < *nblocks)
+	if (rgd->rd_free < *nblocks) {
+		printk(KERN_WARNING "nblocks=%u\n", *nblocks);
 		goto rgrp_error;
+	}
 
 	rgd->rd_free -= *nblocks;
 	if (dinode) {
@@ -1877,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
 		for (x = 0; x < rlist->rl_rgrps; x++)
 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
 		kfree(rlist->rl_ghs);
+		rlist->rl_ghs = NULL;
 	}
 }
 
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 5d8314d..ca6e267 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -13,6 +13,14 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+/* Since each block in the file system is represented by two bits in the
+ * bitmap, one 64-bit word in the bitmap will represent 32 blocks.
+ * By reserving 32 blocks at a time, we can optimize / shortcut how we search
+ * through the bitmaps by looking a word at a time.
+ */
+#define RGRP_RSRV_MINBYTES 8
+#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+
 struct gfs2_rgrpd;
 struct gfs2_sbd;
 struct gfs2_holder;
@@ -29,6 +37,8 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 
+extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+
 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
@@ -36,6 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
 extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
 extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -62,7 +73,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
-/* This is how to tell if a reservation is "inplace" reserved: */
+/* This is how to tell if a multi-block reservation is "inplace" reserved: */
 static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
 {
 	if (ip->i_res && ip->i_res->rs_requested)
@@ -70,4 +81,22 @@ static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
 	return 0;
 }
 
+/* This is how to tell if a multi-block reservation is in the rgrp tree: */
+static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
+{
+	if (rs && rs->rs_bi)
+		return 1;
+	return 0;
+}
+
+static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
+{
+	return (bi->bi_start * GFS2_NBBY) + blk;
+}
+
+static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
+{
+	return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
+}
+
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 7880687..b1502c4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1420,6 +1420,10 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		return error;
+
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		return error;
@@ -1550,6 +1554,9 @@ out_truncate:
 
 out_unlock:
 	/* Error path for case 1 */
+	if (gfs2_rs_active(ip->i_res))
+		gfs2_rs_deltree(ip->i_res);
+
 	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
 		gfs2_glock_dq(&ip->i_iopen_gh);
 	gfs2_holder_uninit(&ip->i_iopen_gh);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 1b8b815..a25c252 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -14,6 +14,7 @@
 #include <linux/ktime.h>
 #include "incore.h"
 #include "glock.h"
+#include "rgrp.h"
 
 #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
 #define glock_trace_name(x) __print_symbolic(x,		\
@@ -31,6 +32,17 @@
 			    { GFS2_BLKST_DINODE, "dinode" },	\
 			    { GFS2_BLKST_UNLINKED, "unlinked" })
 
+#define TRACE_RS_DELETE  0
+#define TRACE_RS_TREEDEL 1
+#define TRACE_RS_INSERT  2
+#define TRACE_RS_CLAIM   3
+
+#define rs_func_name(x) __print_symbolic(x,	\
+					 { 0, "del " },	\
+					 { 1, "tdel" },	\
+					 { 2, "ins " },	\
+					 { 3, "clm " })
+
 #define show_glock_flags(flags) __print_flags(flags, "",	\
 	{(1UL << GLF_LOCK),			"l" },		\
 	{(1UL << GLF_DEMOTE),			"D" },		\
@@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc,
 		__field(	u8,	block_state		)
 		__field(        u64,	rd_addr			)
 		__field(        u32,	rd_free_clone		)
+		__field(	u32,	rd_reserved		)
 	),
 
 	TP_fast_assign(
@@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc,
 		__entry->block_state	= block_state;
 		__entry->rd_addr	= rgd->rd_addr;
 		__entry->rd_free_clone	= rgd->rd_free_clone;
+		__entry->rd_reserved	= rgd->rd_reserved;
 	),
 
-	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
+	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
 		  (unsigned long)__entry->len,
 		  block_state_name(__entry->block_state),
 		  (unsigned long long)__entry->rd_addr,
-		  __entry->rd_free_clone)
+		  __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+);
+
+/* Keep track of multi-block reservations as they are allocated/freed */
+TRACE_EVENT(gfs2_rs,
+
+	TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
+		 u8 func),
+
+	TP_ARGS(ip, rs, func),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	rd_addr			)
+		__field(	u32,	rd_free_clone		)
+		__field(	u32,	rd_reserved		)
+		__field(	u64,	inum			)
+		__field(	u64,	start			)
+		__field(	u32,	free			)
+		__field(	u8,	func			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
+		__entry->rd_addr	= rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
+		__entry->rd_free_clone	= rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
+		__entry->rd_reserved	= rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
+		__entry->inum		= ip ? ip->i_no_addr : 0;
+		__entry->start		= gfs2_rs_startblk(rs);
+		__entry->free		= rs->rs_free;
+		__entry->func		= func;
+	),
+
+	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
+		  "f:%lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->start,
+		  (unsigned long long)__entry->rd_addr,
+		  (unsigned long)__entry->rd_free_clone,
+		  (unsigned long)__entry->rd_reserved,
+		  rs_func_name(__entry->func), (unsigned long)__entry->free)
 );
 
 #endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 523c0de..27a0b4a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -327,6 +327,10 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 {
 	int error;
 
+	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+	if (error)
+		return error;
+
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		goto out_alloc;
@@ -710,6 +714,10 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	struct buffer_head *dibh;
 	int error;
 
+	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+	if (error)
+		return error;
+
 	error = gfs2_quota_lock_check(ip);
 	if (error)
 		return error;
@@ -1483,6 +1491,10 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
 	int error;
 
+	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
+	if (error)
+		return error;
+
 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
 	if (error)
 		return error;
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 16/16] GFS2: Eliminate 64-bit divides
  2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (14 preceding siblings ...)
  2012-07-23  8:01 ` [PATCH 15/16] GFS2: Reduce file fragmentation Steven Whitehouse
@ 2012-07-23  8:01 ` Steven Whitehouse
  15 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:01 UTC (permalink / raw)
  To: linux-kernel, cluster-devel; +Cc: Bob Peterson, Steven Whitehouse

From: Bob Peterson <rpeterso@redhat.com>

This patch removes the 64-bit divides introduced in the previous patch
in favor of shifting, so that it will compile properly on 32-bit machines.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9f94832..9aa6af1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -384,7 +384,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return ret;
 
 	atomic_set(&ip->i_res->rs_sizehint,
-		   PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);
+		   PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
@@ -669,7 +669,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (ret)
 		return ret;
 
-	atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize);
+	atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift);
 	if (file->f_flags & O_APPEND) {
 		struct gfs2_holder gh;
 
@@ -795,7 +795,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (unlikely(error))
 		goto out_uninit;
 
-	atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize);
+	atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift);
 
 	while (len > 0) {
 		if (len < bytes)
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2014-10-08  9:53 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2014-10-08  9:53 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

Not a huge amount this time... just four patches. This time we have a couple
of bug fixes, one relating to bad i_goal values which are now ignored (i_goal
is basically a hint so it is safe to so this) and another relating to the
saving of the dirent location during rename. There is one performance
improvement, which is an optimisation in rgblk_free so that multiple block
deallocations will now be more efficient, and one clean up patch to use
_RET_IP_ rather than writing it out longhand,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2014-06-03 11:02 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2014-06-03 11:02 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

This must be about the smallest merge window patch set ever for GFS2.
It is probably also the first one without a single patch from me. That
is down to a combination of factors, and I have some things in the works
that are not quite ready yet, that I hope to put in next time around.

Returning to what is here this time... we have 3 patches which fix
various warnings. Two are bug fixes (for quotas and also a
rare recovery race condition). The final patch, from Ben Marzinski,
is an important change in the freeze code which has been in
progress for some time. This removes the need to take and drop the
transaction lock for every single transaction, when the only time it
was used, was at file system freeze time. Ben's patch integrates the
freeze operation into the journal flush code as an alternative with
lower overheads and also lands up resolving some difficult to fix races
at the same time,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2014-04-01  9:15 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2014-04-01  9:15 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

Here is the current content of the GFS2 -nmw tree for the
current merge window.

One of the main highlights this time, is not the patches themselves
but instead the widening contributor base. It is good to see that
interest is increasing in GFS2, and I'd like to thank all the
contributors to this patch set.

In addition to the usual set of bug fixes and clean ups, there are
patches to improve inode creation performance when xattrs are required
and some improvements to the transaction code which is intended to help
improve scalability after further changes in due course. Journal extent
mapping is also updated to make it more efficient and again, this is a
foundation for future work in this area.

The maximum number of ACLs has been increased to 300 (for a 4k block size)
which means that even with a few additional xattrs from selinux,
everything should fit within a single fs block. There is also a patch
to bring GFS2's own copy of the writepages code up to the same level as
the core VFS. Eventually we may be able to merge some of this code, since
it is fairly similar.

The other major change this time, is bringing consistency to the printing
of messages via fs_<level>, pr_<level> macros. 

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2014-01-20 12:23 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2014-01-20 12:23 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

Here are the pending patches for the merge window which are currently
in the GFS2 tree.

The main topics this time are allocation, in the form of Bob's
improvements when searching resource groups and several updates
to quotas which should increase scalability. The quota changes
follow on from those in the last merge window, and there will
likely be further work to come in this area in due course.

There are also a few patches which help to improve efficiency
of adding entries into directories, and clean up some of that
code.

One on-disk change is included this time, which is to write some
additional information which should be useful to fsck and
also potentially for debugging.

Other than that, its just a few small random bug fixes and
clean ups,

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2013-11-04 11:09 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2013-11-04 11:09 UTC (permalink / raw)
  To: cluster-devel, linux-kernel

Hi,

I'm just back from firstly Edinburgh, and secondly holiday, and the
merge window is again upon us. I've added in the three pending patches
which were under test while I was away and then that should be it for
this time.

The main feature of interest this time is quota updates. There are
some clean ups and some patches to use the new generic lru list
code. There is still plenty of scope for some further changes in
due course - faster lookups of quota structures is very much
on the todo list. Also, a start has been made towards the more tricky
issue of using the generic lru code with glocks, but that will
have to be completed in a subsequent merge window.

The other, more minor feature, is that there have been a number of
performance patches which relate to block allocation. In particular
they will improve performance when the disk is nearly full,

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2 Pre-pull patch posting (merge window)
@ 2013-09-05  9:02 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2013-09-05  9:02 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

This is the smallest merge window patch set for GFS2 for quite
some time. Only one of the patches (moving gfs2_sync_meta) is
a non-bug fix patch, although the merge ordered and writeback
writepage patch is also a nice clean up.

A couple of the patches are quite recently added, due to my only
having recently returned from holiday, so I'll give them a couple
of extra days in -next before sending the pull request.

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2013-07-01  9:33 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2013-07-01  9:33 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

There are a few bug fixes for various, mostly very minor corner
cases, plus some interesting new features. The new features
include atomic_open whose main benefit will be the reduction in
locking overhead in case of combined lookup/create and open operations,
sorting the log buffer lists by block number to improve the efficiency
of AIL writeback, and agressively issuing revokes in gfs2_log_flush
to reduce overhead when dropping glocks,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2013-04-26  9:18 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2013-04-26  9:18 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

Since the merge window is coming up soon, I'm posting the content of
the GFS2 -nmw tree as usual. There is not a whole lot of change this
time - there are some further changes which are in the works, but those
will be held over until next time.

Here there are some clean ups to inode creation, the addition of an
origin (local or remote) indicator to glock demote requests, removal
of one of the remaining GFP_NOFAIL allocations during log flushes,
one minor clean up, and a one liner bug fix,

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2013-02-19 10:07 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2013-02-19 10:07 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

This is one of the smallest collections of patches for the merge
window for some time. There are some clean ups relating to the
transaction code and the shrinker, which are mostly in preparation
for further development, but also make the code much easier to
follow in these areas.

There is a patch which allows the use of ->writepages even in the
default ordered write mode for all writebacks. This results in
sending larger i/os to the block layer, and a subsequent increase
in performance. It also reduces the number of different i/o paths
by one.

There is also a bug fix reinstating the withdraw ack system which
somehow got lost when the lock modules were merged into GFS2.

And thats all this time around,

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2012-11-30  9:52 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-11-30  9:52 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

So yes, this is a bit early, but the tree seems to have settled down
now, and I'd like to hold off any further feature patches until the
subsequent merge window at this stage.

The main feature this time is the new Orlov allocator and the patches
leading up to it which allow us to allocate new inodes from their own
allocation context, rather than borrowing that of their parent directory.
It is this change which then allows us to choose a different location
for subdirectories when required. This works exactly as per the ext3
implementation from the users point of view.

In addition to that, we've got a speed up in gfs2_rbm_from_block()
from Bob Peterson, three locking related improvements from Dave
Teigland plus a selection of smaller bug fixes and clean ups.

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2012-09-26  8:25 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-09-26  8:25 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

We've collected up a goodly number of patches in the -nmw tree now
and we can hold off any further changes until the following merge
window, so here is the current tree content.

The major feature this time is the "rbm" conversion in the resource
group code. The new struct gfs2_rbm specifies the location of an
allocatable block in (resource group, bitmap, offset) form. There
are a number of added helper functions, and later patches then
rewrite some of the resource group code in terms of this new
structure. Not only does this give us a nice code clean up, but
it also removes some of the previous restructions where extents
could not cross bitmap boundaries, for example.

In addition to that, there are a few bug fixes and clean ups, but
the rbm work is by far the majority of this patch set in terms of
number of changed lines.

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2012-05-17 12:23 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-05-17 12:23 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

Since the merge window appears to be fast approaching, here are the
current GFS2 patches. This time there are two main themes, one is
updates to the log code, mostly on the writing side. The other is
preparation for some block reservation work which will probably
land in the subsequent merge window.

There is of course the usual collection of cleanup and bug fixes
as well. See the individual patches for the detailed descriptions,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-04-02 15:35                   ` Randy Dunlap
@ 2012-04-02 15:47                     ` Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-04-02 15:47 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: Benjamin Poirier, David Teigland, linux-kernel, cluster-devel

Hi,

On Mon, 2012-04-02 at 08:35 -0700, Randy Dunlap wrote:
> On 03/26/2012 03:44 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > On Fri, 2012-03-23 at 18:06 -0400, Benjamin Poirier wrote:
> > [snip]
> >>
> >> Instead of trying to select everything in GFS2, how about doing it this way?
> >>
> >> [PATCH] gfs2: use depends instead of select in kconfig
> >>
> >> Avoids having to duplicate the dependencies of what is 'select'ed (and on
> >> down...)
> >>
> >> Those dependencies are currently incomplete, leading to broken builds with
> >> GFS2_FS_LOCKING_DLM=y and IP_SCTP=n.
> >>
> >> Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
> >> ---
> >>  fs/gfs2/Kconfig |    7 ++-----
> >>  1 files changed, 2 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> >> index c465ae0..eb08c9e 100644
> >> --- a/fs/gfs2/Kconfig
> >> +++ b/fs/gfs2/Kconfig
> >> @@ -1,10 +1,6 @@
> >>  config GFS2_FS
> >>  	tristate "GFS2 file system support"
> >>  	depends on (64BIT || LBDAF)
> >> -	select DLM if GFS2_FS_LOCKING_DLM
> >> -	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >> -	select SYSFS if GFS2_FS_LOCKING_DLM
> >> -	select IP_SCTP if DLM_SCTP
> >>  	select FS_POSIX_ACL
> >>  	select CRC32
> >>  	select QUOTACTL
> >> @@ -29,7 +25,8 @@ config GFS2_FS
> >>  
> >>  config GFS2_FS_LOCKING_DLM
> >>  	bool "GFS2 DLM locking"
> >> -	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
> >> +	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> >> +		HOTPLUG && DLM && CONFIGFS_FS && SYSFS
> >>  	help
> >>  	  Multiple node locking module for GFS2
> >>  
> > 
> > That looks ok to me. I've put it in the GFS2 -fixes tree, and if
> > everybody is happy with that I'll send a pull request shortly,
> 
> 
> Can we get Benjamin's patch merged, please?
> linux-next is still having build errors without it.
> 

It is in the GFS2 -nmw tree now, so it will be in linux-next shortly.
I'll merge up the -fixes tree shortly, but I'm expecting one more patch
for that very shortly,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-26 10:44                 ` Steven Whitehouse
@ 2012-04-02 15:35                   ` Randy Dunlap
  2012-04-02 15:47                     ` Steven Whitehouse
  0 siblings, 1 reply; 51+ messages in thread
From: Randy Dunlap @ 2012-04-02 15:35 UTC (permalink / raw)
  To: Steven Whitehouse
  Cc: Benjamin Poirier, David Teigland, linux-kernel, cluster-devel

On 03/26/2012 03:44 AM, Steven Whitehouse wrote:

> Hi,
> 
> On Fri, 2012-03-23 at 18:06 -0400, Benjamin Poirier wrote:
> [snip]
>>
>> Instead of trying to select everything in GFS2, how about doing it this way?
>>
>> [PATCH] gfs2: use depends instead of select in kconfig
>>
>> Avoids having to duplicate the dependencies of what is 'select'ed (and on
>> down...)
>>
>> Those dependencies are currently incomplete, leading to broken builds with
>> GFS2_FS_LOCKING_DLM=y and IP_SCTP=n.
>>
>> Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
>> ---
>>  fs/gfs2/Kconfig |    7 ++-----
>>  1 files changed, 2 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
>> index c465ae0..eb08c9e 100644
>> --- a/fs/gfs2/Kconfig
>> +++ b/fs/gfs2/Kconfig
>> @@ -1,10 +1,6 @@
>>  config GFS2_FS
>>  	tristate "GFS2 file system support"
>>  	depends on (64BIT || LBDAF)
>> -	select DLM if GFS2_FS_LOCKING_DLM
>> -	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
>> -	select SYSFS if GFS2_FS_LOCKING_DLM
>> -	select IP_SCTP if DLM_SCTP
>>  	select FS_POSIX_ACL
>>  	select CRC32
>>  	select QUOTACTL
>> @@ -29,7 +25,8 @@ config GFS2_FS
>>  
>>  config GFS2_FS_LOCKING_DLM
>>  	bool "GFS2 DLM locking"
>> -	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
>> +	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
>> +		HOTPLUG && DLM && CONFIGFS_FS && SYSFS
>>  	help
>>  	  Multiple node locking module for GFS2
>>  
> 
> That looks ok to me. I've put it in the GFS2 -fixes tree, and if
> everybody is happy with that I'll send a pull request shortly,


Can we get Benjamin's patch merged, please?
linux-next is still having build errors without it.

-- 
~Randy

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 22:06               ` Benjamin Poirier
  2012-03-23 22:48                 ` Randy Dunlap
@ 2012-03-26 10:44                 ` Steven Whitehouse
  2012-04-02 15:35                   ` Randy Dunlap
  1 sibling, 1 reply; 51+ messages in thread
From: Steven Whitehouse @ 2012-03-26 10:44 UTC (permalink / raw)
  To: Benjamin Poirier
  Cc: David Teigland, Randy Dunlap, linux-kernel, cluster-devel

Hi,

On Fri, 2012-03-23 at 18:06 -0400, Benjamin Poirier wrote:
[snip]
> 
> Instead of trying to select everything in GFS2, how about doing it this way?
> 
> [PATCH] gfs2: use depends instead of select in kconfig
> 
> Avoids having to duplicate the dependencies of what is 'select'ed (and on
> down...)
> 
> Those dependencies are currently incomplete, leading to broken builds with
> GFS2_FS_LOCKING_DLM=y and IP_SCTP=n.
> 
> Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
> ---
>  fs/gfs2/Kconfig |    7 ++-----
>  1 files changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index c465ae0..eb08c9e 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -1,10 +1,6 @@
>  config GFS2_FS
>  	tristate "GFS2 file system support"
>  	depends on (64BIT || LBDAF)
> -	select DLM if GFS2_FS_LOCKING_DLM
> -	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> -	select SYSFS if GFS2_FS_LOCKING_DLM
> -	select IP_SCTP if DLM_SCTP
>  	select FS_POSIX_ACL
>  	select CRC32
>  	select QUOTACTL
> @@ -29,7 +25,8 @@ config GFS2_FS
>  
>  config GFS2_FS_LOCKING_DLM
>  	bool "GFS2 DLM locking"
> -	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
> +	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> +		HOTPLUG && DLM && CONFIGFS_FS && SYSFS
>  	help
>  	  Multiple node locking module for GFS2
>  

That looks ok to me. I've put it in the GFS2 -fixes tree, and if
everybody is happy with that I'll send a pull request shortly,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 22:06               ` Benjamin Poirier
@ 2012-03-23 22:48                 ` Randy Dunlap
  2012-03-26 10:44                 ` Steven Whitehouse
  1 sibling, 0 replies; 51+ messages in thread
From: Randy Dunlap @ 2012-03-23 22:48 UTC (permalink / raw)
  To: Benjamin Poirier
  Cc: David Teigland, Steven Whitehouse, linux-kernel, cluster-devel

On 03/23/2012 03:06 PM, Benjamin Poirier wrote:

> On 2012/03/23 16:18, David Teigland wrote:
>> On Fri, Mar 23, 2012 at 01:06:05PM -0700, Randy Dunlap wrote:
>>>>> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
>>>>> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
>>>>> used anywhere else in the kernel tree AFAICT.
>>>>> DLM just always selects IP_SCTP.
>>>>
>>>> Here's what we have now:
>>>>
>>>> config GFS2_FS
>>>>         tristate "GFS2 file system support"
>>>>         depends on (64BIT || LBDAF)
>>>>         select DLM if GFS2_FS_LOCKING_DLM
>>>>         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
>>>>         select SYSFS if GFS2_FS_LOCKING_DLM
>>>>         select IP_SCTP if DLM_SCTP
>>>>         select FS_POSIX_ACL
>>>>         select CRC32
>>>>         select QUOTACTL
>>>>
>>>> menuconfig DLM
>>>>         tristate "Distributed Lock Manager (DLM)"
>>>>         depends on EXPERIMENTAL && INET
>>>>         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
>>>>         select IP_SCTP
>>>>
>>>> Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
>>>> just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
>>>> vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
>>>> possibly be undefined if we're selecting SCTP.
>>>
>>> What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
>>> don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
>>> in GFS2 is meaningless since there is no DLM_SCTP.
>>
>> https://lkml.org/lkml/2012/3/8/222 seems to have caused this by adding
>> the new dependency on the sctp module without any Kconfig changes.
>>
>> Should that patch have added depends IP_SCTP to the dlm and gfs2?
>>
> 
> Instead of trying to select everything in GFS2, how about doing it this way?
> 
> [PATCH] gfs2: use depends instead of select in kconfig
> 
> Avoids having to duplicate the dependencies of what is 'select'ed (and on
> down...)
> 
> Those dependencies are currently incomplete, leading to broken builds with
> GFS2_FS_LOCKING_DLM=y and IP_SCTP=n.
> 
> Signed-off-by: Benjamin Poirier <bpoirier@suse.de>


That seems to work for me.  Thanks.

Acked-by: Randy Dunlap <rdunlap@xenotime.net>


> ---
>  fs/gfs2/Kconfig |    7 ++-----
>  1 files changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index c465ae0..eb08c9e 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -1,10 +1,6 @@
>  config GFS2_FS
>  	tristate "GFS2 file system support"
>  	depends on (64BIT || LBDAF)
> -	select DLM if GFS2_FS_LOCKING_DLM
> -	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> -	select SYSFS if GFS2_FS_LOCKING_DLM
> -	select IP_SCTP if DLM_SCTP
>  	select FS_POSIX_ACL
>  	select CRC32
>  	select QUOTACTL
> @@ -29,7 +25,8 @@ config GFS2_FS
>  
>  config GFS2_FS_LOCKING_DLM
>  	bool "GFS2 DLM locking"
> -	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
> +	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> +		HOTPLUG && DLM && CONFIGFS_FS && SYSFS
>  	help
>  	  Multiple node locking module for GFS2
>  



-- 
~Randy

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 20:18             ` David Teigland
  2012-03-23 22:06               ` Benjamin Poirier
@ 2012-03-23 22:06               ` Randy Dunlap
  1 sibling, 0 replies; 51+ messages in thread
From: Randy Dunlap @ 2012-03-23 22:06 UTC (permalink / raw)
  To: David Teigland; +Cc: Steven Whitehouse, linux-kernel, cluster-devel, bpoirier

On 03/23/2012 01:18 PM, David Teigland wrote:

> On Fri, Mar 23, 2012 at 01:06:05PM -0700, Randy Dunlap wrote:
>>>> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
>>>> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
>>>> used anywhere else in the kernel tree AFAICT.
>>>> DLM just always selects IP_SCTP.
>>>
>>> Here's what we have now:
>>>
>>> config GFS2_FS
>>>         tristate "GFS2 file system support"
>>>         depends on (64BIT || LBDAF)
>>>         select DLM if GFS2_FS_LOCKING_DLM
>>>         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
>>>         select SYSFS if GFS2_FS_LOCKING_DLM
>>>         select IP_SCTP if DLM_SCTP
>>>         select FS_POSIX_ACL
>>>         select CRC32
>>>         select QUOTACTL
>>>
>>> menuconfig DLM
>>>         tristate "Distributed Lock Manager (DLM)"
>>>         depends on EXPERIMENTAL && INET
>>>         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
>>>         select IP_SCTP
>>>
>>> Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
>>> just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
>>> vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
>>> possibly be undefined if we're selecting SCTP.
>>
>> What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
>> don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
>> in GFS2 is meaningless since there is no DLM_SCTP.
> 
> https://lkml.org/lkml/2012/3/8/222 seems to have caused this by adding
> the new dependency on the sctp module without any Kconfig changes.

bad URL?  I don't see how that patch affects this area at all.

> Should that patch have added depends IP_SCTP to the dlm and gfs2?

Sounds reasonable (but I haven't seen the patch).


-- 
~Randy

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 20:18             ` David Teigland
@ 2012-03-23 22:06               ` Benjamin Poirier
  2012-03-23 22:48                 ` Randy Dunlap
  2012-03-26 10:44                 ` Steven Whitehouse
  2012-03-23 22:06               ` Randy Dunlap
  1 sibling, 2 replies; 51+ messages in thread
From: Benjamin Poirier @ 2012-03-23 22:06 UTC (permalink / raw)
  To: David Teigland
  Cc: Randy Dunlap, Steven Whitehouse, linux-kernel, cluster-devel

On 2012/03/23 16:18, David Teigland wrote:
> On Fri, Mar 23, 2012 at 01:06:05PM -0700, Randy Dunlap wrote:
> > >> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
> > >> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
> > >> used anywhere else in the kernel tree AFAICT.
> > >> DLM just always selects IP_SCTP.
> > > 
> > > Here's what we have now:
> > > 
> > > config GFS2_FS
> > >         tristate "GFS2 file system support"
> > >         depends on (64BIT || LBDAF)
> > >         select DLM if GFS2_FS_LOCKING_DLM
> > >         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> > >         select SYSFS if GFS2_FS_LOCKING_DLM
> > >         select IP_SCTP if DLM_SCTP
> > >         select FS_POSIX_ACL
> > >         select CRC32
> > >         select QUOTACTL
> > > 
> > > menuconfig DLM
> > >         tristate "Distributed Lock Manager (DLM)"
> > >         depends on EXPERIMENTAL && INET
> > >         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
> > >         select IP_SCTP
> > > 
> > > Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> > > just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> > > vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> > > possibly be undefined if we're selecting SCTP.
> > 
> > What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
> > don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
> > in GFS2 is meaningless since there is no DLM_SCTP.
> 
> https://lkml.org/lkml/2012/3/8/222 seems to have caused this by adding
> the new dependency on the sctp module without any Kconfig changes.
> 
> Should that patch have added depends IP_SCTP to the dlm and gfs2?
> 

Instead of trying to select everything in GFS2, how about doing it this way?

[PATCH] gfs2: use depends instead of select in kconfig

Avoids having to duplicate the dependencies of what is 'select'ed (and on
down...)

Those dependencies are currently incomplete, leading to broken builds with
GFS2_FS_LOCKING_DLM=y and IP_SCTP=n.

Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
---
 fs/gfs2/Kconfig |    7 ++-----
 1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index c465ae0..eb08c9e 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,10 +1,6 @@
 config GFS2_FS
 	tristate "GFS2 file system support"
 	depends on (64BIT || LBDAF)
-	select DLM if GFS2_FS_LOCKING_DLM
-	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
-	select SYSFS if GFS2_FS_LOCKING_DLM
-	select IP_SCTP if DLM_SCTP
 	select FS_POSIX_ACL
 	select CRC32
 	select QUOTACTL
@@ -29,7 +25,8 @@ config GFS2_FS
 
 config GFS2_FS_LOCKING_DLM
 	bool "GFS2 DLM locking"
-	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
+	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
+		HOTPLUG && DLM && CONFIGFS_FS && SYSFS
 	help
 	  Multiple node locking module for GFS2
 

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 20:06           ` Randy Dunlap
  2012-03-23 20:09             ` Steven Whitehouse
@ 2012-03-23 20:18             ` David Teigland
  2012-03-23 22:06               ` Benjamin Poirier
  2012-03-23 22:06               ` Randy Dunlap
  1 sibling, 2 replies; 51+ messages in thread
From: David Teigland @ 2012-03-23 20:18 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: Steven Whitehouse, linux-kernel, cluster-devel, bpoirier

On Fri, Mar 23, 2012 at 01:06:05PM -0700, Randy Dunlap wrote:
> >> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
> >> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
> >> used anywhere else in the kernel tree AFAICT.
> >> DLM just always selects IP_SCTP.
> > 
> > Here's what we have now:
> > 
> > config GFS2_FS
> >         tristate "GFS2 file system support"
> >         depends on (64BIT || LBDAF)
> >         select DLM if GFS2_FS_LOCKING_DLM
> >         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >         select SYSFS if GFS2_FS_LOCKING_DLM
> >         select IP_SCTP if DLM_SCTP
> >         select FS_POSIX_ACL
> >         select CRC32
> >         select QUOTACTL
> > 
> > menuconfig DLM
> >         tristate "Distributed Lock Manager (DLM)"
> >         depends on EXPERIMENTAL && INET
> >         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
> >         select IP_SCTP
> > 
> > Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> > just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> > vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> > possibly be undefined if we're selecting SCTP.
> 
> What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
> don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
> in GFS2 is meaningless since there is no DLM_SCTP.

https://lkml.org/lkml/2012/3/8/222 seems to have caused this by adding
the new dependency on the sctp module without any Kconfig changes.

Should that patch have added depends IP_SCTP to the dlm and gfs2?


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 20:06           ` Randy Dunlap
@ 2012-03-23 20:09             ` Steven Whitehouse
  2012-03-23 20:18             ` David Teigland
  1 sibling, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-03-23 20:09 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: David Teigland, linux-kernel, cluster-devel

Hi,

On Fri, 2012-03-23 at 13:06 -0700, Randy Dunlap wrote:
> On 03/23/2012 12:41 PM, David Teigland wrote:
> 
> > 
> >> on i386:
> >>
> >> ERROR: "sctp_do_peeloff" [fs/dlm/dlm.ko] undefined!
> >>
> >>
> >> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
> >> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
> >> used anywhere else in the kernel tree AFAICT.
> >> DLM just always selects IP_SCTP.
> > 
> > Here's what we have now:
> > 
> > config GFS2_FS
> >         tristate "GFS2 file system support"
> >         depends on (64BIT || LBDAF)
> >         select DLM if GFS2_FS_LOCKING_DLM
> >         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >         select SYSFS if GFS2_FS_LOCKING_DLM
> >         select IP_SCTP if DLM_SCTP
> >         select FS_POSIX_ACL
> >         select CRC32
> >         select QUOTACTL
> > 
> > menuconfig DLM
> >         tristate "Distributed Lock Manager (DLM)"
> >         depends on EXPERIMENTAL && INET
> >         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
> >         select IP_SCTP
> > 
> > Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> > just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> > vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> > possibly be undefined if we're selecting SCTP.
> 
> What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
> don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
> in GFS2 is meaningless since there is no DLM_SCTP.
> 
> I just verified that the (posted) failing config still fails with
> today's linux-next.
> 

The DLM_SCTP is historical. There used to be such a thing, but that
config option went away, and there is now run time selection of the DLM
transport. So that the GFS2 Kconfig should have been updated, however
that appears not to be enough on its own to resolve the issue,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 19:41         ` David Teigland
  2012-03-23 19:46           ` David Miller
@ 2012-03-23 20:06           ` Randy Dunlap
  2012-03-23 20:09             ` Steven Whitehouse
  2012-03-23 20:18             ` David Teigland
  1 sibling, 2 replies; 51+ messages in thread
From: Randy Dunlap @ 2012-03-23 20:06 UTC (permalink / raw)
  To: David Teigland; +Cc: Steven Whitehouse, linux-kernel, cluster-devel

On 03/23/2012 12:41 PM, David Teigland wrote:

> 
>> on i386:
>>
>> ERROR: "sctp_do_peeloff" [fs/dlm/dlm.ko] undefined!
>>
>>
>> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
>> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
>> used anywhere else in the kernel tree AFAICT.
>> DLM just always selects IP_SCTP.
> 
> Here's what we have now:
> 
> config GFS2_FS
>         tristate "GFS2 file system support"
>         depends on (64BIT || LBDAF)
>         select DLM if GFS2_FS_LOCKING_DLM
>         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
>         select SYSFS if GFS2_FS_LOCKING_DLM
>         select IP_SCTP if DLM_SCTP
>         select FS_POSIX_ACL
>         select CRC32
>         select QUOTACTL
> 
> menuconfig DLM
>         tristate "Distributed Lock Manager (DLM)"
>         depends on EXPERIMENTAL && INET
>         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
>         select IP_SCTP
> 
> Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> possibly be undefined if we're selecting SCTP.

What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
in GFS2 is meaningless since there is no DLM_SCTP.

I just verified that the (posted) failing config still fails with
today's linux-next.

-- 
~Randy

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-23 19:41         ` David Teigland
@ 2012-03-23 19:46           ` David Miller
  2012-03-23 20:06           ` Randy Dunlap
  1 sibling, 0 replies; 51+ messages in thread
From: David Miller @ 2012-03-23 19:46 UTC (permalink / raw)
  To: teigland; +Cc: swhiteho, rdunlap, linux-kernel, cluster-devel

From: David Teigland <teigland@redhat.com>
Date: Fri, 23 Mar 2012 15:41:52 -0400

> Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> possibly be undefined if we're selecting SCTP.

GFS2=y SCTP=m

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-19 15:34       ` Steven Whitehouse
@ 2012-03-23 19:41         ` David Teigland
  2012-03-23 19:46           ` David Miller
  2012-03-23 20:06           ` Randy Dunlap
  0 siblings, 2 replies; 51+ messages in thread
From: David Teigland @ 2012-03-23 19:41 UTC (permalink / raw)
  To: Steven Whitehouse; +Cc: Randy Dunlap, linux-kernel, cluster-devel


> on i386:
>
> ERROR: "sctp_do_peeloff" [fs/dlm/dlm.ko] undefined!
>
>
> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
> used anywhere else in the kernel tree AFAICT.
> DLM just always selects IP_SCTP.

Here's what we have now:

config GFS2_FS
        tristate "GFS2 file system support"
        depends on (64BIT || LBDAF)
        select DLM if GFS2_FS_LOCKING_DLM
        select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
        select SYSFS if GFS2_FS_LOCKING_DLM
        select IP_SCTP if DLM_SCTP
        select FS_POSIX_ACL
        select CRC32
        select QUOTACTL

menuconfig DLM
        tristate "Distributed Lock Manager (DLM)"
        depends on EXPERIMENTAL && INET
        depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
        select IP_SCTP

Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
possibly be undefined if we're selecting SCTP.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-19 15:18     ` Randy Dunlap
  2012-03-19 15:34       ` Steven Whitehouse
@ 2012-03-20  9:47       ` Steven Whitehouse
  1 sibling, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-03-20  9:47 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: linux-kernel, cluster-devel, teigland

Hi,

On Mon, 2012-03-19 at 08:18 -0700, Randy Dunlap wrote:
> On 03/19/2012 07:59 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > On Mon, 2012-03-19 at 07:45 -0700, Randy Dunlap wrote:
> >> On 03/19/2012 03:25 AM, Steven Whitehouse wrote:
> >>
> >>> Hi,
> >>>
> >>> Not a huge number of patches this time. Some notable new features
> >>> though:
> >>>  - Glock stats gathering (v. useful for performance analysis)
> >>>  - FITRIM ioctl support
> >>>  - Sorting the ordered write list (big performance increase when the workload
> >>>    doesn't result in the write requests being nicely ordered to start with)
> >>>
> >>> Plus a few clean ups, and bug fixes in addition,
> >>
> >>
> >>
> >> Hi,
> >>
> >> I reported a build error in linux-next 20120313, but it appears
> >> that mainline also needs the fix (when it's ready) since mainline
> >> gfs2 Kconfig selects DLM_SCTP, which does not exist.
> >>
> >> https://lkml.org/lkml/2012/3/13/456
> >>
> > 
> > Does the following fix the problem? If so then I'll roll that into the
> > tree before it gets pushed,
> > 
> 
> No, that's not sufficient:
> 
> warning: (GFS2_FS) selects DLM which has unmet direct dependencies (EXPERIMENTAL && INET && SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n))
> warning: (DLM && GFS2_FS) selects IP_SCTP which has unmet direct dependencies (NET && INET && EXPERIMENTAL && (IPV6 || IPV6=n))
> 
> and
> 
> ERROR: "crc32c" [net/sctp/sctp.ko] undefined!
> 
> 
Since the pending patch set doesn't affect the Kconfig at all, I don't
think that this issue needs to hold up merging the GFS2 tree. We'll
follow up with a fix for this later on,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-19 15:18     ` Randy Dunlap
@ 2012-03-19 15:34       ` Steven Whitehouse
  2012-03-23 19:41         ` David Teigland
  2012-03-20  9:47       ` Steven Whitehouse
  1 sibling, 1 reply; 51+ messages in thread
From: Steven Whitehouse @ 2012-03-19 15:34 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: linux-kernel, cluster-devel, teigland

Hi,

On Mon, 2012-03-19 at 08:18 -0700, Randy Dunlap wrote:
> On 03/19/2012 07:59 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > On Mon, 2012-03-19 at 07:45 -0700, Randy Dunlap wrote:
> >> On 03/19/2012 03:25 AM, Steven Whitehouse wrote:
> >>
> >>> Hi,
> >>>
> >>> Not a huge number of patches this time. Some notable new features
> >>> though:
> >>>  - Glock stats gathering (v. useful for performance analysis)
> >>>  - FITRIM ioctl support
> >>>  - Sorting the ordered write list (big performance increase when the workload
> >>>    doesn't result in the write requests being nicely ordered to start with)
> >>>
> >>> Plus a few clean ups, and bug fixes in addition,
> >>
> >>
> >>
> >> Hi,
> >>
> >> I reported a build error in linux-next 20120313, but it appears
> >> that mainline also needs the fix (when it's ready) since mainline
> >> gfs2 Kconfig selects DLM_SCTP, which does not exist.
> >>
> >> https://lkml.org/lkml/2012/3/13/456
> >>
> > 
> > Does the following fix the problem? If so then I'll roll that into the
> > tree before it gets pushed,
> > 
> 
> No, that's not sufficient:
> 
> warning: (GFS2_FS) selects DLM which has unmet direct dependencies (EXPERIMENTAL && INET && SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n))
> warning: (DLM && GFS2_FS) selects IP_SCTP which has unmet direct dependencies (NET && INET && EXPERIMENTAL && (IPV6 || IPV6=n))
> 
> and
> 
> ERROR: "crc32c" [net/sctp/sctp.ko] undefined!
> 
> 
Hmm, ok. I'll look at this again. I'm not sure why DLM is still calling
itself EXPERIMENTAL since thats long since not been the case, maybe SCTP
still is, but I don't think GFS2 should be selecting EXPERIMENTAL
directly, anyway. It is rather easy to tie ones' self in knots with this
config language.... since GFS2_FS_LOCKING_DLM depends on NET && INET &&
(IPV6 || IPV6=n) && HOTPLUG then all those other deps must presumably be
set anyway, so I don't understand quite why DLM doesn't have those
available to it.

I'll dig around a bit and see if I can figure out whats going on here,

Steve.


> 
> 
> > 
> > diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> > index c465ae0..f4e1c60 100644
> > --- a/fs/gfs2/Kconfig
> > +++ b/fs/gfs2/Kconfig
> > @@ -4,7 +4,7 @@ config GFS2_FS
> >  	select DLM if GFS2_FS_LOCKING_DLM
> >  	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >  	select SYSFS if GFS2_FS_LOCKING_DLM
> > -	select IP_SCTP if DLM_SCTP
> > +	select IP_SCTP if GFS2_FS_LOCKING_DLM
> >  	select FS_POSIX_ACL
> >  	select CRC32
> >  	select QUOTACTL
> > 
> > 
> 
> 
> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-19 14:59   ` Steven Whitehouse
@ 2012-03-19 15:18     ` Randy Dunlap
  2012-03-19 15:34       ` Steven Whitehouse
  2012-03-20  9:47       ` Steven Whitehouse
  0 siblings, 2 replies; 51+ messages in thread
From: Randy Dunlap @ 2012-03-19 15:18 UTC (permalink / raw)
  To: Steven Whitehouse; +Cc: linux-kernel, cluster-devel, teigland

On 03/19/2012 07:59 AM, Steven Whitehouse wrote:

> Hi,
> 
> On Mon, 2012-03-19 at 07:45 -0700, Randy Dunlap wrote:
>> On 03/19/2012 03:25 AM, Steven Whitehouse wrote:
>>
>>> Hi,
>>>
>>> Not a huge number of patches this time. Some notable new features
>>> though:
>>>  - Glock stats gathering (v. useful for performance analysis)
>>>  - FITRIM ioctl support
>>>  - Sorting the ordered write list (big performance increase when the workload
>>>    doesn't result in the write requests being nicely ordered to start with)
>>>
>>> Plus a few clean ups, and bug fixes in addition,
>>
>>
>>
>> Hi,
>>
>> I reported a build error in linux-next 20120313, but it appears
>> that mainline also needs the fix (when it's ready) since mainline
>> gfs2 Kconfig selects DLM_SCTP, which does not exist.
>>
>> https://lkml.org/lkml/2012/3/13/456
>>
> 
> Does the following fix the problem? If so then I'll roll that into the
> tree before it gets pushed,
> 

No, that's not sufficient:

warning: (GFS2_FS) selects DLM which has unmet direct dependencies (EXPERIMENTAL && INET && SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n))
warning: (DLM && GFS2_FS) selects IP_SCTP which has unmet direct dependencies (NET && INET && EXPERIMENTAL && (IPV6 || IPV6=n))

and

ERROR: "crc32c" [net/sctp/sctp.ko] undefined!




> 
> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index c465ae0..f4e1c60 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -4,7 +4,7 @@ config GFS2_FS
>  	select DLM if GFS2_FS_LOCKING_DLM
>  	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
>  	select SYSFS if GFS2_FS_LOCKING_DLM
> -	select IP_SCTP if DLM_SCTP
> +	select IP_SCTP if GFS2_FS_LOCKING_DLM
>  	select FS_POSIX_ACL
>  	select CRC32
>  	select QUOTACTL
> 
> 



-- 
~Randy

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-19 14:45 ` Randy Dunlap
@ 2012-03-19 14:59   ` Steven Whitehouse
  2012-03-19 15:18     ` Randy Dunlap
  0 siblings, 1 reply; 51+ messages in thread
From: Steven Whitehouse @ 2012-03-19 14:59 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: linux-kernel, cluster-devel, teigland

Hi,

On Mon, 2012-03-19 at 07:45 -0700, Randy Dunlap wrote:
> On 03/19/2012 03:25 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > Not a huge number of patches this time. Some notable new features
> > though:
> >  - Glock stats gathering (v. useful for performance analysis)
> >  - FITRIM ioctl support
> >  - Sorting the ordered write list (big performance increase when the workload
> >    doesn't result in the write requests being nicely ordered to start with)
> > 
> > Plus a few clean ups, and bug fixes in addition,
> 
> 
> 
> Hi,
> 
> I reported a build error in linux-next 20120313, but it appears
> that mainline also needs the fix (when it's ready) since mainline
> gfs2 Kconfig selects DLM_SCTP, which does not exist.
> 
> https://lkml.org/lkml/2012/3/13/456
> 

Does the following fix the problem? If so then I'll roll that into the
tree before it gets pushed,

Steve.

diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index c465ae0..f4e1c60 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,7 +4,7 @@ config GFS2_FS
 	select DLM if GFS2_FS_LOCKING_DLM
 	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
 	select SYSFS if GFS2_FS_LOCKING_DLM
-	select IP_SCTP if DLM_SCTP
+	select IP_SCTP if GFS2_FS_LOCKING_DLM
 	select FS_POSIX_ACL
 	select CRC32
 	select QUOTACTL



^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: GFS2: Pre-pull patch posting (merge window)
  2012-03-19 10:25 Steven Whitehouse
@ 2012-03-19 14:45 ` Randy Dunlap
  2012-03-19 14:59   ` Steven Whitehouse
  0 siblings, 1 reply; 51+ messages in thread
From: Randy Dunlap @ 2012-03-19 14:45 UTC (permalink / raw)
  To: Steven Whitehouse; +Cc: linux-kernel, cluster-devel

On 03/19/2012 03:25 AM, Steven Whitehouse wrote:

> Hi,
> 
> Not a huge number of patches this time. Some notable new features
> though:
>  - Glock stats gathering (v. useful for performance analysis)
>  - FITRIM ioctl support
>  - Sorting the ordered write list (big performance increase when the workload
>    doesn't result in the write requests being nicely ordered to start with)
> 
> Plus a few clean ups, and bug fixes in addition,



Hi,

I reported a build error in linux-next 20120313, but it appears
that mainline also needs the fix (when it's ready) since mainline
gfs2 Kconfig selects DLM_SCTP, which does not exist.

https://lkml.org/lkml/2012/3/13/456

-- 
~Randy

^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2012-03-19 10:25 Steven Whitehouse
  2012-03-19 14:45 ` Randy Dunlap
  0 siblings, 1 reply; 51+ messages in thread
From: Steven Whitehouse @ 2012-03-19 10:25 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

Not a huge number of patches this time. Some notable new features
though:
 - Glock stats gathering (v. useful for performance analysis)
 - FITRIM ioctl support
 - Sorting the ordered write list (big performance increase when the workload
   doesn't result in the write requests being nicely ordered to start with)

Plus a few clean ups, and bug fixes in addition,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2012-01-05 11:51 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2012-01-05 11:51 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

The main feature this time is clean up around the allocation and
resource group code. Otherwise the remainder is mostly small
bug fixes.

I've held back the glock stats patch and that will probably be
ready for the following merge window with a bit of luck,

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2011-10-24 12:48 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel

Hi,

Since the merge window is upon us, here is the current content of
the GFS2 git tree. A few things will be help back to the following
merge window in order to ensure a greater test time, but those currently
in the tree are ready for the current window.

Recently I've reconstituted the GFS2 git tree, so it can be pulled
(via http) from:

http://sucs.org/~rohan/git/gfs2-3.0-nmw

and viewed via gitweb at:

http://sucs.org/gitweb/

This is thanks to the Swansea University Computer Society for providing
a temporary (or possibly permanent) home for the GFS2 git trees. Please
treat their server kindly as this will only continue while it doesn't
generate too much traffic. I figure that there will not be too many
people pulling the GFS2 tree at once, but we'll see.

Some highlights of the current patch set:
 o Reduction in code of approx 400 lines
 o Big clean up (and speed up) in the resource group code
   - This is a nice base to build some forthcoming improvements on
   - It should improve performance with multi-threaded workloads
 o Some left-over fsync/writeback changes
 o Improvements to readahead when deallocating large directories

Any questions/concerns then please let me know as usual,

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2011-07-22  9:16 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2011-07-22  9:16 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

Not a lot new this time... the addition of a cache for the directory hash table
improve directory read/lookup speed, automatic adjustment of the glock hold
time improves performance for some contention corner cases. S_NOSEC support
is another performance related change, plus a nice clean up from Eric
Sandeen,

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2011-05-19  8:46 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2011-05-19  8:46 UTC (permalink / raw)
  To: linux-kernel, cluster-devel

Hi,

This time, most of the GFS2 patches are code clean up, although there
are a few bug fixes (fallocate/ail writeback/end of life inodes/nlink) and
some new features (new tracepoint & tracing flags, using the UUID field
in the generic superblock).

The changes can be broadly divided into three sets:

1. Bob's directory code clean up
2. My fsync/ail writeback fixes & clean up
3. inode.c/ops_inode.c clean up

Steve.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* GFS2: Pre-pull patch posting (merge window)
@ 2011-03-15  9:11 Steven Whitehouse
  0 siblings, 0 replies; 51+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel, linux-kernel

Hi,

The most interesting "feature" in this patch set is the RCU glock
patch which has been a long time coming, but is finally here. That
patch contains most of the changes this time. The other patches ins
this set are mostly smaller bug fixes and performance improvements.

Steve.



^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2014-10-08  9:53 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-23  8:00 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
2012-07-23  8:00 ` [PATCH 01/16] GFS2: Extend the life of the reservations Steven Whitehouse
2012-07-23  8:00 ` [PATCH 02/16] GFS2: Fold quota data into the reservations struct Steven Whitehouse
2012-07-23  8:00 ` [PATCH 03/16] GFS2: Add "top dir" flag support Steven Whitehouse
2012-07-23  8:00 ` [PATCH 04/16] GFS2: Fix error handling when reading an invalid block from the journal Steven Whitehouse
2012-07-23  8:00 ` [PATCH 05/16] GFS2: Increase buffer size for glocks and glstats debugfs files Steven Whitehouse
2012-07-23  8:00 ` [PATCH 06/16] GFS2: Cache last hash bucket for glock seq_files Steven Whitehouse
2012-07-23  8:00 ` [PATCH 07/16] GFS2: Use lvbs for storing rgrp information with mount option Steven Whitehouse
2012-07-23  8:00 ` [PATCH 08/16] seq_file: Add seq_vprintf function and export it Steven Whitehouse
2012-07-23  8:00 ` [PATCH 09/16] GFS2: Use seq_vprintf for glocks debugfs file Steven Whitehouse
2012-07-23  8:01 ` [PATCH 10/16] GFS2: Size seq_file buffer more carefully Steven Whitehouse
2012-07-23  8:01 ` [PATCH 11/16] GFS2: Add kobject release method Steven Whitehouse
2012-07-23  8:01 ` [PATCH 12/16] GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve Steven Whitehouse
2012-07-23  8:01 ` [PATCH 13/16] GFS2: Fixing double brelse'ing bh allocated in gfs2_meta_read when EIO occurs Steven Whitehouse
2012-07-23  8:01 ` [PATCH 14/16] GFS2: kernel panic with small gfs2 filesystems - 1 RG Steven Whitehouse
2012-07-23  8:01 ` [PATCH 15/16] GFS2: Reduce file fragmentation Steven Whitehouse
2012-07-23  8:01 ` [PATCH 16/16] GFS2: Eliminate 64-bit divides Steven Whitehouse
  -- strict thread matches above, loose matches on Subject: below --
2014-10-08  9:53 GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
2014-06-03 11:02 Steven Whitehouse
2014-04-01  9:15 Steven Whitehouse
2014-01-20 12:23 Steven Whitehouse
2013-11-04 11:09 Steven Whitehouse
2013-09-05  9:02 GFS2 " Steven Whitehouse
2013-07-01  9:33 GFS2: " Steven Whitehouse
2013-04-26  9:18 Steven Whitehouse
2013-02-19 10:07 Steven Whitehouse
2012-11-30  9:52 Steven Whitehouse
2012-09-26  8:25 Steven Whitehouse
2012-05-17 12:23 Steven Whitehouse
2012-03-19 10:25 Steven Whitehouse
2012-03-19 14:45 ` Randy Dunlap
2012-03-19 14:59   ` Steven Whitehouse
2012-03-19 15:18     ` Randy Dunlap
2012-03-19 15:34       ` Steven Whitehouse
2012-03-23 19:41         ` David Teigland
2012-03-23 19:46           ` David Miller
2012-03-23 20:06           ` Randy Dunlap
2012-03-23 20:09             ` Steven Whitehouse
2012-03-23 20:18             ` David Teigland
2012-03-23 22:06               ` Benjamin Poirier
2012-03-23 22:48                 ` Randy Dunlap
2012-03-26 10:44                 ` Steven Whitehouse
2012-04-02 15:35                   ` Randy Dunlap
2012-04-02 15:47                     ` Steven Whitehouse
2012-03-23 22:06               ` Randy Dunlap
2012-03-20  9:47       ` Steven Whitehouse
2012-01-05 11:51 Steven Whitehouse
2011-10-24 12:48 Steven Whitehouse
2011-07-22  9:16 Steven Whitehouse
2011-05-19  8:46 Steven Whitehouse
2011-03-15  9:11 Steven Whitehouse

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).