All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] [GFS2 PATCH] [TRY #2] GFS2: Implement a "bitmap has no extents longer than X" scheme
       [not found] <1450086691.17689091.1383753511869.JavaMail.root@redhat.com>
@ 2013-11-06 15:59 ` Bob Peterson
  2013-11-07 11:32   ` Steven Whitehouse
  0 siblings, 1 reply; 4+ messages in thread
From: Bob Peterson @ 2013-11-06 15:59 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

This is a slightly revised version of a patch I recently sent out:

Before this patch, we used a bit, GBF_FULL, to determine when a bitmap
was full. With the preceding patch, we started accepting block
reservations smaller than the ideal size, which requires a lot more
parsing of the bitmaps. To reduce the amount of bitmap searching, this
patch implements a scheme whereby each rgrp keeps track of the point
at this multi-block reservations will fail.

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpeterso@redhat.com> 
---
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index ba1ea67..48c1a85 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -68,7 +68,6 @@ struct gfs2_log_operations {
 struct gfs2_bitmap {
 	struct buffer_head *bi_bh;
 	char *bi_clone;
-	unsigned long bi_flags;
 	u32 bi_offset;
 	u32 bi_start;
 	u32 bi_len;
@@ -93,6 +92,7 @@ struct gfs2_rgrpd {
 	struct gfs2_rgrp_lvb *rd_rgl;
 	u32 rd_last_alloc;
 	u32 rd_flags;
+	u32 rd_extfail_pt;		/* extent failure point */
 #define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 010b9fb..e615021 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -81,8 +81,8 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
 		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
 	memcpy(bi->bi_clone + bi->bi_offset,
 	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
-	clear_bit(GBF_FULL, &bi->bi_flags);
 	rgd->rd_free_clone = rgd->rd_free;
+	rgd->rd_extfail_pt = rgd->rd_free;
 }
 
 /**
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ba5eb41..b1b3022 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -636,14 +636,15 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
 	RB_CLEAR_NODE(&rs->rs_node);
 
 	if (rs->rs_free) {
-		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
-
 		/* return reserved blocks to the rgrp */
 		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
 		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
+		/* The rgrp extent failure point is likely not to increase;
+		   it will only do so if the freed blocks are somehow
+		   contiguous with a span of free blocks that follows. Still,
+		   it will force the number to be recalculated later. */
+		rgd->rd_extfail_pt += rs->rs_free;
 		rs->rs_free = 0;
-		clear_bit(GBF_FULL, &bi->bi_flags);
-		smp_mb__after_clear_bit();
 	}
 }
 
@@ -768,7 +769,6 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
 
-		bi->bi_flags = 0;
 		/* small rgrp; bitmap stored completely in header block */
 		if (length == 1) {
 			bytes = bytes_left;
@@ -1127,11 +1127,11 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 	}
 
 	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
-		for (x = 0; x < length; x++)
-			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 		rgd->rd_free_clone = rgd->rd_free;
+		/* max out the rgrp allocation failure point */
+		rgd->rd_extfail_pt = rgd->rd_free;
 	}
 	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
 		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
@@ -1591,8 +1591,8 @@ fail:
  * @ap: the allocation parameters
  *
  * Side effects:
- * - If looking for free blocks, we set GBF_FULL on each bitmap which
- *   has no free blocks in it.
+ * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
+ *   has come up short on a free block search.
  *
  * Returns: 0 on success, -ENOSPC if there is no block of the requested state
  */
@@ -1603,7 +1603,8 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
 {
 	struct buffer_head *bh;
 	int initial_bii;
-	u32 initial_offset;
+	int first_bii = rbm->bii;
+	u32 first_offset = rbm->offset;
 	u32 offset;
 	u8 *buffer;
 	int n = 0;
@@ -1621,19 +1622,14 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
 
 	while(1) {
 		bi = rbm_bi(rbm);
-		if (test_bit(GBF_FULL, &bi->bi_flags) &&
-		    (state == GFS2_BLKST_FREE))
-			goto next_bitmap;
-
 		bh = bi->bi_bh;
 		buffer = bh->b_data + bi->bi_offset;
 		WARN_ON(!buffer_uptodate(bh));
 		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
 			buffer = bi->bi_clone + bi->bi_offset;
-		initial_offset = rbm->offset;
 		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
 		if (offset == BFITNOENT)
-			goto bitmap_full;
+			goto next_bitmap;
 		rbm->offset = offset;
 		if (ip == NULL)
 			return 0;
@@ -1656,12 +1652,6 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
 		}
 		return ret;
 
-bitmap_full:	/* Mark bitmap as full and fall through */
-		if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
-			struct gfs2_bitmap *bi = rbm_bi(rbm);
-			set_bit(GBF_FULL, &bi->bi_flags);
-		}
-
 next_bitmap:	/* Find next bitmap in the rgrp */
 		rbm->offset = 0;
 		rbm->bii++;
@@ -1679,6 +1669,13 @@ next_iter:
 	if (minext == NULL || state != GFS2_BLKST_FREE)
 		return -ENOSPC;
 
+	/* If the extent was too small, and it's smaller than the smallest
+	   to have failed before, remember for future reference that it's
+	   useless to search this rgrp again for this amount or more. */
+	if ((first_offset == 0) && (first_bii == 0) &&
+	    (*minext < rbm->rgd->rd_extfail_pt))
+		rbm->rgd->rd_extfail_pt = *minext;
+
 	/* If the maximum extent we found is big enough to fulfill the
 	   minimum requirements, use it anyway. */
 	if (maxext.len) {
@@ -1924,7 +1921,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
 		}
 
 		/* Skip unuseable resource groups */
-		if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
+		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
+						 GFS2_RDF_ERROR)) ||
+		    (ap && (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)))
 			goto skip_rgrp;
 
 		if (sdp->sd_args.ar_rgrplvb)
@@ -2106,10 +2105,10 @@ int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 
 	if (rgd == NULL)
 		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
 		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
-		       rgd->rd_reserved);
+		       rgd->rd_reserved, rgd->rd_extfail_pt);
 	spin_lock(&rgd->rd_rsspin);
 	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
 		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
@@ -2228,9 +2227,9 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	/* Since all blocks are reserved in advance, this shouldn't happen */
 	if (error) {
-		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
+		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, fail_pt=%d\n",
 			(unsigned long long)ip->i_no_addr, error, *nblocks,
-			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
+			rbm.rgd->rd_extfail_pt);
 		goto rgrp_error;
 	}
 



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Cluster-devel] [GFS2 PATCH] [TRY #2] GFS2: Implement a "bitmap has no extents longer than X" scheme
  2013-11-06 15:59 ` [Cluster-devel] [GFS2 PATCH] [TRY #2] GFS2: Implement a "bitmap has no extents longer than X" scheme Bob Peterson
@ 2013-11-07 11:32   ` Steven Whitehouse
  2013-11-11 14:01     ` Bob Peterson
  0 siblings, 1 reply; 4+ messages in thread
From: Steven Whitehouse @ 2013-11-07 11:32 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Wed, 2013-11-06 at 10:59 -0500, Bob Peterson wrote:
> Hi,
> 
> This is a slightly revised version of a patch I recently sent out:
> 
> Before this patch, we used a bit, GBF_FULL, to determine when a bitmap
> was full. With the preceding patch, we started accepting block
> reservations smaller than the ideal size, which requires a lot more
> parsing of the bitmaps. To reduce the amount of bitmap searching, this
> patch implements a scheme whereby each rgrp keeps track of the point
> at this multi-block reservations will fail.
> 
The first two patches look good. However this one I'm still not so keen
on. I'd much prefer if we can avoid dropping the GBF_FULL support at
bitmap level if the new test is at rgrp level. It doesn't look like that
is too tricky to achieve either - all we need to do is to not remove the
GBF_FULL support since there doesn't appear to be any conflict between
the two mechanisms.

The net result is a diffstat that looks like this:

[root at chywoon linux-2.6]# diffstat -p1 ./patch3.diff 
 fs/gfs2/incore.h |    1 +
 fs/gfs2/lops.c   |    1 +
 fs/gfs2/rgrp.c   |   32 ++++++++++++++++++++++++++------
 3 files changed, 28 insertions(+), 6 deletions(-)

as opposed to your patch which looks like:

[root at chywoon linux-2.6]# diffstat -p1 ../bob-a-3.mbox 
 fs/gfs2/incore.h |    2 +-
 fs/gfs2/lops.c   |    2 +-
 fs/gfs2/rgrp.c   |   53 ++++++++++++++++++++++++++---------------------------
 3 files changed, 28 insertions(+), 29 deletions(-)

So there would be fewer changes overall. I've given this a quick spin,
and it hasn't broken yet, so let me know what you think,

Steve.


diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index ba1ea67..0132816 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -93,6 +93,7 @@ struct gfs2_rgrpd {
 	struct gfs2_rgrp_lvb *rd_rgl;
 	u32 rd_last_alloc;
 	u32 rd_flags;
+	u32 rd_extfail_pt;		/* extent failure point */
 #define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 010b9fb..cdadb92 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -83,6 +83,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
 	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
 	clear_bit(GBF_FULL, &bi->bi_flags);
 	rgd->rd_free_clone = rgd->rd_free;
+	rgd->rd_extfail_pt = rgd->rd_free;
 }
 
 /**
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ba5eb41..af4c8cf 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -641,9 +641,13 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
 		/* return reserved blocks to the rgrp */
 		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
 		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
+		/* The rgrp extent failure point is likely not to increase;
+		   it will only do so if the freed blocks are somehow
+		   contiguous with a span of free blocks that follows. Still,
+		   it will force the number to be recalculated later. */
+		rgd->rd_extfail_pt += rs->rs_free;
 		rs->rs_free = 0;
 		clear_bit(GBF_FULL, &bi->bi_flags);
-		smp_mb__after_clear_bit();
 	}
 }
 
@@ -1132,6 +1136,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 		rgd->rd_free_clone = rgd->rd_free;
+		/* max out the rgrp allocation failure point */
+		rgd->rd_extfail_pt = rgd->rd_free;
 	}
 	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
 		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
@@ -1593,6 +1599,8 @@ fail:
  * Side effects:
  * - If looking for free blocks, we set GBF_FULL on each bitmap which
  *   has no free blocks in it.
+ * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
+ *   has come up short on a free block search.
  *
  * Returns: 0 on success, -ENOSPC if there is no block of the requested state
  */
@@ -1604,6 +1612,8 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
 	struct buffer_head *bh;
 	int initial_bii;
 	u32 initial_offset;
+	int first_bii = rbm->bii;
+	u32 first_offset = rbm->offset;
 	u32 offset;
 	u8 *buffer;
 	int n = 0;
@@ -1679,6 +1689,13 @@ next_iter:
 	if (minext == NULL || state != GFS2_BLKST_FREE)
 		return -ENOSPC;
 
+	/* If the extent was too small, and it's smaller than the smallest
+	   to have failed before, remember for future reference that it's
+	   useless to search this rgrp again for this amount or more. */
+	if ((first_offset == 0) && (first_bii == 0) &&
+	    (*minext < rbm->rgd->rd_extfail_pt))
+		rbm->rgd->rd_extfail_pt = *minext;
+
 	/* If the maximum extent we found is big enough to fulfill the
 	   minimum requirements, use it anyway. */
 	if (maxext.len) {
@@ -1924,7 +1941,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
 		}
 
 		/* Skip unuseable resource groups */
-		if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
+		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
+						 GFS2_RDF_ERROR)) ||
+		    (ap && (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)))
 			goto skip_rgrp;
 
 		if (sdp->sd_args.ar_rgrplvb)
@@ -2106,10 +2125,10 @@ int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 
 	if (rgd == NULL)
 		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
 		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
-		       rgd->rd_reserved);
+		       rgd->rd_reserved, rgd->rd_extfail_pt);
 	spin_lock(&rgd->rd_rsspin);
 	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
 		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
@@ -2228,9 +2247,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	/* Since all blocks are reserved in advance, this shouldn't happen */
 	if (error) {
-		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
+		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
 			(unsigned long long)ip->i_no_addr, error, *nblocks,
-			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
+			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
+			rbm.rgd->rd_extfail_pt);
 		goto rgrp_error;
 	}
 




^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Cluster-devel] [GFS2 PATCH] [TRY #2] GFS2: Implement a "bitmap has no extents longer than X" scheme
  2013-11-07 11:32   ` Steven Whitehouse
@ 2013-11-11 14:01     ` Bob Peterson
  2013-11-11 14:52       ` Steven Whitehouse
  0 siblings, 1 reply; 4+ messages in thread
From: Bob Peterson @ 2013-11-11 14:01 UTC (permalink / raw)
  To: cluster-devel.redhat.com

----- Original Message -----
| Hi,
| 
| On Wed, 2013-11-06 at 10:59 -0500, Bob Peterson wrote:
| > Hi,
| > 
| > This is a slightly revised version of a patch I recently sent out:
| > 
| > Before this patch, we used a bit, GBF_FULL, to determine when a bitmap
| > was full. With the preceding patch, we started accepting block
| > reservations smaller than the ideal size, which requires a lot more
| > parsing of the bitmaps. To reduce the amount of bitmap searching, this
| > patch implements a scheme whereby each rgrp keeps track of the point
| > at this multi-block reservations will fail.
| > 
| The first two patches look good. However this one I'm still not so keen
| on. I'd much prefer if we can avoid dropping the GBF_FULL support at
| bitmap level if the new test is at rgrp level. It doesn't look like that
| is too tricky to achieve either - all we need to do is to not remove the
| GBF_FULL support since there doesn't appear to be any conflict between
| the two mechanisms.
| 
| The net result is a diffstat that looks like this:
| 
| [root at chywoon linux-2.6]# diffstat -p1 ./patch3.diff
|  fs/gfs2/incore.h |    1 +
|  fs/gfs2/lops.c   |    1 +
|  fs/gfs2/rgrp.c   |   32 ++++++++++++++++++++++++++------
|  3 files changed, 28 insertions(+), 6 deletions(-)
| 
| as opposed to your patch which looks like:
| 
| [root at chywoon linux-2.6]# diffstat -p1 ../bob-a-3.mbox
|  fs/gfs2/incore.h |    2 +-
|  fs/gfs2/lops.c   |    2 +-
|  fs/gfs2/rgrp.c   |   53
|  ++++++++++++++++++++++++++---------------------------
|  3 files changed, 28 insertions(+), 29 deletions(-)
| 
| So there would be fewer changes overall. I've given this a quick spin,
| and it hasn't broken yet, so let me know what you think,
| 
| Steve.
Hi Steve,

I've done a fair amount of testing on your new patch that does not remove
the GBF_FULL flag, and it seems to work fine. Go ahead and use that one
instead of mine.

IOW: ACK

Regards,

Bob Peterson
Red Hat File Systems



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Cluster-devel] [GFS2 PATCH] [TRY #2] GFS2: Implement a "bitmap has no extents longer than X" scheme
  2013-11-11 14:01     ` Bob Peterson
@ 2013-11-11 14:52       ` Steven Whitehouse
  0 siblings, 0 replies; 4+ messages in thread
From: Steven Whitehouse @ 2013-11-11 14:52 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Mon, 2013-11-11 at 09:01 -0500, Bob Peterson wrote:
> Hi Steve,
> 
> I've done a fair amount of testing on your new patch that does not remove
> the GBF_FULL flag, and it seems to work fine. Go ahead and use that one
> instead of mine.
> 
> IOW: ACK
> 
> Regards,
> 
> Bob Peterson
> Red Hat File Systems

Ok, that sounds good. I'll add these patches to my tree shortly. Linus
has pulled the tree over the weekend, so as soon as -rc1 has arrived,
I'll sort out a new tree,

Steve.




^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-11-11 14:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1450086691.17689091.1383753511869.JavaMail.root@redhat.com>
2013-11-06 15:59 ` [Cluster-devel] [GFS2 PATCH] [TRY #2] GFS2: Implement a "bitmap has no extents longer than X" scheme Bob Peterson
2013-11-07 11:32   ` Steven Whitehouse
2013-11-11 14:01     ` Bob Peterson
2013-11-11 14:52       ` Steven Whitehouse

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.