From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andreas Gruenbacher Date: Wed, 14 Oct 2020 11:58:32 +0200 Subject: [Cluster-devel] [RFC 16/17] gfs2: Limit the maximum amount of reserved space In-Reply-To: <20201014095833.1035870-1-agruenba@redhat.com> References: <20201014095833.1035870-1-agruenba@redhat.com> Message-ID: <20201014095833.1035870-17-agruenba@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit When allocating blocks for an inode, gfs2 tries to keep future allocations efficient by "reserving" the blocks adjacent to the allocated blocks for future use by this inode. These "reservations" are node-local, and they can be stolen by other nodes or even other processes on the same node when necessary. Stealing from those reservations is very inefficient because it involves scanning the bitmaps, repeatedly. This slows down workloads that create many small files, for example. Fix this by discarding some of those semi-reservations as soon as they cover more than half of the remaining space. Fixes xfstests generic/488, generic/531. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 1 + fs/gfs2/rgrp.c | 30 ++++++++++++++++++++++++++++++ fs/gfs2/trace_gfs2.h | 13 ++++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 295c22441ade..4f68456da677 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -104,6 +104,7 @@ struct gfs2_rgrpd { u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; u32 rd_reserved; /* number of blocks reserved */ + u32 rd_wanted; u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 751bd31cfa5d..1ed09e45738f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -646,6 +646,7 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { + rgd->rd_wanted -= rs->rs_free; /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. 
Still, @@ -1519,6 +1520,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); + rgd->rd_wanted += rs->rs_free; spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); } @@ -1997,6 +1999,31 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) return 0; } +static bool gfs2_trim_wanted_blocks(struct gfs2_rgrpd *rgd) +{ + u32 max_wanted = (rgd->rd_free_clone - rgd->rd_reserved) / 2; + struct rb_node *n, *next; + bool trimmed = false; + + if (rgd->rd_wanted <= max_wanted) + goto out; + for (n = rb_first(&rgd->rd_rstree); n; n = next) { + struct gfs2_blkreserv *rs; + + next = rb_next(n); + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + if (rs->rs_reserved) + continue; + __rs_deltree(rs); + trimmed = true; + if (rgd->rd_wanted <= max_wanted) + break; + } + +out: + return trimmed; +} + /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -2046,6 +2073,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) while (loops < 3) { struct gfs2_rgrpd *rgd; + bool retry = false; rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl); if (rg_locked) { @@ -2110,7 +2138,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) if (rs->rs_reserved > blocks_available) rs->rs_reserved = blocks_available; rgd->rd_reserved += rs->rs_reserved; + if (!gfs2_rs_active(rs)) { + if (gfs2_trim_wanted_blocks(rgd)) + retry = true; + } spin_unlock(&rgd->rd_rsspin); + if (retry) + rg_mblk_search(rs->rs_rgd, ip, ap); rgrp_unlock_local(rs->rs_rgd); return 0; check_rgrp: @@ -2330,6 +2364,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, rs->rs_start += len; rlen = min(rs->rs_free, len); rs->rs_free -= rlen; + rgd->rd_wanted -= rlen; trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && rs->rs_free) diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 282fcb1a242f..4fd1614274fd 100644 --- a/fs/gfs2/trace_gfs2.h +++ 
b/fs/gfs2/trace_gfs2.h @@ -560,6 +560,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_reserved ) + __field( u32, rd_wanted ) ), TP_fast_assign( @@ -571,16 +572,19 @@ TRACE_EVENT(gfs2_block_alloc, __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; __entry->rd_reserved = rgd->rd_reserved; + __entry->rd_wanted = rgd->rd_wanted; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu rw:%lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone, (unsigned long)__entry->rd_reserved) + __entry->rd_free_clone, + (unsigned long)__entry->rd_reserved, + (unsigned long)__entry->rd_wanted) ); /* Keep track of multi-block reservations as they are allocated/freed */ @@ -595,6 +599,7 @@ TRACE_EVENT(gfs2_rs, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_reserved ) + __field( u32, rd_wanted ) __field( u64, inum ) __field( u64, start ) __field( u32, free ) @@ -607,6 +612,7 @@ TRACE_EVENT(gfs2_rs, __entry->rd_addr = rs->rs_rgd->rd_addr; __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; __entry->rd_reserved = rs->rs_rgd->rd_reserved; + __entry->rd_wanted = rs->rs_rgd->rd_wanted; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; __entry->start = rs->rs_start; @@ -615,13 +621,14 @@ TRACE_EVENT(gfs2_rs, __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu r:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu rw:%lu %s f:%lu r:%lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long long)__entry->rd_addr, (unsigned long)__entry->rd_free_clone, (unsigned 
long)__entry->rd_reserved, + (unsigned long)__entry->rd_wanted, rs_func_name(__entry->func), (unsigned long)__entry->free, (unsigned long)__entry->reserved) -- 2.26.2