All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] [GFS2 PATCH] GFS2: Add rgrp information to block_alloc trace point
       [not found] <31885223-eaad-4c37-962a-ad1e9aa1f022@zmail12.collab.prod.int.phx2.redhat.com>
@ 2012-04-12 12:43 ` Bob Peterson
  2012-04-12 13:01   ` Steven Whitehouse
  0 siblings, 1 reply; 5+ messages in thread
From: Bob Peterson @ 2012-04-12 12:43 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

This patch adds rgrp information to the block allocation trace point.

Regards,

Bob Peterson
Red Hat GFS

Signed-off-by: Bob Peterson <rpeterso@redhat.com> 
--
Author: Bob Peterson <rpeterso@redhat.com>
Date:   Thu Apr 12 08:32:45 2012 -0500

    GFS2: Add rgrp information to block_alloc trace point
    
    This patch adds resource group information to the block allocation
    trace point for GFS2. This makes it easier to debug problems with
    resource groups, such as management of the number of free blocks.

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7a1cf67..146c3d2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1566,7 +1566,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 				  ip->i_inode.i_gid);
 
 	rgd->rd_free_clone -= *nblocks;
-	trace_gfs2_block_alloc(ip, block, *nblocks,
+	trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
 			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
 	*bn = block;
 	return 0;
@@ -1593,7 +1593,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
 	if (!rgd)
 		return;
-	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
+	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1631,7 +1631,7 @@ void gfs2_unlink_di(struct inode *inode)
 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
 	if (!rgd)
 		return;
-	trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
+	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 }
@@ -1661,7 +1661,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
-	trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
+	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index dfa89cd..981b360 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -457,13 +457,15 @@ TRACE_EVENT(gfs2_bmap,
 /* Keep track of blocks as they are allocated/freed */
 TRACE_EVENT(gfs2_block_alloc,
 
-	TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
-		u8 block_state),
+	TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+		 u64 block, unsigned len, u8 block_state),
 
-	TP_ARGS(ip, block, len, block_state),
+	TP_ARGS(ip, rgd, block, len, block_state),
 
 	TP_STRUCT__entry(
 		__field(        dev_t,  dev                     )
+		__field(        u64,	rd_addr			)
+		__field(        u32,	rd_free_clone		)
 		__field(	u64,	start			)
 		__field(	u64,	inum			)
 		__field(	u32,	len			)
@@ -472,14 +474,18 @@ TRACE_EVENT(gfs2_block_alloc,
 
 	TP_fast_assign(
 		__entry->dev		= ip->i_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->rd_addr	= rgd->rd_addr;
+		__entry->rd_free_clone	= rgd->rd_free_clone;
 		__entry->start		= block;
 		__entry->inum		= ip->i_no_addr;
 		__entry->len		= len;
 		__entry->block_state	= block_state;
 	),
 
-	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+	TP_printk("%u,%u rg:%llu rf:%u bmap %llu alloc %llu/%lu %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->rd_addr,
+		  __entry->rd_free_clone,
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
 		  (unsigned long)__entry->len,



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Add rgrp information to block_alloc trace point
  2012-04-12 12:43 ` [Cluster-devel] [GFS2 PATCH] GFS2: Add rgrp information to block_alloc trace point Bob Peterson
@ 2012-04-12 13:01   ` Steven Whitehouse
  2012-04-12 13:16     ` Bob Peterson
  0 siblings, 1 reply; 5+ messages in thread
From: Steven Whitehouse @ 2012-04-12 13:01 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2012-04-12 at 08:43 -0400, Bob Peterson wrote:
> Hi,
> 
> This patch adds rgrp information to the block allocation trace point.
> 
> Regards,
> 
> Bob Peterson
> Red Hat GFS
> 
> Signed-off-by: Bob Peterson <rpeterso@redhat.com> 
> --
> Author: Bob Peterson <rpeterso@redhat.com>
> Date:   Thu Apr 12 08:32:45 2012 -0500
> 
>     GFS2: Add rgrp information to block_alloc trace point
>     
>     This patch adds resource group information to the block allocation
>     trace point for GFS2. This makes it easier to debug problems with
>     resource groups, such as management of the number of free blocks.
> 
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 7a1cf67..146c3d2 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -1566,7 +1566,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
>  				  ip->i_inode.i_gid);
>  
>  	rgd->rd_free_clone -= *nblocks;
> -	trace_gfs2_block_alloc(ip, block, *nblocks,
> +	trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
>  			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
>  	*bn = block;
>  	return 0;
> @@ -1593,7 +1593,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
>  	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
>  	if (!rgd)
>  		return;
> -	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
> +	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
>  	rgd->rd_free += blen;
>  	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
>  	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
> @@ -1631,7 +1631,7 @@ void gfs2_unlink_di(struct inode *inode)
>  	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
>  	if (!rgd)
>  		return;
> -	trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
> +	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
>  	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
>  	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
>  }
> @@ -1661,7 +1661,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
>  void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
>  {
>  	gfs2_free_uninit_di(rgd, ip->i_no_addr);
> -	trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
> +	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
>  	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
>  	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
>  }
> diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
> index dfa89cd..981b360 100644
> --- a/fs/gfs2/trace_gfs2.h
> +++ b/fs/gfs2/trace_gfs2.h
> @@ -457,13 +457,15 @@ TRACE_EVENT(gfs2_bmap,
>  /* Keep track of blocks as they are allocated/freed */
>  TRACE_EVENT(gfs2_block_alloc,
>  
> -	TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
> -		u8 block_state),
> +	TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
> +		 u64 block, unsigned len, u8 block_state),
>  
> -	TP_ARGS(ip, block, len, block_state),
> +	TP_ARGS(ip, rgd, block, len, block_state),
>  
>  	TP_STRUCT__entry(
>  		__field(        dev_t,  dev                     )
> +		__field(        u64,	rd_addr			)
> +		__field(        u32,	rd_free_clone		)
>  		__field(	u64,	start			)
>  		__field(	u64,	inum			)
>  		__field(	u32,	len			)
> @@ -472,14 +474,18 @@ TRACE_EVENT(gfs2_block_alloc,
>  
>  	TP_fast_assign(
>  		__entry->dev		= ip->i_gl->gl_sbd->sd_vfs->s_dev;
> +		__entry->rd_addr	= rgd->rd_addr;
> +		__entry->rd_free_clone	= rgd->rd_free_clone;
>  		__entry->start		= block;
>  		__entry->inum		= ip->i_no_addr;
>  		__entry->len		= len;
>  		__entry->block_state	= block_state;
>  	),
>  
> -	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
> +	TP_printk("%u,%u rg:%llu rf:%u bmap %llu alloc %llu/%lu %s",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> +		  (unsigned long long)__entry->rd_addr,
> +		  __entry->rd_free_clone,
>  		  (unsigned long long)__entry->inum,
>  		  (unsigned long long)__entry->start,
>  		  (unsigned long)__entry->len,
> 
All the bmap group tracepoints start with the device number, followed by
the string bmap, the inode number and then the start/length of the
blocks, so I'd rather not change that, without good reason.

If we are going to add the rgrp information here, then it should be done
later in the structure/string. I'm also wondering whether we shouldn't
add some of the other fields as well... rd_free and rd_dinodes spring to
mind as obvious candidates.

Since there is quite a lot of information in each rgrp, it almost
warrants its own tracepoint rather than trying to add it into an
existing one...

So I think that this probably needs some more thought,

Steve.




^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Add rgrp information to block_alloc trace point
  2012-04-12 13:01   ` Steven Whitehouse
@ 2012-04-12 13:16     ` Bob Peterson
  2012-04-12 13:21       ` Steven Whitehouse
  0 siblings, 1 reply; 5+ messages in thread
From: Bob Peterson @ 2012-04-12 13:16 UTC (permalink / raw)
  To: cluster-devel.redhat.com

----- Original Message -----
| All the bmap group tracepoints start with the device number, followed
| by
| the string bmap, the inode number and then the start/length of the
| blocks, so I'd rather not change that, without good reason.
| 
| If we are going to add the rgrp information here, then it should be
| done
| later in the structure/string. I'm also wondering whether we
| shouldn't
| add some of the other fields as well... rd_free and rd_dinodes spring
| to
| mind as obvious candidates.
| 
| Since there is quite a lot of information in each rgrp, it almost
| warrants its own tracepoint rather than trying to add it into an
| existing one...
| 
| So I think that this probably needs some more thought,
| 
| Steve.

Hi,

I can reformat it to put the rgrp data later in the string.

The main reason I added that particular rgrp information was for
the purposes of debugging the (future) block reservations code
for file defragmentation. For that (future) patch I am adding a new
trace point for block reservations. Adding the rgrp address and
rd_free_clone to this trace point allows us to see a correlation
between the decisions made by the reservations code and the
actual blocks that are allocated as a result. While I agree that
another trace point may be warranted for rgrp information, I'd
rather keep the rgrp address and rd_free_clone in this trace point
for that reason.

I'll see if I can rearrange the format to be more suitable.

Regards,

Bob Peterson
Red Hat File Systems



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Add rgrp information to block_alloc trace point
  2012-04-12 13:16     ` Bob Peterson
@ 2012-04-12 13:21       ` Steven Whitehouse
  2012-04-12 13:34         ` Bob Peterson
  0 siblings, 1 reply; 5+ messages in thread
From: Steven Whitehouse @ 2012-04-12 13:21 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2012-04-12 at 09:16 -0400, Bob Peterson wrote:
> ----- Original Message -----
> | All the bmap group tracepoints start with the device number, followed
> | by
> | the string bmap, the inode number and then the start/length of the
> | blocks, so I'd rather not change that, without good reason.
> | 
> | If we are going to add the rgrp information here, then it should be
> | done
> | later in the structure/string. I'm also wondering whether we
> | shouldn't
> | add some of the other fields as well... rd_free and rd_dinodes spring
> | to
> | mind as obvious candidates.
> | 
> | Since there is quite a lot of information in each rgrp, it almost
> | warrants its own tracepoint rather than trying to add it into an
> | existing one...
> | 
> | So I think that this probably needs some more thought,
> | 
> | Steve.
> 
> Hi,
> 
> I can reformat it to put the rgrp data later in the string.
> 
> The main reason I added that particular rgrp information was for
> the purposes of debugging the (future) block reservations code
> for file defragmentation. For that (future) patch I add a new
> trace point for block reservations. Adding the rgrp address and
> rd_free_clone to this trace point allows us to see a correlation
> between the decisions made by the reservations code and the
> actual blocks that are allocated as a result. While I agree that
> another trace point may be warranted for rgrp information, I'd
> rather keep the rgrp address and rd_free_clone in this trace point
> for that reason.
> 
> I'll see if I can rearrange the format to be more suitable.
> 
> Regards,
> 
> Bob Peterson
> Red Hat File Systems

I'm still confused though... the reservation is just a start/length
pair, so why do we need the resource group in order to match it up here?

The rgrp information is also available via the rgrp specific entry in
the glocks file too, so we can always get it that way if required,

Steve.




^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Add rgrp information to block_alloc trace point
  2012-04-12 13:21       ` Steven Whitehouse
@ 2012-04-12 13:34         ` Bob Peterson
  0 siblings, 0 replies; 5+ messages in thread
From: Bob Peterson @ 2012-04-12 13:34 UTC (permalink / raw)
  To: cluster-devel.redhat.com

----- Original Message -----
| Hi,
| 
| On Thu, 2012-04-12 at 09:16 -0400, Bob Peterson wrote:
| > ----- Original Message -----
| > | All the bmap group tracepoints start with the device number,
| > | followed
| > | by
| > | the string bmap, the inode number and then the start/length of
| > | the
| > | blocks, so I'd rather not change that, without good reason.
| > | 
| > | If we are going to add the rgrp information here, then it should
| > | be
| > | done
| > | later in the structure/string. I'm also wondering whether we
| > | shouldn't
| > | add some of the other fields as well... rd_free and rd_dinodes
| > | spring
| > | to
| > | mind as obvious candidates.
| > | 
| > | Since there is quite a lot of information in each rgrp, it almost
| > | warrants its own tracepoint rather than trying to add it into an
| > | existing one...
| > | 
| > | So I think that this probably needs some more thought,
| > | 
| > | Steve.
| > 
| > Hi,
| > 
| > I can reformat it to put the rgrp data later in the string.
| > 
| > The main reason I added that particular rgrp information was for
| > the purposes of debugging the (future) block reservations code
| > for file defragmentation. For that (future) patch I add a new
| > trace point for block reservations. Adding the rgrp address and
| > rd_free_clone to this trace point allows us to see a correlation
| > between the decisions made by the reservations code and the
| > actual blocks that are allocated as a result. While I agree that
| > another trace point may be warranted for rgrp information, I'd
| > rather keep the rgrp address and rd_free_clone in this trace point
| > for that reason.
| > 
| > I'll see if I can rearrange the format to be more suitable.
| > 
| > Regards,
| > 
| > Bob Peterson
| > Red Hat File Systems
| 
| I'm still confused though... the reservation is just a start/length
| pair, so why do we need the resource group in order to match it up
| here?
| 
| The rgrp information is also available via the rgrp specific entry in
| the glocks file too, so we can always get it that way if required,
| 
| Steve.

Hi,

The forthcoming block reservations code is complex, but basically,
some writes are allocating spans of blocks as you say. Others are
not, for technical reasons. Later, the blocks within the reserved
span are claimed. These reserved spans of blocks become "no-fly
zones" for other processes which may also be reserving blocks and/or
allocating blocks _without_ a reservation.

In debugging this new code, I ran into cases where a process was
improperly allocating a block that had been reserved by another process.
I also ran into cases where the block allocation code failed because
of mismanagement of the free space numbers for rgrps. By adding the
rgrp address and free space info to the allocation trace point, I
was able to make a correlation between what was reserved and what
was actually allocated. Without that information, I couldn't tell
when blocks were being allocated (or when the free space numbers were
being mismanaged) by processes that didn't hold a reservation.

Regards,

Bob Peterson
Red Hat File Systems



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-04-12 13:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <31885223-eaad-4c37-962a-ad1e9aa1f022@zmail12.collab.prod.int.phx2.redhat.com>
2012-04-12 12:43 ` [Cluster-devel] [GFS2 PATCH] GFS2: Add rgrp information to block_alloc trace point Bob Peterson
2012-04-12 13:01   ` Steven Whitehouse
2012-04-12 13:16     ` Bob Peterson
2012-04-12 13:21       ` Steven Whitehouse
2012-04-12 13:34         ` Bob Peterson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.