[Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance

All of lore.kernel.org
 help / color / mirror / Atom feed

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
@ 2016-06-17  9:28 Junxiao Bi
  2016-06-17  9:43 ` Joseph Qi
  2016-07-07  2:05 ` xuejiufei
  0 siblings, 2 replies; 12+ messages in thread
From: Junxiao Bi @ 2016-06-17  9:28 UTC (permalink / raw)
  To: ocfs2-devel

Journal replay is run when doing recovery for a dead node. To avoid
the impact of a stale cache, all blocks of the dead node's journal
inode were reloaded from disk. This hurts performance; checking
whether a block is cached before reloading it improves performance
a lot. In my test environment, the time spent doing recovery was
reduced from 120s to 1s.

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
---
 fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index e607419cdfa4..bc0e21e8a674 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
 	int status = 0;
 	int i;
 	u64 v_blkno, p_blkno, p_blocks, num_blocks;
-#define CONCURRENT_JOURNAL_FILL 32ULL
-	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
-
-	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
+	struct buffer_head *bh = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 	v_blkno = 0;
@@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
 			goto bail;
 		}
 
-		if (p_blocks > CONCURRENT_JOURNAL_FILL)
-			p_blocks = CONCURRENT_JOURNAL_FILL;
+		for (i = 0; i < p_blocks; i++) {
+			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
+					osb->sb->s_blocksize);
+			/* block not cached. */
+			if (!bh) {
+				p_blkno++;
+				continue;
+			}
 
-		/* We are reading journal data which should not
-		 * be put in the uptodate cache */
-		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
-						p_blkno, p_blocks, bhs);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
+			brelse(bh);
+			bh = NULL;
+			/* We are reading journal data which should not
+			 * be put in the uptodate cache.
+			 */
+			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
+			if (status < 0) {
+				mlog_errno(status);
+				goto bail;
+			}
 
-		for(i = 0; i < p_blocks; i++) {
-			brelse(bhs[i]);
-			bhs[i] = NULL;
+			brelse(bh);
+			bh = NULL;
 		}
 
 		v_blkno += p_blocks;
 	}
 
 bail:
-	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
-		brelse(bhs[i]);
 	return status;
 }
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-06-17  9:28 [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance Junxiao Bi
@ 2016-06-17  9:43 ` Joseph Qi
  2016-06-20  3:10   ` Gang He
  2016-06-23  1:17   ` Junxiao Bi
  2016-07-07  2:05 ` xuejiufei
  1 sibling, 2 replies; 12+ messages in thread
From: Joseph Qi @ 2016-06-17  9:43 UTC (permalink / raw)
  To: ocfs2-devel

On 2016/6/17 17:28, Junxiao Bi wrote:
> Journal replay will be run when do recovery for a dead node,
> to avoid the stale cache impact, all blocks of dead node's
> journal inode were reload from disk. This hurts the performance,
> check whether one block is cached before reload it can improve
> a lot performance. In my test env, the time doing recovery was
> improved from 120s to 1s.
> 
> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Looks good to me. And it indeed has performance improvement from my
test.
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>

> ---
>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>  1 file changed, 22 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index e607419cdfa4..bc0e21e8a674 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>  	int status = 0;
>  	int i;
>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
> -#define CONCURRENT_JOURNAL_FILL 32ULL
> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
> -
> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
> +	struct buffer_head *bh = NULL;
> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>  
>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>  	v_blkno = 0;
> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>  			goto bail;
>  		}
>  
> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
> -			p_blocks = CONCURRENT_JOURNAL_FILL;
> +		for (i = 0; i < p_blocks; i++) {
> +			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
> +					osb->sb->s_blocksize);
> +			/* block not cached. */
> +			if (!bh) {
> +				p_blkno++;
> +				continue;
> +			}
>  
> -		/* We are reading journal data which should not
> -		 * be put in the uptodate cache */
> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
> -						p_blkno, p_blocks, bhs);
> -		if (status < 0) {
> -			mlog_errno(status);
> -			goto bail;
> -		}
> +			brelse(bh);
> +			bh = NULL;
> +			/* We are reading journal data which should not
> +			 * be put in the uptodate cache.
> +			 */
> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
> +			if (status < 0) {
> +				mlog_errno(status);
> +				goto bail;
> +			}
>  
> -		for(i = 0; i < p_blocks; i++) {
> -			brelse(bhs[i]);
> -			bhs[i] = NULL;
> +			brelse(bh);
> +			bh = NULL;
>  		}
>  
>  		v_blkno += p_blocks;
>  	}
>  
>  bail:
> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
> -		brelse(bhs[i]);
>  	return status;
>  }
>  
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-06-17  9:43 ` Joseph Qi
@ 2016-06-20  3:10   ` Gang He
  2016-06-21  9:03     ` Junxiao Bi
  2016-06-23  1:17   ` Junxiao Bi
  1 sibling, 1 reply; 12+ messages in thread
From: Gang He @ 2016-06-20  3:10 UTC (permalink / raw)
  To: ocfs2-devel

Hello Junxiao,

I think this change will bring a performance improvement, but from the function comments
/*
 * JBD Might read a cached version of another nodes journal file. We
 * don't want this as this file changes often and we get no
 * notification on those changes. The only way to be sure that we've
 * got the most up to date version of those blocks then is to force
 * read them off disk. Just searching through the buffer cache won't
 * work as there may be pages backing this file which are still marked
 * up to date. We know things can't change on this file underneath us
 * as we have the lock by now :)
 */
static int ocfs2_force_read_journal(struct inode *inode)

Did we consider the potential risk behind this patch? I am not familiar with this part of the code,
so I want to know whether there is any sync mechanism to make sure the block cache for another node's journal file really holds the latest data.



Thanks
Gang 


>>> 
> On 2016/6/17 17:28, Junxiao Bi wrote:
>> Journal replay will be run when do recovery for a dead node,
>> to avoid the stale cache impact, all blocks of dead node's
>> journal inode were reload from disk. This hurts the performance,
>> check whether one block is cached before reload it can improve
>> a lot performance. In my test env, the time doing recovery was
>> improved from 120s to 1s.
>> 
>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> Looks good to me. And it indeed has performance improvement from my
> test.
> Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
> 
>> ---
>>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>>  1 file changed, 22 insertions(+), 19 deletions(-)
>> 
>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>> index e607419cdfa4..bc0e21e8a674 100644
>> --- a/fs/ocfs2/journal.c
>> +++ b/fs/ocfs2/journal.c
>> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode 
> *inode)
>>  	int status = 0;
>>  	int i;
>>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
>> -#define CONCURRENT_JOURNAL_FILL 32ULL
>> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
>> -
>> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
>> +	struct buffer_head *bh = NULL;
>> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>  
>>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>>  	v_blkno = 0;
>> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode 
> *inode)
>>  			goto bail;
>>  		}
>>  
>> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
>> -			p_blocks = CONCURRENT_JOURNAL_FILL;
>> +		for (i = 0; i < p_blocks; i++) {
>> +			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
>> +					osb->sb->s_blocksize);
>> +			/* block not cached. */
>> +			if (!bh) {
>> +				p_blkno++;
>> +				continue;
>> +			}
>>  
>> -		/* We are reading journal data which should not
>> -		 * be put in the uptodate cache */
>> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
>> -						p_blkno, p_blocks, bhs);
>> -		if (status < 0) {
>> -			mlog_errno(status);
>> -			goto bail;
>> -		}
>> +			brelse(bh);
>> +			bh = NULL;
>> +			/* We are reading journal data which should not
>> +			 * be put in the uptodate cache.
>> +			 */
>> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
>> +			if (status < 0) {
>> +				mlog_errno(status);
>> +				goto bail;
>> +			}
>>  
>> -		for(i = 0; i < p_blocks; i++) {
>> -			brelse(bhs[i]);
>> -			bhs[i] = NULL;
>> +			brelse(bh);
>> +			bh = NULL;
>>  		}
>>  
>>  		v_blkno += p_blocks;
>>  	}
>>  
>>  bail:
>> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
>> -		brelse(bhs[i]);
>>  	return status;
>>  }
>>  
>> 
> 
> 
> 
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel at oss.oracle.com 
> https://oss.oracle.com/mailman/listinfo/ocfs2-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-06-20  3:10   ` Gang He
@ 2016-06-21  9:03     ` Junxiao Bi
  0 siblings, 0 replies; 12+ messages in thread
From: Junxiao Bi @ 2016-06-21  9:03 UTC (permalink / raw)
  To: ocfs2-devel

Hi Gang,

On 06/20/2016 11:10 AM, Gang He wrote:
> Hello Junxiao,
> 
> I think this change will bring a performance improvement, but from the function comments
> /*
>  * JBD Might read a cached version of another nodes journal file. We
>  * don't want this as this file changes often and we get no
>  * notification on those changes. The only way to be sure that we've
>  * got the most up to date version of those blocks then is to force
>  * read them off disk. Just searching through the buffer cache won't
>  * work as there may be pages backing this file which are still marked
>  * up to date. We know things can't change on this file underneath us
>  * as we have the lock by now :)
>  */
> static int ocfs2_force_read_journal(struct inode *inode)
> 
> Did we consider this potential risk behind this patch? I am not familiar with this part code, 
> I want to know if there is any sync mechanism to make sure the block cache for another node journal file is really the latest data?  
I don't see that it is needed, because that stale info will not be used
except for journal replay.

Thanks,
Junxiao.
> 
> 
> 
> Thanks
> Gang 
> 
> 
>>>>
>> On 2016/6/17 17:28, Junxiao Bi wrote:
>>> Journal replay will be run when do recovery for a dead node,
>>> to avoid the stale cache impact, all blocks of dead node's
>>> journal inode were reload from disk. This hurts the performance,
>>> check whether one block is cached before reload it can improve
>>> a lot performance. In my test env, the time doing recovery was
>>> improved from 120s to 1s.
>>>
>>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
>> Looks good to me. And it indeed has performance improvement from my
>> test.
>> Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
>>
>>> ---
>>>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>>>  1 file changed, 22 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>>> index e607419cdfa4..bc0e21e8a674 100644
>>> --- a/fs/ocfs2/journal.c
>>> +++ b/fs/ocfs2/journal.c
>>> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode 
>> *inode)
>>>  	int status = 0;
>>>  	int i;
>>>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
>>> -#define CONCURRENT_JOURNAL_FILL 32ULL
>>> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
>>> -
>>> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
>>> +	struct buffer_head *bh = NULL;
>>> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>>  
>>>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>>>  	v_blkno = 0;
>>> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode 
>> *inode)
>>>  			goto bail;
>>>  		}
>>>  
>>> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
>>> -			p_blocks = CONCURRENT_JOURNAL_FILL;
>>> +		for (i = 0; i < p_blocks; i++) {
>>> +			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
>>> +					osb->sb->s_blocksize);
>>> +			/* block not cached. */
>>> +			if (!bh) {
>>> +				p_blkno++;
>>> +				continue;
>>> +			}
>>>  
>>> -		/* We are reading journal data which should not
>>> -		 * be put in the uptodate cache */
>>> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
>>> -						p_blkno, p_blocks, bhs);
>>> -		if (status < 0) {
>>> -			mlog_errno(status);
>>> -			goto bail;
>>> -		}
>>> +			brelse(bh);
>>> +			bh = NULL;
>>> +			/* We are reading journal data which should not
>>> +			 * be put in the uptodate cache.
>>> +			 */
>>> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
>>> +			if (status < 0) {
>>> +				mlog_errno(status);
>>> +				goto bail;
>>> +			}
>>>  
>>> -		for(i = 0; i < p_blocks; i++) {
>>> -			brelse(bhs[i]);
>>> -			bhs[i] = NULL;
>>> +			brelse(bh);
>>> +			bh = NULL;
>>>  		}
>>>  
>>>  		v_blkno += p_blocks;
>>>  	}
>>>  
>>>  bail:
>>> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
>>> -		brelse(bhs[i]);
>>>  	return status;
>>>  }
>>>  
>>>
>>
>>
>>
>> _______________________________________________
>> Ocfs2-devel mailing list
>> Ocfs2-devel at oss.oracle.com 
>> https://oss.oracle.com/mailman/listinfo/ocfs2-devel
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-06-17  9:43 ` Joseph Qi
  2016-06-20  3:10   ` Gang He
@ 2016-06-23  1:17   ` Junxiao Bi
  2016-06-23 22:13     ` Andrew Morton
  1 sibling, 1 reply; 12+ messages in thread
From: Junxiao Bi @ 2016-06-23  1:17 UTC (permalink / raw)
  To: ocfs2-devel

Hi Andrew,

Did you miss this patch to your tree?

Thanks,
Junxiao.

On 06/17/2016 05:43 PM, Joseph Qi wrote:
> On 2016/6/17 17:28, Junxiao Bi wrote:
>> Journal replay will be run when do recovery for a dead node,
>> to avoid the stale cache impact, all blocks of dead node's
>> journal inode were reload from disk. This hurts the performance,
>> check whether one block is cached before reload it can improve
>> a lot performance. In my test env, the time doing recovery was
>> improved from 120s to 1s.
>>
>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> Looks good to me. And it indeed has performance improvement from my
> test.
> Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
> 
>> ---
>>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>>  1 file changed, 22 insertions(+), 19 deletions(-)
>>
>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>> index e607419cdfa4..bc0e21e8a674 100644
>> --- a/fs/ocfs2/journal.c
>> +++ b/fs/ocfs2/journal.c
>> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>  	int status = 0;
>>  	int i;
>>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
>> -#define CONCURRENT_JOURNAL_FILL 32ULL
>> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
>> -
>> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
>> +	struct buffer_head *bh = NULL;
>> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>  
>>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>>  	v_blkno = 0;
>> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>  			goto bail;
>>  		}
>>  
>> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
>> -			p_blocks = CONCURRENT_JOURNAL_FILL;
>> +		for (i = 0; i < p_blocks; i++) {
>> +			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
>> +					osb->sb->s_blocksize);
>> +			/* block not cached. */
>> +			if (!bh) {
>> +				p_blkno++;
>> +				continue;
>> +			}
>>  
>> -		/* We are reading journal data which should not
>> -		 * be put in the uptodate cache */
>> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
>> -						p_blkno, p_blocks, bhs);
>> -		if (status < 0) {
>> -			mlog_errno(status);
>> -			goto bail;
>> -		}
>> +			brelse(bh);
>> +			bh = NULL;
>> +			/* We are reading journal data which should not
>> +			 * be put in the uptodate cache.
>> +			 */
>> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
>> +			if (status < 0) {
>> +				mlog_errno(status);
>> +				goto bail;
>> +			}
>>  
>> -		for(i = 0; i < p_blocks; i++) {
>> -			brelse(bhs[i]);
>> -			bhs[i] = NULL;
>> +			brelse(bh);
>> +			bh = NULL;
>>  		}
>>  
>>  		v_blkno += p_blocks;
>>  	}
>>  
>>  bail:
>> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
>> -		brelse(bhs[i]);
>>  	return status;
>>  }
>>  
>>
> 
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-06-23  1:17   ` Junxiao Bi
@ 2016-06-23 22:13     ` Andrew Morton
  2016-06-24  0:46       ` Junxiao Bi
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2016-06-23 22:13 UTC (permalink / raw)
  To: ocfs2-devel

On Thu, 23 Jun 2016 09:17:53 +0800 Junxiao Bi <junxiao.bi@oracle.com> wrote:

> Hi Andrew,
> 
> Did you miss this patch to your tree?

I would have seen it eventually.  Explicitly cc'ing me on patches
helps, please.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-06-23 22:13     ` Andrew Morton
@ 2016-06-24  0:46       ` Junxiao Bi
  0 siblings, 0 replies; 12+ messages in thread
From: Junxiao Bi @ 2016-06-24  0:46 UTC (permalink / raw)
  To: ocfs2-devel

On 06/24/2016 06:13 AM, Andrew Morton wrote:
> On Thu, 23 Jun 2016 09:17:53 +0800 Junxiao Bi <junxiao.bi@oracle.com> wrote:
> 
>> Hi Andrew,
>>
>> Did you miss this patch to your tree?
> 
> I would have seen it eventually.  Explicitly cc'ing me on patches
> helps, please.
I see, will cc you next time.

Thanks,
Junxiao.
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-06-17  9:28 [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance Junxiao Bi
  2016-06-17  9:43 ` Joseph Qi
@ 2016-07-07  2:05 ` xuejiufei
  2016-07-07  2:16   ` Junxiao Bi
  1 sibling, 1 reply; 12+ messages in thread
From: xuejiufei @ 2016-07-07  2:05 UTC (permalink / raw)
  To: ocfs2-devel

Hi Junxiao,
p_blkno is not incremented after the forced read from disk, so
the same block is read from disk many times while the other blocks
that remain cached are not reloaded.

Thanks,
Jiufei

On 2016/6/17 17:28, Junxiao Bi wrote:
> Journal replay will be run when do recovery for a dead node,
> to avoid the stale cache impact, all blocks of dead node's
> journal inode were reload from disk. This hurts the performance,
> check whether one block is cached before reload it can improve
> a lot performance. In my test env, the time doing recovery was
> improved from 120s to 1s.
> 
> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> ---
>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>  1 file changed, 22 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index e607419cdfa4..bc0e21e8a674 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>  	int status = 0;
>  	int i;
>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
> -#define CONCURRENT_JOURNAL_FILL 32ULL
> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
> -
> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
> +	struct buffer_head *bh = NULL;
> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>  
>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>  	v_blkno = 0;
> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>  			goto bail;
>  		}
>  
> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
> -			p_blocks = CONCURRENT_JOURNAL_FILL;
> +		for (i = 0; i < p_blocks; i++) {
> +			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
> +					osb->sb->s_blocksize);
> +			/* block not cached. */
> +			if (!bh) {
> +				p_blkno++;
> +				continue;
> +			}
>  
> -		/* We are reading journal data which should not
> -		 * be put in the uptodate cache */
> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
> -						p_blkno, p_blocks, bhs);
> -		if (status < 0) {
> -			mlog_errno(status);
> -			goto bail;
> -		}
> +			brelse(bh);
> +			bh = NULL;
> +			/* We are reading journal data which should not
> +			 * be put in the uptodate cache.
> +			 */
> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
> +			if (status < 0) {
> +				mlog_errno(status);
> +				goto bail;
> +			}
>  
> -		for(i = 0; i < p_blocks; i++) {
> -			brelse(bhs[i]);
> -			bhs[i] = NULL;
> +			brelse(bh);
> +			bh = NULL;
>  		}
>  
>  		v_blkno += p_blocks;
>  	}
>  
>  bail:
> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
> -		brelse(bhs[i]);
>  	return status;
>  }
>  
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-07-07  2:05 ` xuejiufei
@ 2016-07-07  2:16   ` Junxiao Bi
  0 siblings, 0 replies; 12+ messages in thread
From: Junxiao Bi @ 2016-07-07  2:16 UTC (permalink / raw)
  To: ocfs2-devel

On 07/07/2016 10:05 AM, xuejiufei wrote:
> Hi Junxiao,
> p_blkno is not increased after force reading from disk, so
> this block is read many times from disk while other blocks
> remain in cached are not reloaded.
Good catch. Will send a v2 version.

Thanks,
Junxiao.
> 
> Thanks,
> Jiufei
> 
> On 2016/6/17 17:28, Junxiao Bi wrote:
>> Journal replay will be run when do recovery for a dead node,
>> to avoid the stale cache impact, all blocks of dead node's
>> journal inode were reload from disk. This hurts the performance,
>> check whether one block is cached before reload it can improve
>> a lot performance. In my test env, the time doing recovery was
>> improved from 120s to 1s.
>>
>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
>> ---
>>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>>  1 file changed, 22 insertions(+), 19 deletions(-)
>>
>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>> index e607419cdfa4..bc0e21e8a674 100644
>> --- a/fs/ocfs2/journal.c
>> +++ b/fs/ocfs2/journal.c
>> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>  	int status = 0;
>>  	int i;
>>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
>> -#define CONCURRENT_JOURNAL_FILL 32ULL
>> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
>> -
>> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
>> +	struct buffer_head *bh = NULL;
>> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>  
>>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>>  	v_blkno = 0;
>> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>  			goto bail;
>>  		}
>>  
>> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
>> -			p_blocks = CONCURRENT_JOURNAL_FILL;
>> +		for (i = 0; i < p_blocks; i++) {
>> +			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
>> +					osb->sb->s_blocksize);
>> +			/* block not cached. */
>> +			if (!bh) {
>> +				p_blkno++;
>> +				continue;
>> +			}
>>  
>> -		/* We are reading journal data which should not
>> -		 * be put in the uptodate cache */
>> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
>> -						p_blkno, p_blocks, bhs);
>> -		if (status < 0) {
>> -			mlog_errno(status);
>> -			goto bail;
>> -		}
>> +			brelse(bh);
>> +			bh = NULL;
>> +			/* We are reading journal data which should not
>> +			 * be put in the uptodate cache.
>> +			 */
>> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
>> +			if (status < 0) {
>> +				mlog_errno(status);
>> +				goto bail;
>> +			}
>>  
>> -		for(i = 0; i < p_blocks; i++) {
>> -			brelse(bhs[i]);
>> -			bhs[i] = NULL;
>> +			brelse(bh);
>> +			bh = NULL;
>>  		}
>>  
>>  		v_blkno += p_blocks;
>>  	}
>>  
>>  bail:
>> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
>> -		brelse(bhs[i]);
>>  	return status;
>>  }
>>  
>>
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-07-08 21:23 ` Andrew Morton
@ 2016-07-11  2:12   ` Junxiao Bi
  0 siblings, 0 replies; 12+ messages in thread
From: Junxiao Bi @ 2016-07-11  2:12 UTC (permalink / raw)
  To: ocfs2-devel

On 07/09/2016 05:23 AM, Andrew Morton wrote:
> On Thu,  7 Jul 2016 10:24:48 +0800 Junxiao Bi <junxiao.bi@oracle.com> wrote:
> 
>> Journal replay will be run when do recovery for a dead node,
>> to avoid the stale cache impact, all blocks of dead node's
>> journal inode were reload from disk. This hurts the performance,
>> check whether one block is cached before reload it can improve
>> a lot performance. In my test env, the time doing recovery was
>> improved from 120s to 1s.
> 
> So since v1 you did this (unchangelogged bugfix!):
> 
> --- a/fs/ocfs2/journal.c~ocfs2-improve-recovery-performance-v2
> +++ a/fs/ocfs2/journal.c
> @@ -1194,6 +1194,7 @@ static int ocfs2_force_read_journal(stru
>  
>  			brelse(bh);
>  			bh = NULL;
> +			p_blkno++;
>  		}
>  
>  		v_blkno += p_blocks;
> 
> 
> I suppose this is a bit neater?
Yes, looks good. Thank you.

Thanks,
Junxiao.
> 
> --- a/fs/ocfs2/journal.c~ocfs2-improve-recovery-performance-v2-fix
> +++ a/fs/ocfs2/journal.c
> @@ -1172,14 +1172,12 @@ static int ocfs2_force_read_journal(stru
>  			goto bail;
>  		}
>  
> -		for (i = 0; i < p_blocks; i++) {
> +		for (i = 0; i < p_blocks; i++, p_blkno++) {
>  			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
>  					osb->sb->s_blocksize);
>  			/* block not cached. */
> -			if (!bh) {
> -				p_blkno++;
> +			if (!bh)
>  				continue;
> -			}
>  
>  			brelse(bh);
>  			bh = NULL;
> @@ -1194,7 +1192,6 @@ static int ocfs2_force_read_journal(stru
>  
>  			brelse(bh);
>  			bh = NULL;
> -			p_blkno++;
>  		}
>  
>  		v_blkno += p_blocks;
> _
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
  2016-07-07  2:24 Junxiao Bi
@ 2016-07-08 21:23 ` Andrew Morton
  2016-07-11  2:12   ` Junxiao Bi
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2016-07-08 21:23 UTC (permalink / raw)
  To: ocfs2-devel

On Thu,  7 Jul 2016 10:24:48 +0800 Junxiao Bi <junxiao.bi@oracle.com> wrote:

> Journal replay will be run when do recovery for a dead node,
> to avoid the stale cache impact, all blocks of dead node's
> journal inode were reload from disk. This hurts the performance,
> check whether one block is cached before reload it can improve
> a lot performance. In my test env, the time doing recovery was
> improved from 120s to 1s.

So since v1 you did this (unchangelogged bugfix!):

--- a/fs/ocfs2/journal.c~ocfs2-improve-recovery-performance-v2
+++ a/fs/ocfs2/journal.c
@@ -1194,6 +1194,7 @@ static int ocfs2_force_read_journal(stru
 
 			brelse(bh);
 			bh = NULL;
+			p_blkno++;
 		}
 
 		v_blkno += p_blocks;


I suppose this is a bit neater?

--- a/fs/ocfs2/journal.c~ocfs2-improve-recovery-performance-v2-fix
+++ a/fs/ocfs2/journal.c
@@ -1172,14 +1172,12 @@ static int ocfs2_force_read_journal(stru
 			goto bail;
 		}
 
-		for (i = 0; i < p_blocks; i++) {
+		for (i = 0; i < p_blocks; i++, p_blkno++) {
 			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
 					osb->sb->s_blocksize);
 			/* block not cached. */
-			if (!bh) {
-				p_blkno++;
+			if (!bh)
 				continue;
-			}
 
 			brelse(bh);
 			bh = NULL;
@@ -1194,7 +1192,6 @@ static int ocfs2_force_read_journal(stru
 
 			brelse(bh);
 			bh = NULL;
-			p_blkno++;
 		}
 
 		v_blkno += p_blocks;
_

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance
@ 2016-07-07  2:24 Junxiao Bi
  2016-07-08 21:23 ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Junxiao Bi @ 2016-07-07  2:24 UTC (permalink / raw)
  To: ocfs2-devel

Journal replay is run when doing recovery for a dead node. To avoid
the impact of a stale cache, all blocks of the dead node's journal
inode were reloaded from disk. This hurts performance; checking
whether a block is cached before reloading it improves performance
a lot. In my test environment, the time spent doing recovery was
reduced from 120s to 1s.

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
---
 fs/ocfs2/journal.c |   42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index e607419cdfa4..67179cf60525 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
 	int status = 0;
 	int i;
 	u64 v_blkno, p_blkno, p_blocks, num_blocks;
-#define CONCURRENT_JOURNAL_FILL 32ULL
-	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
-
-	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
+	struct buffer_head *bh = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 	v_blkno = 0;
@@ -1174,29 +1172,35 @@ static int ocfs2_force_read_journal(struct inode *inode)
 			goto bail;
 		}
 
-		if (p_blocks > CONCURRENT_JOURNAL_FILL)
-			p_blocks = CONCURRENT_JOURNAL_FILL;
+		for (i = 0; i < p_blocks; i++) {
+			bh = __find_get_block(osb->sb->s_bdev, p_blkno,
+					osb->sb->s_blocksize);
+			/* block not cached. */
+			if (!bh) {
+				p_blkno++;
+				continue;
+			}
 
-		/* We are reading journal data which should not
-		 * be put in the uptodate cache */
-		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
-						p_blkno, p_blocks, bhs);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
+			brelse(bh);
+			bh = NULL;
+			/* We are reading journal data which should not
+			 * be put in the uptodate cache.
+			 */
+			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
+			if (status < 0) {
+				mlog_errno(status);
+				goto bail;
+			}
 
-		for(i = 0; i < p_blocks; i++) {
-			brelse(bhs[i]);
-			bhs[i] = NULL;
+			brelse(bh);
+			bh = NULL;
+			p_blkno++;
 		}
 
 		v_blkno += p_blocks;
 	}
 
 bail:
-	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
-		brelse(bhs[i]);
 	return status;
 }
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-07-11  2:12 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-17  9:28 [Ocfs2-devel] [PATCH v2] ocfs2: improve recovery performance Junxiao Bi
2016-06-17  9:43 ` Joseph Qi
2016-06-20  3:10   ` Gang He
2016-06-21  9:03     ` Junxiao Bi
2016-06-23  1:17   ` Junxiao Bi
2016-06-23 22:13     ` Andrew Morton
2016-06-24  0:46       ` Junxiao Bi
2016-07-07  2:05 ` xuejiufei
2016-07-07  2:16   ` Junxiao Bi
2016-07-07  2:24 Junxiao Bi
2016-07-08 21:23 ` Andrew Morton
2016-07-11  2:12   ` Junxiao Bi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.