All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH] ocfs2: improve recovery performance
@ 2016-06-17  6:10 Junxiao Bi
  2016-06-17  7:44 ` Joseph Qi
  0 siblings, 1 reply; 5+ messages in thread
From: Junxiao Bi @ 2016-06-17  6:10 UTC (permalink / raw)
  To: ocfs2-devel

Journal replay will be run when doing recovery for a dead node;
to avoid the impact of stale cache, all blocks of the dead node's
journal inode were reloaded from disk. This hurts performance.
Checking whether a block is already cached before reloading it
can improve performance a lot. In my test env, the time spent doing
recovery was improved from 120s to 1s.

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
---
 fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index e607419cdfa4..8b808afd5f82 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
 	int status = 0;
 	int i;
 	u64 v_blkno, p_blkno, p_blocks, num_blocks;
-#define CONCURRENT_JOURNAL_FILL 32ULL
-	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
-
-	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
+	struct buffer_head *bhs[1] = {NULL};
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 	v_blkno = 0;
@@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
 			goto bail;
 		}
 
-		if (p_blocks > CONCURRENT_JOURNAL_FILL)
-			p_blocks = CONCURRENT_JOURNAL_FILL;
+		for (i = 0; i < p_blocks; i++) {
+			bhs[0] = __find_get_block(osb->sb->s_bdev, p_blkno,
+					osb->sb->s_blocksize);
+			/* block not cached. */
+			if (!bhs[0]) {
+				p_blkno++;
+				continue;
+			}
 
-		/* We are reading journal data which should not
-		 * be put in the uptodate cache */
-		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
-						p_blkno, p_blocks, bhs);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
+			brelse(bhs[0]);
+			bhs[0] = NULL;
+			/* We are reading journal data which should not
+			 * be put in the uptodate cache.
+			 */
+			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, bhs);
+			if (status < 0) {
+				mlog_errno(status);
+				goto bail;
+			}
 
-		for(i = 0; i < p_blocks; i++) {
-			brelse(bhs[i]);
-			bhs[i] = NULL;
+			brelse(bhs[0]);
+			bhs[0] = NULL;
 		}
 
 		v_blkno += p_blocks;
 	}
 
 bail:
-	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
-		brelse(bhs[i]);
 	return status;
 }
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: improve recovery performance
  2016-06-17  6:10 [Ocfs2-devel] [PATCH] ocfs2: improve recovery performance Junxiao Bi
@ 2016-06-17  7:44 ` Joseph Qi
  2016-06-17  7:50   ` Junxiao Bi
  0 siblings, 1 reply; 5+ messages in thread
From: Joseph Qi @ 2016-06-17  7:44 UTC (permalink / raw)
  To: ocfs2-devel

Hi Junxiao,

On 2016/6/17 14:10, Junxiao Bi wrote:
> Journal replay will be run when do recovery for a dead node,
> to avoid the stale cache impact, all blocks of dead node's
> journal inode were reload from disk. This hurts the performance,
> check whether one block is cached before reload it can improve
> a lot performance. In my test env, the time doing recovery was
> improved from 120s to 1s.
> 
> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
> ---
>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>  1 file changed, 22 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index e607419cdfa4..8b808afd5f82 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>  	int status = 0;
>  	int i;
>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
> -#define CONCURRENT_JOURNAL_FILL 32ULL
> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
> -
> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
> +	struct buffer_head *bhs[1] = {NULL};
Since now we do not need batch load, how about making the logic like:

	struct buffer_head *bh = NULL;
	...
	ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);

Thanks,
Joseph

> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>  
>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>  	v_blkno = 0;
> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>  			goto bail;
>  		}
>  
> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
> -			p_blocks = CONCURRENT_JOURNAL_FILL;
> +		for (i = 0; i < p_blocks; i++) {
> +			bhs[0] = __find_get_block(osb->sb->s_bdev, p_blkno,
> +					osb->sb->s_blocksize);
> +			/* block not cached. */
> +			if (!bhs[0]) {
> +				p_blkno++;
> +				continue;
> +			}
>  
> -		/* We are reading journal data which should not
> -		 * be put in the uptodate cache */
> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
> -						p_blkno, p_blocks, bhs);
> -		if (status < 0) {
> -			mlog_errno(status);
> -			goto bail;
> -		}
> +			brelse(bhs[0]);
> +			bhs[0] = NULL;
> +			/* We are reading journal data which should not
> +			 * be put in the uptodate cache.
> +			 */
> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, bhs);
> +			if (status < 0) {
> +				mlog_errno(status);
> +				goto bail;
> +			}
>  
> -		for(i = 0; i < p_blocks; i++) {
> -			brelse(bhs[i]);
> -			bhs[i] = NULL;
> +			brelse(bhs[0]);
> +			bhs[0] = NULL;
>  		}
>  
>  		v_blkno += p_blocks;
>  	}
>  
>  bail:
> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
> -		brelse(bhs[i]);
>  	return status;
>  }
>  
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: improve recovery performance
  2016-06-17  7:44 ` Joseph Qi
@ 2016-06-17  7:50   ` Junxiao Bi
  2016-06-17  8:32     ` Joseph Qi
  0 siblings, 1 reply; 5+ messages in thread
From: Junxiao Bi @ 2016-06-17  7:50 UTC (permalink / raw)
  To: ocfs2-devel

Hi Joseph,

On 06/17/2016 03:44 PM, Joseph Qi wrote:
> Hi Junxiao,
> 
> On 2016/6/17 14:10, Junxiao Bi wrote:
>> Journal replay will be run when do recovery for a dead node,
>> to avoid the stale cache impact, all blocks of dead node's
>> journal inode were reload from disk. This hurts the performance,
>> check whether one block is cached before reload it can improve
>> a lot performance. In my test env, the time doing recovery was
>> improved from 120s to 1s.
>>
>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
>> ---
>>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>>  1 file changed, 22 insertions(+), 19 deletions(-)
>>
>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>> index e607419cdfa4..8b808afd5f82 100644
>> --- a/fs/ocfs2/journal.c
>> +++ b/fs/ocfs2/journal.c
>> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>  	int status = 0;
>>  	int i;
>>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
>> -#define CONCURRENT_JOURNAL_FILL 32ULL
>> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
>> -
>> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
>> +	struct buffer_head *bhs[1] = {NULL};
> Since now we do not need batch load, how about make the logic like:
> 
> 	struct buffer_head *bh = NULL;
> 	...
> 	ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
This array is used because ocfs2_read_blocks_sync() needs it as the
last parameter.

Thanks,
Junxiao.
> 
> Thanks,
> Joseph
> 
>> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>  
>>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>>  	v_blkno = 0;
>> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>  			goto bail;
>>  		}
>>  
>> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
>> -			p_blocks = CONCURRENT_JOURNAL_FILL;
>> +		for (i = 0; i < p_blocks; i++) {
>> +			bhs[0] = __find_get_block(osb->sb->s_bdev, p_blkno,
>> +					osb->sb->s_blocksize);
>> +			/* block not cached. */
>> +			if (!bhs[0]) {
>> +				p_blkno++;
>> +				continue;
>> +			}
>>  
>> -		/* We are reading journal data which should not
>> -		 * be put in the uptodate cache */
>> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
>> -						p_blkno, p_blocks, bhs);
>> -		if (status < 0) {
>> -			mlog_errno(status);
>> -			goto bail;
>> -		}
>> +			brelse(bhs[0]);
>> +			bhs[0] = NULL;
>> +			/* We are reading journal data which should not
>> +			 * be put in the uptodate cache.
>> +			 */
>> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, bhs);
>> +			if (status < 0) {
>> +				mlog_errno(status);
>> +				goto bail;
>> +			}
>>  
>> -		for(i = 0; i < p_blocks; i++) {
>> -			brelse(bhs[i]);
>> -			bhs[i] = NULL;
>> +			brelse(bhs[0]);
>> +			bhs[0] = NULL;
>>  		}
>>  
>>  		v_blkno += p_blocks;
>>  	}
>>  
>>  bail:
>> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
>> -		brelse(bhs[i]);
>>  	return status;
>>  }
>>  
>>
> 
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: improve recovery performance
  2016-06-17  7:50   ` Junxiao Bi
@ 2016-06-17  8:32     ` Joseph Qi
  2016-06-17  8:56       ` Junxiao Bi
  0 siblings, 1 reply; 5+ messages in thread
From: Joseph Qi @ 2016-06-17  8:32 UTC (permalink / raw)
  To: ocfs2-devel

On 2016/6/17 15:50, Junxiao Bi wrote:
> Hi Joseph,
> 
> On 06/17/2016 03:44 PM, Joseph Qi wrote:
>> Hi Junxiao,
>>
>> On 2016/6/17 14:10, Junxiao Bi wrote:
>>> Journal replay will be run when do recovery for a dead node,
>>> to avoid the stale cache impact, all blocks of dead node's
>>> journal inode were reload from disk. This hurts the performance,
>>> check whether one block is cached before reload it can improve
>>> a lot performance. In my test env, the time doing recovery was
>>> improved from 120s to 1s.
>>>
>>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
>>> ---
>>>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>>>  1 file changed, 22 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>>> index e607419cdfa4..8b808afd5f82 100644
>>> --- a/fs/ocfs2/journal.c
>>> +++ b/fs/ocfs2/journal.c
>>> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>>  	int status = 0;
>>>  	int i;
>>>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
>>> -#define CONCURRENT_JOURNAL_FILL 32ULL
>>> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
>>> -
>>> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
>>> +	struct buffer_head *bhs[1] = {NULL};
>> Since now we do not need batch load, how about make the logic like:
>>
>> 	struct buffer_head *bh = NULL;
>> 	...
>> 	ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
> This array is used because ocfs2_read_blocks_sync() needs it as last
> parameter.
IC, so we pass &bh like ocfs2_read_locked_inode.

Thanks,
Joseph

> 
> Thanks,
> Junxiao.
>>
>> Thanks,
>> Joseph
>>
>>> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>>  
>>>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>>>  	v_blkno = 0;
>>> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>>  			goto bail;
>>>  		}
>>>  
>>> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
>>> -			p_blocks = CONCURRENT_JOURNAL_FILL;
>>> +		for (i = 0; i < p_blocks; i++) {
>>> +			bhs[0] = __find_get_block(osb->sb->s_bdev, p_blkno,
>>> +					osb->sb->s_blocksize);
>>> +			/* block not cached. */
>>> +			if (!bhs[0]) {
>>> +				p_blkno++;
>>> +				continue;
>>> +			}
>>>  
>>> -		/* We are reading journal data which should not
>>> -		 * be put in the uptodate cache */
>>> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
>>> -						p_blkno, p_blocks, bhs);
>>> -		if (status < 0) {
>>> -			mlog_errno(status);
>>> -			goto bail;
>>> -		}
>>> +			brelse(bhs[0]);
>>> +			bhs[0] = NULL;
>>> +			/* We are reading journal data which should not
>>> +			 * be put in the uptodate cache.
>>> +			 */
>>> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, bhs);
>>> +			if (status < 0) {
>>> +				mlog_errno(status);
>>> +				goto bail;
>>> +			}
>>>  
>>> -		for(i = 0; i < p_blocks; i++) {
>>> -			brelse(bhs[i]);
>>> -			bhs[i] = NULL;
>>> +			brelse(bhs[0]);
>>> +			bhs[0] = NULL;
>>>  		}
>>>  
>>>  		v_blkno += p_blocks;
>>>  	}
>>>  
>>>  bail:
>>> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
>>> -		brelse(bhs[i]);
>>>  	return status;
>>>  }
>>>  
>>>
>>
>>
> 
> 
> .
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: improve recovery performance
  2016-06-17  8:32     ` Joseph Qi
@ 2016-06-17  8:56       ` Junxiao Bi
  0 siblings, 0 replies; 5+ messages in thread
From: Junxiao Bi @ 2016-06-17  8:56 UTC (permalink / raw)
  To: ocfs2-devel

On 06/17/2016 04:32 PM, Joseph Qi wrote:
> On 2016/6/17 15:50, Junxiao Bi wrote:
>> Hi Joseph,
>>
>> On 06/17/2016 03:44 PM, Joseph Qi wrote:
>>> Hi Junxiao,
>>>
>>> On 2016/6/17 14:10, Junxiao Bi wrote:
>>>> Journal replay will be run when do recovery for a dead node,
>>>> to avoid the stale cache impact, all blocks of dead node's
>>>> journal inode were reload from disk. This hurts the performance,
>>>> check whether one block is cached before reload it can improve
>>>> a lot performance. In my test env, the time doing recovery was
>>>> improved from 120s to 1s.
>>>>
>>>> Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
>>>> ---
>>>>  fs/ocfs2/journal.c |   41 ++++++++++++++++++++++-------------------
>>>>  1 file changed, 22 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>>>> index e607419cdfa4..8b808afd5f82 100644
>>>> --- a/fs/ocfs2/journal.c
>>>> +++ b/fs/ocfs2/journal.c
>>>> @@ -1159,10 +1159,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>>>  	int status = 0;
>>>>  	int i;
>>>>  	u64 v_blkno, p_blkno, p_blocks, num_blocks;
>>>> -#define CONCURRENT_JOURNAL_FILL 32ULL
>>>> -	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
>>>> -
>>>> -	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
>>>> +	struct buffer_head *bhs[1] = {NULL};
>>> Since now we do not need batch load, how about make the logic like:
>>>
>>> 	struct buffer_head *bh = NULL;
>>> 	...
>>> 	ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
>> This array is used because ocfs2_read_blocks_sync() needs it as last
>> parameter.
> IC, so we pass &bh like ocfs2_read_locked_inode.
Right, will submit v2.

Thanks,
Junxiao.
> 
> Thanks,
> Joseph
> 
>>
>> Thanks,
>> Junxiao.
>>>
>>> Thanks,
>>> Joseph
>>>
>>>> +	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>>>  
>>>>  	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
>>>>  	v_blkno = 0;
>>>> @@ -1174,29 +1172,34 @@ static int ocfs2_force_read_journal(struct inode *inode)
>>>>  			goto bail;
>>>>  		}
>>>>  
>>>> -		if (p_blocks > CONCURRENT_JOURNAL_FILL)
>>>> -			p_blocks = CONCURRENT_JOURNAL_FILL;
>>>> +		for (i = 0; i < p_blocks; i++) {
>>>> +			bhs[0] = __find_get_block(osb->sb->s_bdev, p_blkno,
>>>> +					osb->sb->s_blocksize);
>>>> +			/* block not cached. */
>>>> +			if (!bhs[0]) {
>>>> +				p_blkno++;
>>>> +				continue;
>>>> +			}
>>>>  
>>>> -		/* We are reading journal data which should not
>>>> -		 * be put in the uptodate cache */
>>>> -		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
>>>> -						p_blkno, p_blocks, bhs);
>>>> -		if (status < 0) {
>>>> -			mlog_errno(status);
>>>> -			goto bail;
>>>> -		}
>>>> +			brelse(bhs[0]);
>>>> +			bhs[0] = NULL;
>>>> +			/* We are reading journal data which should not
>>>> +			 * be put in the uptodate cache.
>>>> +			 */
>>>> +			status = ocfs2_read_blocks_sync(osb, p_blkno, 1, bhs);
>>>> +			if (status < 0) {
>>>> +				mlog_errno(status);
>>>> +				goto bail;
>>>> +			}
>>>>  
>>>> -		for(i = 0; i < p_blocks; i++) {
>>>> -			brelse(bhs[i]);
>>>> -			bhs[i] = NULL;
>>>> +			brelse(bhs[0]);
>>>> +			bhs[0] = NULL;
>>>>  		}
>>>>  
>>>>  		v_blkno += p_blocks;
>>>>  	}
>>>>  
>>>>  bail:
>>>> -	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
>>>> -		brelse(bhs[i]);
>>>>  	return status;
>>>>  }
>>>>  
>>>>
>>>
>>>
>>
>>
>> .
>>
> 
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-06-17  8:56 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-17  6:10 [Ocfs2-devel] [PATCH] ocfs2: improve recovery performance Junxiao Bi
2016-06-17  7:44 ` Joseph Qi
2016-06-17  7:50   ` Junxiao Bi
2016-06-17  8:32     ` Joseph Qi
2016-06-17  8:56       ` Junxiao Bi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.