All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
@ 2014-06-24 10:20 Gu Zheng
  2014-06-25  2:30 ` [f2fs-dev] " Chao Yu
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Gu Zheng @ 2014-06-24 10:20 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: f2fs, fsdevel, 이창만

Use find_next_bit_le rather than test_bit_le to improve search speed
lightly.

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
---
 fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
 1 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 3edd561..ba510fb 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 			const char *name, size_t namelen, int *max_slots,
 			f2fs_hash_t namehash, struct page **res_page)
 {
-	struct f2fs_dir_entry *de;
-	unsigned long bit_pos = 0;
+	unsigned long bit_pos = 0, bit_start = 0;
 	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
 	const void *dentry_bits = &dentry_blk->dentry_bitmap;
-	int max_len = 0;
 
-	while (bit_pos < NR_DENTRY_IN_BLOCK) {
-		if (!test_bit_le(bit_pos, dentry_bits)) {
-			if (bit_pos == 0)
-				max_len = 1;
-			else if (!test_bit_le(bit_pos - 1, dentry_bits))
-				max_len++;
-			bit_pos++;
-			continue;
+	while (bit_start < NR_DENTRY_IN_BLOCK) {
+		struct f2fs_dir_entry *de;
+		int max_len = 0;
+
+		bit_pos = find_next_bit_le(dentry_bits,
+					NR_DENTRY_IN_BLOCK, bit_start);
+
+		max_len = bit_pos - bit_start;
+		if (max_len > *max_slots) {
+			*max_slots = max_len;
+			max_len = 0;
 		}
+
+		if (bit_pos >= NR_DENTRY_IN_BLOCK)
+			break;
+
 		de = &dentry_blk->dentry[bit_pos];
 		if (early_match_name(name, namelen, namehash, de)) {
 			if (!memcmp(dentry_blk->filename[bit_pos],
 							name, namelen)) {
 				*res_page = dentry_page;
-				goto found;
+				return de;
 			}
 		}
-		if (max_len > *max_slots) {
-			*max_slots = max_len;
-			max_len = 0;
-		}
-		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+
+		bit_start = bit_pos
+				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 	}
 
-	de = NULL;
 	kunmap(dentry_page);
-found:
-	if (max_len > *max_slots)
-		*max_slots = max_len;
-	return de;
+	return NULL;
 }
 
 static struct f2fs_dir_entry *find_in_level(struct inode *dir,
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* RE: [f2fs-dev] [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-06-24 10:20 [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Gu Zheng
@ 2014-06-25  2:30 ` Chao Yu
  2014-06-25  2:53   ` Gu Zheng
  2014-06-27  9:58 ` [PATCH V2 3/4] f2fs: use find_next_bit_le rather than test_bit_le in find_in_block Gu Zheng
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2014-06-25  2:30 UTC (permalink / raw)
  To: 'Gu Zheng', 'Jaegeuk Kim'
  Cc: 'fsdevel', 'f2fs'

Hi Gu,

Just one nitpick.

> -----Original Message-----
> From: Gu Zheng [mailto:guz.fnst@cn.fujitsu.com]
> Sent: Tuesday, June 24, 2014 6:21 PM
> To: Jaegeuk Kim
> Cc: fsdevel; f2fs
> Subject: [f2fs-dev] [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in,
> find_in_block
> 
> Use find_next_bit_le rather than test_bit_le to improve search speed
> lightly.
> 
> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
> ---
>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
>  1 files changed, 21 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index 3edd561..ba510fb 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
>  			const char *name, size_t namelen, int *max_slots,
>  			f2fs_hash_t namehash, struct page **res_page)
>  {
> -	struct f2fs_dir_entry *de;
> -	unsigned long bit_pos = 0;
> +	unsigned long bit_pos = 0, bit_start = 0;
>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
> -	int max_len = 0;
> 
> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
> -		if (!test_bit_le(bit_pos, dentry_bits)) {
> -			if (bit_pos == 0)
> -				max_len = 1;
> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
> -				max_len++;
> -			bit_pos++;
> -			continue;
> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
> +		struct f2fs_dir_entry *de;
> +		int max_len = 0;
> +
> +		bit_pos = find_next_bit_le(dentry_bits,
> +					NR_DENTRY_IN_BLOCK, bit_start);
> +
> +		max_len = bit_pos - bit_start;
> +		if (max_len > *max_slots) {
> +			*max_slots = max_len;
> +			max_len = 0;

It could be removed here, and also do not need to initialize above.

Thanks.
Yu

>  		}
> +
> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
> +			break;
> +
>  		de = &dentry_blk->dentry[bit_pos];
>  		if (early_match_name(name, namelen, namehash, de)) {
>  			if (!memcmp(dentry_blk->filename[bit_pos],
>  							name, namelen)) {
>  				*res_page = dentry_page;
> -				goto found;
> +				return de;
>  			}
>  		}
> -		if (max_len > *max_slots) {
> -			*max_slots = max_len;
> -			max_len = 0;
> -		}
> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
> +
> +		bit_start = bit_pos
> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>  	}
> 
> -	de = NULL;
>  	kunmap(dentry_page);
> -found:
> -	if (max_len > *max_slots)
> -		*max_slots = max_len;
> -	return de;
> +	return NULL;
>  }
> 
>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> --
> 1.7.7
> 
> 
> ------------------------------------------------------------------------------
> Open source business process management suite built on Java and Eclipse
> Turn processes into business applications with Bonita BPM Community Edition
> Quickly connect people, data, and systems into organized workflows
> Winner of BOSSIE, CODIE, OW2 and Gartner awards
> http://p.sf.net/sfu/Bonitasoft
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [f2fs-dev] [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-06-25  2:30 ` [f2fs-dev] " Chao Yu
@ 2014-06-25  2:53   ` Gu Zheng
  0 siblings, 0 replies; 16+ messages in thread
From: Gu Zheng @ 2014-06-25  2:53 UTC (permalink / raw)
  To: Chao Yu; +Cc: 'Jaegeuk Kim', 'fsdevel', 'f2fs'

Hi Yu,
On 06/25/2014 10:30 AM, Chao Yu wrote:

> Hi Gu,
> 
> Just one nitpick.
> 
>> -----Original Message-----
>> From: Gu Zheng [mailto:guz.fnst@cn.fujitsu.com]
>> Sent: Tuesday, June 24, 2014 6:21 PM
>> To: Jaegeuk Kim
>> Cc: fsdevel; f2fs
>> Subject: [f2fs-dev] [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in,
>> find_in_block
>>
>> Use find_next_bit_le rather than test_bit_le to improve search speed
>> lightly.
>>
>> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
>> ---
>>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
>>  1 files changed, 21 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>> index 3edd561..ba510fb 100644
>> --- a/fs/f2fs/dir.c
>> +++ b/fs/f2fs/dir.c
>> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
>>  			const char *name, size_t namelen, int *max_slots,
>>  			f2fs_hash_t namehash, struct page **res_page)
>>  {
>> -	struct f2fs_dir_entry *de;
>> -	unsigned long bit_pos = 0;
>> +	unsigned long bit_pos = 0, bit_start = 0;
>>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
>>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
>> -	int max_len = 0;
>>
>> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
>> -		if (!test_bit_le(bit_pos, dentry_bits)) {
>> -			if (bit_pos == 0)
>> -				max_len = 1;
>> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
>> -				max_len++;
>> -			bit_pos++;
>> -			continue;
>> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
>> +		struct f2fs_dir_entry *de;
>> +		int max_len = 0;
>> +
>> +		bit_pos = find_next_bit_le(dentry_bits,
>> +					NR_DENTRY_IN_BLOCK, bit_start);
>> +
>> +		max_len = bit_pos - bit_start;
>> +		if (max_len > *max_slots) {
>> +			*max_slots = max_len;
>> +			max_len = 0;
> 
> It could be removed here, and also do not need to initialize above.

Nice catch, thanks.;)

Regards,
Gu

> 
> Thanks.
> Yu
> 
>>  		}
>> +
>> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
>> +			break;
>> +
>>  		de = &dentry_blk->dentry[bit_pos];
>>  		if (early_match_name(name, namelen, namehash, de)) {
>>  			if (!memcmp(dentry_blk->filename[bit_pos],
>>  							name, namelen)) {
>>  				*res_page = dentry_page;
>> -				goto found;
>> +				return de;
>>  			}
>>  		}
>> -		if (max_len > *max_slots) {
>> -			*max_slots = max_len;
>> -			max_len = 0;
>> -		}
>> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>> +
>> +		bit_start = bit_pos
>> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>>  	}
>>
>> -	de = NULL;
>>  	kunmap(dentry_page);
>> -found:
>> -	if (max_len > *max_slots)
>> -		*max_slots = max_len;
>> -	return de;
>> +	return NULL;
>>  }
>>
>>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>> --
>> 1.7.7
>>
>>
>> ------------------------------------------------------------------------------
>> Open source business process management suite built on Java and Eclipse
>> Turn processes into business applications with Bonita BPM Community Edition
>> Quickly connect people, data, and systems into organized workflows
>> Winner of BOSSIE, CODIE, OW2 and Gartner awards
>> http://p.sf.net/sfu/Bonitasoft
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH V2 3/4] f2fs: use find_next_bit_le rather than test_bit_le in find_in_block
  2014-06-24 10:20 [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Gu Zheng
  2014-06-25  2:30 ` [f2fs-dev] " Chao Yu
@ 2014-06-27  9:58 ` Gu Zheng
  2014-07-02  7:49 ` [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Changman Lee
  2014-07-02 10:30 ` Jaegeuk Kim
  3 siblings, 0 replies; 16+ messages in thread
From: Gu Zheng @ 2014-06-27  9:58 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: f2fs, fsdevel, 이창만, 俞超

Use find_next_bit_le rather than test_bit_le to improve search speed
lightly.

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>

---
v2: cleanup the needless code suggested by Yu Chao
---
---
 fs/f2fs/dir.c |   43 ++++++++++++++++++++-----------------------
 1 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index bcbfbc4..39bea6c 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -93,42 +93,39 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 			const char *name, size_t namelen, int *max_slots,
 			f2fs_hash_t namehash, struct page **res_page)
 {
-	struct f2fs_dir_entry *de;
-	unsigned long bit_pos = 0;
+	unsigned long bit_pos = 0, bit_start = 0;
 	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
 	const void *dentry_bits = &dentry_blk->dentry_bitmap;
-	int max_len = 0;
 
-	while (bit_pos < NR_DENTRY_IN_BLOCK) {
-		if (!test_bit_le(bit_pos, dentry_bits)) {
-			if (bit_pos == 0)
-				max_len = 1;
-			else if (!test_bit_le(bit_pos - 1, dentry_bits))
-				max_len++;
-			bit_pos++;
-			continue;
-		}
+	while (bit_start < NR_DENTRY_IN_BLOCK) {
+		struct f2fs_dir_entry *de;
+		int max_len;
+
+		bit_pos = find_next_bit_le(dentry_bits,
+					NR_DENTRY_IN_BLOCK, bit_start);
+
+		max_len = bit_pos - bit_start;
+		if (max_len > *max_slots)
+			*max_slots = max_len;
+
+		if (bit_pos >= NR_DENTRY_IN_BLOCK)
+			break;
+
 		de = &dentry_blk->dentry[bit_pos];
 		if (early_match_name(name, namelen, namehash, de)) {
 			if (!memcmp(dentry_blk->filename[bit_pos],
 							name, namelen)) {
 				*res_page = dentry_page;
-				goto found;
+				return de;
 			}
 		}
-		if (max_len > *max_slots) {
-			*max_slots = max_len;
-			max_len = 0;
-		}
-		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+
+		bit_start = bit_pos
+				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 	}
 
-	de = NULL;
 	kunmap(dentry_page);
-found:
-	if (max_len > *max_slots)
-		*max_slots = max_len;
-	return de;
+	return NULL;
 }
 
 static struct f2fs_dir_entry *find_in_level(struct inode *dir,
-- 
1.7.7



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-06-24 10:20 [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Gu Zheng
  2014-06-25  2:30 ` [f2fs-dev] " Chao Yu
  2014-06-27  9:58 ` [PATCH V2 3/4] f2fs: use find_next_bit_le rather than test_bit_le in find_in_block Gu Zheng
@ 2014-07-02  7:49 ` Changman Lee
  2014-07-03  1:05   ` Gu Zheng
  2014-07-02 10:30 ` Jaegeuk Kim
  3 siblings, 1 reply; 16+ messages in thread
From: Changman Lee @ 2014-07-02  7:49 UTC (permalink / raw)
  To: Gu Zheng; +Cc: Jaegeuk Kim, f2fs, fsdevel

Hi, Gu

Unfortunately, find_next_bit isn't always better than test_bit.
Refer to commit 5d0c667121bfc8be76d1580f485bddbe73465d1a

I remember that
Perviously, Jaegeuk had changed find_next_bit to test_bit because
find_next_bit spent much cpu time in the case of there is lot of dentries like a postmark.
Sorry, I should have reported this quickly.

On Tue, Jun 24, 2014 at 06:20:41PM +0800, Gu Zheng wrote:
> Use find_next_bit_le rather than test_bit_le to improve search speed
> lightly.
> 
> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
> ---
>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
>  1 files changed, 21 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index 3edd561..ba510fb 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
>  			const char *name, size_t namelen, int *max_slots,
>  			f2fs_hash_t namehash, struct page **res_page)
>  {
> -	struct f2fs_dir_entry *de;
> -	unsigned long bit_pos = 0;
> +	unsigned long bit_pos = 0, bit_start = 0;
>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
> -	int max_len = 0;
>  
> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
> -		if (!test_bit_le(bit_pos, dentry_bits)) {
> -			if (bit_pos == 0)
> -				max_len = 1;
> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
> -				max_len++;
> -			bit_pos++;
> -			continue;
> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
> +		struct f2fs_dir_entry *de;
> +		int max_len = 0;
> +
> +		bit_pos = find_next_bit_le(dentry_bits,
> +					NR_DENTRY_IN_BLOCK, bit_start);
> +
> +		max_len = bit_pos - bit_start;
> +		if (max_len > *max_slots) {
> +			*max_slots = max_len;
> +			max_len = 0;
>  		}
> +
> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
> +			break;
> +
>  		de = &dentry_blk->dentry[bit_pos];
>  		if (early_match_name(name, namelen, namehash, de)) {
>  			if (!memcmp(dentry_blk->filename[bit_pos],
>  							name, namelen)) {
>  				*res_page = dentry_page;
> -				goto found;
> +				return de;
>  			}
>  		}
> -		if (max_len > *max_slots) {
> -			*max_slots = max_len;
> -			max_len = 0;
> -		}
> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
> +
> +		bit_start = bit_pos
> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>  	}
>  
> -	de = NULL;
>  	kunmap(dentry_page);
> -found:
> -	if (max_len > *max_slots)
> -		*max_slots = max_len;
> -	return de;
> +	return NULL;
>  }
>  
>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> -- 
> 1.7.7

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-06-24 10:20 [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Gu Zheng
                   ` (2 preceding siblings ...)
  2014-07-02  7:49 ` [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Changman Lee
@ 2014-07-02 10:30 ` Jaegeuk Kim
  2014-07-03  1:13   ` Gu Zheng
  2014-07-03 10:14   ` Gu Zheng
  3 siblings, 2 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2014-07-02 10:30 UTC (permalink / raw)
  To: Gu Zheng; +Cc: f2fs, fsdevel, 이창만

Thanks Changman for reminding this. :)

If there are a lot of ones in the bit stream, find_next_bit_le would cause some
overhead to translate the bits.

However, it would be effective to use find_next_bit_le if the bit stream looks
like 0000000001.

Well, IMO the former case would be a little bit more common.

Gu,
Can you provide some performance numbers wrt this?

On Tue, Jun 24, 2014 at 06:20:41PM +0800, Gu Zheng wrote:
> Use find_next_bit_le rather than test_bit_le to improve search speed
> lightly.
> 
> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
> ---
>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
>  1 files changed, 21 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index 3edd561..ba510fb 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
>  			const char *name, size_t namelen, int *max_slots,
>  			f2fs_hash_t namehash, struct page **res_page)
>  {
> -	struct f2fs_dir_entry *de;
> -	unsigned long bit_pos = 0;
> +	unsigned long bit_pos = 0, bit_start = 0;
>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
> -	int max_len = 0;
>  
> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
> -		if (!test_bit_le(bit_pos, dentry_bits)) {
> -			if (bit_pos == 0)
> -				max_len = 1;
> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
> -				max_len++;
> -			bit_pos++;
> -			continue;
> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
> +		struct f2fs_dir_entry *de;
> +		int max_len = 0;
> +
> +		bit_pos = find_next_bit_le(dentry_bits,
> +					NR_DENTRY_IN_BLOCK, bit_start);
> +
> +		max_len = bit_pos - bit_start;
> +		if (max_len > *max_slots) {
> +			*max_slots = max_len;
> +			max_len = 0;
>  		}
> +
> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
> +			break;
> +
>  		de = &dentry_blk->dentry[bit_pos];
>  		if (early_match_name(name, namelen, namehash, de)) {
>  			if (!memcmp(dentry_blk->filename[bit_pos],
>  							name, namelen)) {
>  				*res_page = dentry_page;
> -				goto found;
> +				return de;
>  			}
>  		}
> -		if (max_len > *max_slots) {
> -			*max_slots = max_len;
> -			max_len = 0;
> -		}
> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
> +
> +		bit_start = bit_pos
> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>  	}
>  
> -	de = NULL;
>  	kunmap(dentry_page);
> -found:
> -	if (max_len > *max_slots)
> -		*max_slots = max_len;
> -	return de;
> +	return NULL;
>  }
>  
>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> -- 
> 1.7.7

-- 
Jaegeuk Kim

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-02  7:49 ` [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Changman Lee
@ 2014-07-03  1:05   ` Gu Zheng
  0 siblings, 0 replies; 16+ messages in thread
From: Gu Zheng @ 2014-07-03  1:05 UTC (permalink / raw)
  To: Changman Lee; +Cc: Jaegeuk Kim, f2fs, fsdevel

Hi Changman,
On 07/02/2014 03:49 PM, Changman Lee wrote:

> Hi, Gu
> 
> Unfortunately, find_next_bit isn't always better than test_bit.
> Refer to commit 5d0c667121bfc8be76d1580f485bddbe73465d1a
> 
> I remember that
> Perviously, Jaegeuk had changed find_next_bit to test_bit because
> find_next_bit spent much cpu time in the case of there is lot of dentries like a postmark.

Ah~, there was such a discussion before. But I think whether we can get
benefit from this depends on the condition of the bit-map.
I'll follow Jaegeuk's suggestion to get more effective data about this,
maybe it it can dispel our qualm.

> Sorry, I should have reported this quickly.

It's not late.:)

Thanks,
Gu

> 
> On Tue, Jun 24, 2014 at 06:20:41PM +0800, Gu Zheng wrote:
>> Use find_next_bit_le rather than test_bit_le to improve search speed
>> lightly.
>>
>> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
>> ---
>>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
>>  1 files changed, 21 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>> index 3edd561..ba510fb 100644
>> --- a/fs/f2fs/dir.c
>> +++ b/fs/f2fs/dir.c
>> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
>>  			const char *name, size_t namelen, int *max_slots,
>>  			f2fs_hash_t namehash, struct page **res_page)
>>  {
>> -	struct f2fs_dir_entry *de;
>> -	unsigned long bit_pos = 0;
>> +	unsigned long bit_pos = 0, bit_start = 0;
>>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
>>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
>> -	int max_len = 0;
>>  
>> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
>> -		if (!test_bit_le(bit_pos, dentry_bits)) {
>> -			if (bit_pos == 0)
>> -				max_len = 1;
>> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
>> -				max_len++;
>> -			bit_pos++;
>> -			continue;
>> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
>> +		struct f2fs_dir_entry *de;
>> +		int max_len = 0;
>> +
>> +		bit_pos = find_next_bit_le(dentry_bits,
>> +					NR_DENTRY_IN_BLOCK, bit_start);
>> +
>> +		max_len = bit_pos - bit_start;
>> +		if (max_len > *max_slots) {
>> +			*max_slots = max_len;
>> +			max_len = 0;
>>  		}
>> +
>> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
>> +			break;
>> +
>>  		de = &dentry_blk->dentry[bit_pos];
>>  		if (early_match_name(name, namelen, namehash, de)) {
>>  			if (!memcmp(dentry_blk->filename[bit_pos],
>>  							name, namelen)) {
>>  				*res_page = dentry_page;
>> -				goto found;
>> +				return de;
>>  			}
>>  		}
>> -		if (max_len > *max_slots) {
>> -			*max_slots = max_len;
>> -			max_len = 0;
>> -		}
>> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>> +
>> +		bit_start = bit_pos
>> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>>  	}
>>  
>> -	de = NULL;
>>  	kunmap(dentry_page);
>> -found:
>> -	if (max_len > *max_slots)
>> -		*max_slots = max_len;
>> -	return de;
>> +	return NULL;
>>  }
>>  
>>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>> -- 
>> 1.7.7
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-02 10:30 ` Jaegeuk Kim
@ 2014-07-03  1:13   ` Gu Zheng
  2014-07-03 10:14   ` Gu Zheng
  1 sibling, 0 replies; 16+ messages in thread
From: Gu Zheng @ 2014-07-03  1:13 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: f2fs, fsdevel, 이창만

Hi Jaegeuk,
On 07/02/2014 06:30 PM, Jaegeuk Kim wrote:

> Thanks Changman for reminding this. :)
> 
> If there are a lot of ones in the bit stream, find_next_bit_le would cause some
> overhead to translate the bits.
> 
> However, it would be effective to use find_next_bit_le if the bit stream looks
> like 0000000001.

Agree.

> 
> Well, IMO the former case would be a little bit more common.
> 
> Gu,
> Can you provide some performance numbers wrt this?

OK. I'll do it.:)

Thanks,
Gu

> 
> On Tue, Jun 24, 2014 at 06:20:41PM +0800, Gu Zheng wrote:
>> Use find_next_bit_le rather than test_bit_le to improve search speed
>> lightly.
>>
>> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
>> ---
>>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
>>  1 files changed, 21 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>> index 3edd561..ba510fb 100644
>> --- a/fs/f2fs/dir.c
>> +++ b/fs/f2fs/dir.c
>> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
>>  			const char *name, size_t namelen, int *max_slots,
>>  			f2fs_hash_t namehash, struct page **res_page)
>>  {
>> -	struct f2fs_dir_entry *de;
>> -	unsigned long bit_pos = 0;
>> +	unsigned long bit_pos = 0, bit_start = 0;
>>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
>>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
>> -	int max_len = 0;
>>  
>> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
>> -		if (!test_bit_le(bit_pos, dentry_bits)) {
>> -			if (bit_pos == 0)
>> -				max_len = 1;
>> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
>> -				max_len++;
>> -			bit_pos++;
>> -			continue;
>> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
>> +		struct f2fs_dir_entry *de;
>> +		int max_len = 0;
>> +
>> +		bit_pos = find_next_bit_le(dentry_bits,
>> +					NR_DENTRY_IN_BLOCK, bit_start);
>> +
>> +		max_len = bit_pos - bit_start;
>> +		if (max_len > *max_slots) {
>> +			*max_slots = max_len;
>> +			max_len = 0;
>>  		}
>> +
>> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
>> +			break;
>> +
>>  		de = &dentry_blk->dentry[bit_pos];
>>  		if (early_match_name(name, namelen, namehash, de)) {
>>  			if (!memcmp(dentry_blk->filename[bit_pos],
>>  							name, namelen)) {
>>  				*res_page = dentry_page;
>> -				goto found;
>> +				return de;
>>  			}
>>  		}
>> -		if (max_len > *max_slots) {
>> -			*max_slots = max_len;
>> -			max_len = 0;
>> -		}
>> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>> +
>> +		bit_start = bit_pos
>> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>>  	}
>>  
>> -	de = NULL;
>>  	kunmap(dentry_page);
>> -found:
>> -	if (max_len > *max_slots)
>> -		*max_slots = max_len;
>> -	return de;
>> +	return NULL;
>>  }
>>  
>>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>> -- 
>> 1.7.7
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-02 10:30 ` Jaegeuk Kim
  2014-07-03  1:13   ` Gu Zheng
@ 2014-07-03 10:14   ` Gu Zheng
  2014-07-04  5:36     ` Jaegeuk Kim
  1 sibling, 1 reply; 16+ messages in thread
From: Gu Zheng @ 2014-07-03 10:14 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: f2fs, fsdevel, 이창만, 俞超

[-- Attachment #1: Type: text/plain, Size: 3470 bytes --]

Hi Jaegeuk, Changman

Just a simple test, not very sure it can address
our qualm.

Bitmap size:216(the same as f2fs dentry_bits).
CPU: Intel i5 x86_64.

Time counting based on tsc(the less the fast).
[Index of 1]	find_next_bit_le	test_bit_le
0		20			117
1		20			114
2		20			113
3		20			139
4		22			121
5		22			118
6		22			115
8		22			112
9		22			106
10		22			105
11		22			100
16		22			98
48		22			97
80		27			95	
104		27			92
136		32			95
160		32			92
184		32			90
200		27			87
208		35			84

According to the result, find_next_bit_le is always
better than test_bit_le, though there may be some
noise, but I think the result is clear.
Hope it can help us.:)
ps.The sample is attached too.

Thanks,
Gu
On 07/02/2014 06:30 PM, Jaegeuk Kim wrote:

> Thanks Changman for reminding this. :)
> 
> If there are a lot of ones in the bit stream, find_next_bit_le would cause some
> overhead to translate the bits.
> 
> However, it would be effective to use find_next_bit_le if the bit stream looks
> like 0000000001.
> 
> Well, IMO the former case would be a little bit more common.
> 
> Gu,
> Can you provide some performance numbers wrt this?
> 
> On Tue, Jun 24, 2014 at 06:20:41PM +0800, Gu Zheng wrote:
>> Use find_next_bit_le rather than test_bit_le to improve search speed
>> lightly.
>>
>> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
>> ---
>>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
>>  1 files changed, 21 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>> index 3edd561..ba510fb 100644
>> --- a/fs/f2fs/dir.c
>> +++ b/fs/f2fs/dir.c
>> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
>>  			const char *name, size_t namelen, int *max_slots,
>>  			f2fs_hash_t namehash, struct page **res_page)
>>  {
>> -	struct f2fs_dir_entry *de;
>> -	unsigned long bit_pos = 0;
>> +	unsigned long bit_pos = 0, bit_start = 0;
>>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
>>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
>> -	int max_len = 0;
>>  
>> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
>> -		if (!test_bit_le(bit_pos, dentry_bits)) {
>> -			if (bit_pos == 0)
>> -				max_len = 1;
>> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
>> -				max_len++;
>> -			bit_pos++;
>> -			continue;
>> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
>> +		struct f2fs_dir_entry *de;
>> +		int max_len = 0;
>> +
>> +		bit_pos = find_next_bit_le(dentry_bits,
>> +					NR_DENTRY_IN_BLOCK, bit_start);
>> +
>> +		max_len = bit_pos - bit_start;
>> +		if (max_len > *max_slots) {
>> +			*max_slots = max_len;
>> +			max_len = 0;
>>  		}
>> +
>> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
>> +			break;
>> +
>>  		de = &dentry_blk->dentry[bit_pos];
>>  		if (early_match_name(name, namelen, namehash, de)) {
>>  			if (!memcmp(dentry_blk->filename[bit_pos],
>>  							name, namelen)) {
>>  				*res_page = dentry_page;
>> -				goto found;
>> +				return de;
>>  			}
>>  		}
>> -		if (max_len > *max_slots) {
>> -			*max_slots = max_len;
>> -			max_len = 0;
>> -		}
>> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>> +
>> +		bit_start = bit_pos
>> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
>>  	}
>>  
>> -	de = NULL;
>>  	kunmap(dentry_page);
>> -found:
>> -	if (max_len > *max_slots)
>> -		*max_slots = max_len;
>> -	return de;
>> +	return NULL;
>>  }
>>  
>>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>> -- 
>> 1.7.7
> 



[-- Attachment #2: bit_test_mod.c --]
[-- Type: text/plain, Size: 2977 bytes --]

#include <linux/module.h>

__u8 bitmaps[20][27] = {
		{1, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{2, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{4, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{8, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{16, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 1,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{32, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{64, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 1, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 2, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 4, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 8, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 0, 1, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 1,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 1, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 1,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 1, 0, 0, 0,
		 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 1,
		 0, 0, 0, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 1, 0, 0, 0},
		{0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 1, 0},
		{0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 1}
		};

uint64_t rdtsc(void) {
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}

static void test_bit_search_speed(void)
{
	unsigned long flags;
	uint64_t tsc_s, tsc_diff;
	int i, j, pos;
	const void *bit_addr;

	local_irq_save(flags);
	preempt_disable();

	for (i = 0; i < 20; i++) {
		printk("Test bitmap %d... \n", i);

		bit_addr = &bitmaps[i];

		tsc_s = rdtsc();

		for (j = 0; j < 1000; j++)
			find_next_bit_le(bit_addr, 216, 0);

		tsc_diff = (rdtsc() - tsc_s)/1000;

		printk("find_next_bit_le: %llu \n", tsc_diff);

		pos = 0;
		tsc_s = rdtsc();

		for (j = 0; j < 1000; j++)
			while (!test_bit_le(pos++, bit_addr));

		tsc_diff = (rdtsc() - tsc_s)/1000;

		printk("test_bit_le     : %llu \n", tsc_diff);
	}

	preempt_enable();
	local_irq_restore(flags);
}

static int __init start_test(void) {
	test_bit_search_speed();
	return 0;
}

static void __exit exit_test(void) {

}

module_init(start_test);
module_exit(exit_test);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gu Zheng");

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-03 10:14   ` Gu Zheng
@ 2014-07-04  5:36     ` Jaegeuk Kim
  2014-07-04  6:21       ` Chao Yu
  0 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2014-07-04  5:36 UTC (permalink / raw)
  To: Gu Zheng; +Cc: f2fs, fsdevel, 이창만, 俞�

Well, how about testing with many ones in the bit streams?
Thanks,

On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
> Hi Jaegeuk, Changman
> 
> Just a simple test, not very sure it can address
> our qualm.
> 
> Bitmap size:216(the same as f2fs dentry_bits).
> CPU: Intel i5 x86_64.
> 
> Time counting based on tsc(the less the fast).
> [Index of 1]	find_next_bit_le	test_bit_le
> 0		20			117
> 1		20			114
> 2		20			113
> 3		20			139
> 4		22			121
> 5		22			118
> 6		22			115
> 8		22			112
> 9		22			106
> 10		22			105
> 11		22			100
> 16		22			98
> 48		22			97
> 80		27			95	
> 104		27			92
> 136		32			95
> 160		32			92
> 184		32			90
> 200		27			87
> 208		35			84
> 
> According to the result, find_next_bit_le is always
> better than test_bit_le, though there may be some
> noise, but I think the result is clear.
> Hope it can help us.:)
> ps.The sample is attached too.
> 
> Thanks,
> Gu
> On 07/02/2014 06:30 PM, Jaegeuk Kim wrote:
> 
> > Thanks Changman for reminding this. :)
> > 
> > If there are a lot of ones in the bit stream, find_next_bit_le would cause some
> > overhead to translate the bits.
> > 
> > However, it would be effective to use find_next_bit_le if the bit stream looks
> > like 0000000001.
> > 
> > Well, IMO the former case would be a little bit more common.
> > 
> > Gu,
> > Can you provide some performance numbers wrt this?
> > 
> > On Tue, Jun 24, 2014 at 06:20:41PM +0800, Gu Zheng wrote:
> >> Use find_next_bit_le rather than test_bit_le to improve search speed
> >> lightly.
> >>
> >> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
> >> ---
> >>  fs/f2fs/dir.c |   43 +++++++++++++++++++++----------------------
> >>  1 files changed, 21 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> >> index 3edd561..ba510fb 100644
> >> --- a/fs/f2fs/dir.c
> >> +++ b/fs/f2fs/dir.c
> >> @@ -93,42 +93,41 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
> >>  			const char *name, size_t namelen, int *max_slots,
> >>  			f2fs_hash_t namehash, struct page **res_page)
> >>  {
> >> -	struct f2fs_dir_entry *de;
> >> -	unsigned long bit_pos = 0;
> >> +	unsigned long bit_pos = 0, bit_start = 0;
> >>  	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
> >>  	const void *dentry_bits = &dentry_blk->dentry_bitmap;
> >> -	int max_len = 0;
> >>  
> >> -	while (bit_pos < NR_DENTRY_IN_BLOCK) {
> >> -		if (!test_bit_le(bit_pos, dentry_bits)) {
> >> -			if (bit_pos == 0)
> >> -				max_len = 1;
> >> -			else if (!test_bit_le(bit_pos - 1, dentry_bits))
> >> -				max_len++;
> >> -			bit_pos++;
> >> -			continue;
> >> +	while (bit_start < NR_DENTRY_IN_BLOCK) {
> >> +		struct f2fs_dir_entry *de;
> >> +		int max_len = 0;
> >> +
> >> +		bit_pos = find_next_bit_le(dentry_bits,
> >> +					NR_DENTRY_IN_BLOCK, bit_start);
> >> +
> >> +		max_len = bit_pos - bit_start;
> >> +		if (max_len > *max_slots) {
> >> +			*max_slots = max_len;
> >> +			max_len = 0;
> >>  		}
> >> +
> >> +		if (bit_pos >= NR_DENTRY_IN_BLOCK)
> >> +			break;
> >> +
> >>  		de = &dentry_blk->dentry[bit_pos];
> >>  		if (early_match_name(name, namelen, namehash, de)) {
> >>  			if (!memcmp(dentry_blk->filename[bit_pos],
> >>  							name, namelen)) {
> >>  				*res_page = dentry_page;
> >> -				goto found;
> >> +				return de;
> >>  			}
> >>  		}
> >> -		if (max_len > *max_slots) {
> >> -			*max_slots = max_len;
> >> -			max_len = 0;
> >> -		}
> >> -		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
> >> +
> >> +		bit_start = bit_pos
> >> +				+ GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
> >>  	}
> >>  
> >> -	de = NULL;
> >>  	kunmap(dentry_page);
> >> -found:
> >> -	if (max_len > *max_slots)
> >> -		*max_slots = max_len;
> >> -	return de;
> >> +	return NULL;
> >>  }
> >>  
> >>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >> -- 
> >> 1.7.7
> > 
> 
> 

> #include <linux/module.h>
> 
> __u8 bitmaps[20][27] = {
> 		{1, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{2, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{4, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{8, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{16, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 1,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{32, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{64, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 1, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 2, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 4, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 8, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 0, 1, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 0, 0, 0, 0, 0, 1,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 1, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 1,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 1, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 1,
> 		 0, 0, 0, 0, 0, 0},
> 		{0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 1, 0, 0, 0},
> 		{0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 1, 0},
> 		{0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 0, 0,
> 		 0, 0, 0, 0, 0, 1}
> 		};
> 
> uint64_t rdtsc(void) {
> uint32_t lo, hi;
> __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
> return (uint64_t)hi << 32 | lo;
> }
> 
> static void test_bit_search_speed(void)
> {
> 	unsigned long flags;
> 	uint64_t tsc_s, tsc_diff;
> 	int i, j, pos;
> 	const void *bit_addr;
> 
> 	local_irq_save(flags);
> 	preempt_disable();
> 
> 	for (i = 0; i < 20; i++) {
> 		printk("Test bitmap %d... \n", i);
> 
> 		bit_addr = &bitmaps[i];
> 
> 		tsc_s = rdtsc();
> 
> 		for (j = 0; j < 1000; j++)
> 			find_next_bit_le(bit_addr, 216, 0);
> 
> 		tsc_diff = (rdtsc() - tsc_s)/1000;
> 
> 		printk("find_next_bit_le: %llu \n", tsc_diff);
> 
> 		pos = 0;
> 		tsc_s = rdtsc();
> 
> 		for (j = 0; j < 1000; j++)
> 			while (!test_bit_le(pos++, bit_addr));
> 
> 		tsc_diff = (rdtsc() - tsc_s)/1000;
> 
> 		printk("test_bit_le     : %llu \n", tsc_diff);
> 	}
> 
> 	preempt_enable();
> 	local_irq_restore(flags);
> }
> 
> static int __init start_test(void) {
> 	test_bit_search_speed();
> 	return 0;
> }
> 
> static void __exit exit_test(void) {
> 
> }
> 
> module_init(start_test);
> module_exit(exit_test);
> 
> MODULE_LICENSE("GPL");
> MODULE_AUTHOR("Gu Zheng");


-- 
Jaegeuk Kim

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-04  5:36     ` Jaegeuk Kim
@ 2014-07-04  6:21       ` Chao Yu
  2014-07-04  8:04         ` Gu Zheng
  0 siblings, 1 reply; 16+ messages in thread
From: Chao Yu @ 2014-07-04  6:21 UTC (permalink / raw)
  To: 'Jaegeuk Kim', '이창만',
	'Gu Zheng'
  Cc: 'f2fs', 'fsdevel'

Hi Jaegeuk, Gu, Changman

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Friday, July 04, 2014 1:36 PM
> To: Gu Zheng
> Cc: f2fs; fsdevel; 이창만; 俞
> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
> 
> Well, how about testing with many ones in the bit streams?
> Thanks,
> 
> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
> > Hi Jaegeuk, Changman
> >
> > Just a simple test, not very sure it can address
> > our qualm.
> >
> > Bitmap size:216(the same as f2fs dentry_bits).
> > CPU: Intel i5 x86_64.
> >
> > Time counting based on tsc(the less the fast).
> > [Index of 1]	find_next_bit_le	test_bit_le
> > 0		20			117
> > 1		20			114
> > 2		20			113
> > 3		20			139
> > 4		22			121
> > 5		22			118
> > 6		22			115
> > 8		22			112
> > 9		22			106
> > 10		22			105
> > 11		22			100
> > 16		22			98
> > 48		22			97
> > 80		27			95
> > 104		27			92
> > 136		32			95
> > 160		32			92
> > 184		32			90
> > 200		27			87
> > 208		35			84
> >
> > According to the result, find_next_bit_le is always
> > better than test_bit_le, though there may be some
> > noise, but I think the result is clear.
> > Hope it can help us.:)
> > ps.The sample is attached too.
> >
> > Thanks,
> > Gu

I hope this could provide some help for this patch.

I modify Gu's code like this, and add few test case:

static void test_bit_search_speed(void)
{
	unsigned long flags;
	uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2;
	int i, j, pos;
	const void *bit_addr;

	local_irq_save(flags);
	preempt_disable();
	
	printk("find_next_bit	test_bit_le\n");

	for (i = 0; i < 24; i++) {

		bit_addr = &bitmaps[i];

		tsc_s_b1 = rdtsc();

		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
			while (pos < 216)
				pos = find_next_bit_le(bit_addr, 216, pos) + 1;

		mb();
		tsc_s_e1 = rdtsc();
		tsc_s_e1 -= tsc_s_b1;
		do_div(tsc_s_e1, 1000);

		tsc_s_b2 = rdtsc();

		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
			while (pos < 216)
				test_bit_le(pos++, bit_addr);

		mb();
		tsc_s_e2 = rdtsc();
		tsc_s_e2 -= tsc_s_b2;
		do_div(tsc_s_e2, 1000);

		printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2);
	}

	preempt_enable();
	local_irq_restore(flags);
}
case: 11111111 11111111
		{255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255},
case: 10101010 10101010
		{170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170},
case: 11111111 00000000
		{255, 0, 255, 0, 255, 0, 255,
		 0, 255, 0, 255, 0, 255, 0,
		 255, 0, 255, 0, 255, 0, 255,
		 0, 255, 0, 255, 0, 255},
case: 00001111 00001111
		 {15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15}

and here are test result in my env. (Ubuntu vm, 768MB, i3-3220)
It seems find_next_bit works not so bad as I thought.

find_next_bit    test_bit_le
73               4209
62               1271
69               1585
50               2031
67               2255
82               2261
52               4007
79               2159
50               2043
55               2215
53               2393
72               3784
76               1879
61               2562
70               2702
62               2489
56               2307
54               2063
51               2258
69               2712
4133             3989  -- case: 11111111 11111111
2370             3024  -- case: 10101010 10101010
2608             2413  -- case: 11111111 00000000
2457             2506  -- case: 00001111 00001111

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-04  6:21       ` Chao Yu
@ 2014-07-04  8:04         ` Gu Zheng
  2014-07-05  6:25           ` Jaegeuk Kim
  0 siblings, 1 reply; 16+ messages in thread
From: Gu Zheng @ 2014-07-04  8:04 UTC (permalink / raw)
  To: Chao Yu
  Cc: 'Jaegeuk Kim', '이창만',
	'f2fs', 'fsdevel'

[-- Attachment #1: Type: text/plain, Size: 4627 bytes --]

Hi Yu,
Thanks.
On 07/04/2014 02:21 PM, Chao Yu wrote:

> Hi Jaegeuk, Gu, Changman
> 
>> -----Original Message-----
>> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
>> Sent: Friday, July 04, 2014 1:36 PM
>> To: Gu Zheng
>> Cc: f2fs; fsdevel; 이창만; 俞
>> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
>>
>> Well, how about testing with many ones in the bit streams?
>> Thanks,
>>
>> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
>>> Hi Jaegeuk, Changman
>>>
>>> Just a simple test, not very sure it can address
>>> our qualm.
>>>
>>> Bitmap size:216(the same as f2fs dentry_bits).
>>> CPU: Intel i5 x86_64.
>>>
>>> Time counting based on tsc(the less the fast).
>>> [Index of 1]	find_next_bit_le	test_bit_le
>>> 0		20			117
>>> 1		20			114
>>> 2		20			113
>>> 3		20			139
>>> 4		22			121
>>> 5		22			118
>>> 6		22			115
>>> 8		22			112
>>> 9		22			106
>>> 10		22			105
>>> 11		22			100
>>> 16		22			98
>>> 48		22			97
>>> 80		27			95
>>> 104		27			92
>>> 136		32			95
>>> 160		32			92
>>> 184		32			90
>>> 200		27			87
>>> 208		35			84
>>>
>>> According to the result, find_next_bit_le is always
>>> better than test_bit_le, though there may be some
>>> noise, but I think the result is clear.
>>> Hope it can help us.:)
>>> ps.The sample is attached too.
>>>
>>> Thanks,
>>> Gu
> 
> I hope this could provide some help for this patch.
> 
> I modify Gu's code like this, and add few test case:
> 
> static void test_bit_search_speed(void)
> {
> 	unsigned long flags;
> 	uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2;
> 	int i, j, pos;
> 	const void *bit_addr;
> 
> 	local_irq_save(flags);
> 	preempt_disable();
> 	
> 	printk("find_next_bit	test_bit_le\n");
> 
> 	for (i = 0; i < 24; i++) {
> 
> 		bit_addr = &bitmaps[i];
> 
> 		tsc_s_b1 = rdtsc();
> 
> 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> 			while (pos < 216)
> 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> 
> 		mb();
> 		tsc_s_e1 = rdtsc();
> 		tsc_s_e1 -= tsc_s_b1;
> 		do_div(tsc_s_e1, 1000);
> 
> 		tsc_s_b2 = rdtsc();
> 
> 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> 			while (pos < 216)
> 				test_bit_le(pos++, bit_addr);
> 
> 		mb();
> 		tsc_s_e2 = rdtsc();
> 		tsc_s_e2 -= tsc_s_b2;
> 		do_div(tsc_s_e2, 1000);
> 
> 		printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2);
> 	}
> 
> 	preempt_enable();
> 	local_irq_restore(flags);
> }
> case: 11111111 11111111
> 		{255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255},
> case: 10101010 10101010
> 		{170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170},
> case: 11111111 00000000
> 		{255, 0, 255, 0, 255, 0, 255,
> 		 0, 255, 0, 255, 0, 255, 0,
> 		 255, 0, 255, 0, 255, 0, 255,
> 		 0, 255, 0, 255, 0, 255},
> case: 00001111 00001111
> 		 {15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15}
> 
> and here are test result in my env. (Ubuntu vm, 768MB, i3-3220)
> It seems find_next_bit works not so bad as I thought.
> 
> find_next_bit    test_bit_le
> 73               4209
> 62               1271
> 69               1585
> 50               2031
> 67               2255
> 82               2261
> 52               4007
> 79               2159
> 50               2043
> 55               2215
> 53               2393
> 72               3784
> 76               1879
> 61               2562
> 70               2702
> 62               2489
> 56               2307
> 54               2063
> 51               2258
> 69               2712
> 4133             3989  -- case: 11111111 11111111
> 2370             3024  -- case: 10101010 10101010
> 2608             2413  -- case: 11111111 00000000
> 2457             2506  -- case: 00001111 00001111

The time cost of test_bit_le shakes violently, it should be
very smooth according to the test case, maybe it has relation
to your vm env.

To Jaegeuk,
Following test result is walking a bitmap via find_next_bit_le
and test_bit_le.
(Front 7 are random bitmaps, last four are cases from Yu, see
attached sample for detail):

find_next_bit_le    test_bit_le

     3615                3492 
     2640                3492 
     2431                3492 
     1957                3494 
     2160                3492 
     1933                3495 
     1096                3492 

     8078                3492 
     3732                3492 
     4237                3493 
     3824                3492

Thanks,
Gu 

> 
> .
> 



[-- Attachment #2: bit_test_mod.c --]
[-- Type: text/plain, Size: 2493 bytes --]

#include <linux/module.h>

__u8 bitmaps[11][27] = {
		{1, 123, 2, 3, 127, 200, 100,
		 123, 23, 13, 78, 42, 123, 50,
		 123, 56, 198, 0, 58, 22, 19,
		 123, 23, 13, 78, 42, 123},
		{2, 0, 0, 0, 0, 0, 0,
		 123, 23, 13, 78, 42, 123, 50,
		 123, 56, 198, 0, 58, 22, 19,
		 23, 123, 2, 34, 123, 12},
		{4, 0, 0, 0, 0, 0, 0,
		 123, 56, 198, 0, 58, 22, 19,
		 123, 23, 13, 78, 42, 123,
		 0, 0, 0, 34, 45, 29},
		{8, 0, 0, 0, 0, 0, 0,
		 1, 123, 2, 3, 127, 200, 100,
		 123, 23, 13, 78, 42, 123, 50,
		 0, 0, 0, 0, 0, 0},
		{16, 0, 0, 0, 0, 0, 0,
		 8, 78, 0, 0, 23, 0, 213,
		 0, 12, 0, 45, 0, 109, 111,
		 231, 11, 11, 88, 77, 99},
		{32, 0, 0, 0, 0, 0, 0,
		 8, 78, 0, 0, 23, 0, 213,
		 0, 12, 0, 45, 0, 109, 111,
		 231, 11, 11, 88, 77, 99},
		{64, 0, 0, 0, 0, 0, 0,
		 8, 78, 0, 0, 23, 0, 213,
		 0, 12, 0, 45, 0, 109, 111,
		 0, 0, 0, 0, 0, 0},
		{255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255},
		{170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170},
		{255, 0, 255, 0, 255, 0, 255,
		 0, 255, 0, 255, 0, 255, 0,
		 255, 0, 255, 0, 255, 0, 255,
		 0, 255, 0, 255, 0, 255},
		 {15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15}
		};

uint64_t rdtsc(void) {
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}

static void test_bit_search_speed(void)
{
	unsigned long flags;
	uint64_t tsc_s, tsc_diff;
	int i, j, pos;
	const void *bit_addr;

	local_irq_save(flags);
	preempt_disable();

	printk("find_next_bit_le    test_bit_le\n");

	for (i = 0; i < 11; i++) {
		bit_addr = &bitmaps[i];

		pos = 0;
		tsc_s = rdtsc();

		for (j = 0; j < 1000; j++, pos = 0)
			while (pos < 216)
				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
		mb();
		tsc_diff = (rdtsc() - tsc_s)/1000;

		printk("%8llu", tsc_diff);

		pos = 0;
		tsc_s = rdtsc();

		for (j = 0; j < 1000; j++, pos = 0)
			while (pos < 216)
				test_bit_le(pos++, bit_addr);

		mb();
		tsc_diff = (rdtsc() - tsc_s)/1000;

		printk("%20llu \n", tsc_diff);
	}

	preempt_enable();
	local_irq_restore(flags);
}

static int __init start_test(void) {
	test_bit_search_speed();
	return 0;
}

static void __exit exit_test(void) {

}

module_init(start_test);
module_exit(exit_test);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gu Zheng");

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-04  8:04         ` Gu Zheng
@ 2014-07-05  6:25           ` Jaegeuk Kim
  2014-07-05 11:15             ` Chao Yu
  2014-07-07  1:45             ` Changman Lee
  0 siblings, 2 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2014-07-05  6:25 UTC (permalink / raw)
  To: '이창만'
  Cc: Gu Zheng, Chao Yu, 'f2fs', 'fsdevel'

To Changman,

Just for sure, can you reproduce this issue in the x86 machine with proper
benchmarks? (i.e., test_bit_le vs. find_next_bit_le)

To all,

I cautiously suspect that the performances might be different when processing
f2fs_find_entry, since L1/L2 cache misses due to the intermediate routines like
matching strings can make some effect on it.

But, IMO, it is still worth to investigate this issue and contemplate how to
detect all ones or not.

Ah, one solution may be using 2 bytes from the reserved space, total 3, to
indicate how many valid dentries are stored in the dentry block.

Any ideas?

Thanks,

On Fri, Jul 04, 2014 at 04:04:09PM +0800, Gu Zheng wrote:
> Hi Yu,
> Thanks.
> On 07/04/2014 02:21 PM, Chao Yu wrote:
> 
> > Hi Jaegeuk, Gu, Changman
> > 
> >> -----Original Message-----
> >> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> >> Sent: Friday, July 04, 2014 1:36 PM
> >> To: Gu Zheng
> >> Cc: f2fs; fsdevel; 이창만; 俞
> >> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
> >>
> >> Well, how about testing with many ones in the bit streams?
> >> Thanks,
> >>
> >> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
> >>> Hi Jaegeuk, Changman
> >>>
> >>> Just a simple test, not very sure it can address
> >>> our qualm.
> >>>
> >>> Bitmap size:216(the same as f2fs dentry_bits).
> >>> CPU: Intel i5 x86_64.
> >>>
> >>> Time counting based on tsc(the less the fast).
> >>> [Index of 1]	find_next_bit_le	test_bit_le
> >>> 0		20			117
> >>> 1		20			114
> >>> 2		20			113
> >>> 3		20			139
> >>> 4		22			121
> >>> 5		22			118
> >>> 6		22			115
> >>> 8		22			112
> >>> 9		22			106
> >>> 10		22			105
> >>> 11		22			100
> >>> 16		22			98
> >>> 48		22			97
> >>> 80		27			95
> >>> 104		27			92
> >>> 136		32			95
> >>> 160		32			92
> >>> 184		32			90
> >>> 200		27			87
> >>> 208		35			84
> >>>
> >>> According to the result, find_next_bit_le is always
> >>> better than test_bit_le, though there may be some
> >>> noise, but I think the result is clear.
> >>> Hope it can help us.:)
> >>> ps.The sample is attached too.
> >>>
> >>> Thanks,
> >>> Gu
> > 
> > I hope this could provide some help for this patch.
> > 
> > I modify Gu's code like this, and add few test case:
> > 
> > static void test_bit_search_speed(void)
> > {
> > 	unsigned long flags;
> > 	uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2;
> > 	int i, j, pos;
> > 	const void *bit_addr;
> > 
> > 	local_irq_save(flags);
> > 	preempt_disable();
> > 	
> > 	printk("find_next_bit	test_bit_le\n");
> > 
> > 	for (i = 0; i < 24; i++) {
> > 
> > 		bit_addr = &bitmaps[i];
> > 
> > 		tsc_s_b1 = rdtsc();
> > 
> > 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> > 			while (pos < 216)
> > 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> > 
> > 		mb();
> > 		tsc_s_e1 = rdtsc();
> > 		tsc_s_e1 -= tsc_s_b1;
> > 		do_div(tsc_s_e1, 1000);
> > 
> > 		tsc_s_b2 = rdtsc();
> > 
> > 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> > 			while (pos < 216)
> > 				test_bit_le(pos++, bit_addr);
> > 
> > 		mb();
> > 		tsc_s_e2 = rdtsc();
> > 		tsc_s_e2 -= tsc_s_b2;
> > 		do_div(tsc_s_e2, 1000);
> > 
> > 		printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2);
> > 	}
> > 
> > 	preempt_enable();
> > 	local_irq_restore(flags);
> > }
> > case: 11111111 11111111
> > 		{255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255},
> > case: 10101010 10101010
> > 		{170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170},
> > case: 11111111 00000000
> > 		{255, 0, 255, 0, 255, 0, 255,
> > 		 0, 255, 0, 255, 0, 255, 0,
> > 		 255, 0, 255, 0, 255, 0, 255,
> > 		 0, 255, 0, 255, 0, 255},
> > case: 00001111 00001111
> > 		 {15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15}
> > 
> > and here are test result in my env. (Ubuntu vm, 768MB, i3-3220)
> > It seems find_next_bit works not so bad as I thought.
> > 
> > find_next_bit    test_bit_le
> > 73               4209
> > 62               1271
> > 69               1585
> > 50               2031
> > 67               2255
> > 82               2261
> > 52               4007
> > 79               2159
> > 50               2043
> > 55               2215
> > 53               2393
> > 72               3784
> > 76               1879
> > 61               2562
> > 70               2702
> > 62               2489
> > 56               2307
> > 54               2063
> > 51               2258
> > 69               2712
> > 4133             3989  -- case: 11111111 11111111
> > 2370             3024  -- case: 10101010 10101010
> > 2608             2413  -- case: 11111111 00000000
> > 2457             2506  -- case: 00001111 00001111
> 
> The time cost of test_bit_le shakes violently, it should be
> very smooth according to the test case, maybe it has relation
> to your vm env.
> 
> To Jaegeuk,
> Following test result is walking a bitmap via find_next_bit_le
> and test_bit_le.
> (Front 7 are random bitmaps, last four are cases from Yu, see
> attached sample for detail):
> 
> find_next_bit_le    test_bit_le
> 
>      3615                3492 
>      2640                3492 
>      2431                3492 
>      1957                3494 
>      2160                3492 
>      1933                3495 
>      1096                3492 
> 
>      8078                3492 
>      3732                3492 
>      4237                3493 
>      3824                3492
> 
> Thanks,
> Gu 
> 
> > 
> > .
> > 
> 
> 

> #include <linux/module.h>
> 
> __u8 bitmaps[11][27] = {
> 		{1, 123, 2, 3, 127, 200, 100,
> 		 123, 23, 13, 78, 42, 123, 50,
> 		 123, 56, 198, 0, 58, 22, 19,
> 		 123, 23, 13, 78, 42, 123},
> 		{2, 0, 0, 0, 0, 0, 0,
> 		 123, 23, 13, 78, 42, 123, 50,
> 		 123, 56, 198, 0, 58, 22, 19,
> 		 23, 123, 2, 34, 123, 12},
> 		{4, 0, 0, 0, 0, 0, 0,
> 		 123, 56, 198, 0, 58, 22, 19,
> 		 123, 23, 13, 78, 42, 123,
> 		 0, 0, 0, 34, 45, 29},
> 		{8, 0, 0, 0, 0, 0, 0,
> 		 1, 123, 2, 3, 127, 200, 100,
> 		 123, 23, 13, 78, 42, 123, 50,
> 		 0, 0, 0, 0, 0, 0},
> 		{16, 0, 0, 0, 0, 0, 0,
> 		 8, 78, 0, 0, 23, 0, 213,
> 		 0, 12, 0, 45, 0, 109, 111,
> 		 231, 11, 11, 88, 77, 99},
> 		{32, 0, 0, 0, 0, 0, 0,
> 		 8, 78, 0, 0, 23, 0, 213,
> 		 0, 12, 0, 45, 0, 109, 111,
> 		 231, 11, 11, 88, 77, 99},
> 		{64, 0, 0, 0, 0, 0, 0,
> 		 8, 78, 0, 0, 23, 0, 213,
> 		 0, 12, 0, 45, 0, 109, 111,
> 		 0, 0, 0, 0, 0, 0},
> 		{255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255},
> 		{170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170},
> 		{255, 0, 255, 0, 255, 0, 255,
> 		 0, 255, 0, 255, 0, 255, 0,
> 		 255, 0, 255, 0, 255, 0, 255,
> 		 0, 255, 0, 255, 0, 255},
> 		 {15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15}
> 		};
> 
> uint64_t rdtsc(void) {
> uint32_t lo, hi;
> __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
> return (uint64_t)hi << 32 | lo;
> }
> 
> static void test_bit_search_speed(void)
> {
> 	unsigned long flags;
> 	uint64_t tsc_s, tsc_diff;
> 	int i, j, pos;
> 	const void *bit_addr;
> 
> 	local_irq_save(flags);
> 	preempt_disable();
> 
> 	printk("find_next_bit_le    test_bit_le\n");
> 
> 	for (i = 0; i < 11; i++) {
> 		bit_addr = &bitmaps[i];
> 
> 		pos = 0;
> 		tsc_s = rdtsc();
> 
> 		for (j = 0; j < 1000; j++, pos = 0)
> 			while (pos < 216)
> 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> 		mb();
> 		tsc_diff = (rdtsc() - tsc_s)/1000;
> 
> 		printk("%8llu", tsc_diff);
> 
> 		pos = 0;
> 		tsc_s = rdtsc();
> 
> 		for (j = 0; j < 1000; j++, pos = 0)
> 			while (pos < 216)
> 				test_bit_le(pos++, bit_addr);
> 
> 		mb();
> 		tsc_diff = (rdtsc() - tsc_s)/1000;
> 
> 		printk("%20llu \n", tsc_diff);
> 	}
> 
> 	preempt_enable();
> 	local_irq_restore(flags);
> }
> 
> static int __init start_test(void) {
> 	test_bit_search_speed();
> 	return 0;
> }
> 
> static void __exit exit_test(void) {
> 
> }
> 
> module_init(start_test);
> module_exit(exit_test);
> 
> MODULE_LICENSE("GPL");
> MODULE_AUTHOR("Gu Zheng");


-- 
Jaegeuk Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-05  6:25           ` Jaegeuk Kim
@ 2014-07-05 11:15             ` Chao Yu
  2014-07-07  1:45             ` Changman Lee
  1 sibling, 0 replies; 16+ messages in thread
From: Chao Yu @ 2014-07-05 11:15 UTC (permalink / raw)
  To: 'Jaegeuk Kim', '이창만'
  Cc: 'Gu Zheng', 'f2fs', 'fsdevel'

Hi,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Saturday, July 05, 2014 2:26 PM
> To: '이창만'
> Cc: Gu Zheng; Chao Yu; 'f2fs'; 'fsdevel'
> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
> 
> To Changman,
> 
> Just for sure, can you reproduce this issue in the x86 machine with proper
> benchmarks? (i.e., test_bit_le vs. find_next_bit_le)
> 
> To all,
> 
> I cautiously suspect that the performances might be different when processing
> f2fs_find_entry, since L1/L2 cache misses due to the intermediate routines like
> matching strings can make some effect on it.
> 
> But, IMO, it is still worth to investigate this issue and contemplate how to
> detect all ones or not.
> 
> Ah, one solution may be using 2 bytes from the reserved space, total 3, to
> indicate how many valid dentries are stored in the dentry block.
> 
> Any ideas?

That's good one!
IMO, one more byte could be maintained additionally to indicate last one in
bitmap, we save the searching time for exist target name by skipping testing area
behind the last one.

Thanks,
Yu

> 
> Thanks,
> 
> On Fri, Jul 04, 2014 at 04:04:09PM +0800, Gu Zheng wrote:
> > Hi Yu,
> > Thanks.
> > On 07/04/2014 02:21 PM, Chao Yu wrote:
> >
> > > Hi Jaegeuk, Gu, Changman
> > >
> > >> -----Original Message-----
> > >> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > >> Sent: Friday, July 04, 2014 1:36 PM
> > >> To: Gu Zheng
> > >> Cc: f2fs; fsdevel; 이창만; 俞
> > >> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in,
> find_in_block
> > >>
> > >> Well, how about testing with many ones in the bit streams?
> > >> Thanks,
> > >>
> > >> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
> > >>> Hi Jaegeuk, Changman
> > >>>
> > >>> Just a simple test, not very sure it can address
> > >>> our qualm.
> > >>>
> > >>> Bitmap size:216(the same as f2fs dentry_bits).
> > >>> CPU: Intel i5 x86_64.
> > >>>
> > >>> Time counting based on tsc(the less the fast).
> > >>> [Index of 1]	find_next_bit_le	test_bit_le
> > >>> 0		20			117
> > >>> 1		20			114
> > >>> 2		20			113
> > >>> 3		20			139
> > >>> 4		22			121
> > >>> 5		22			118
> > >>> 6		22			115
> > >>> 8		22			112
> > >>> 9		22			106
> > >>> 10		22			105
> > >>> 11		22			100
> > >>> 16		22			98
> > >>> 48		22			97
> > >>> 80		27			95
> > >>> 104		27			92
> > >>> 136		32			95
> > >>> 160		32			92
> > >>> 184		32			90
> > >>> 200		27			87
> > >>> 208		35			84
> > >>>
> > >>> According to the result, find_next_bit_le is always
> > >>> better than test_bit_le, though there may be some
> > >>> noise, but I think the result is clear.
> > >>> Hope it can help us.:)
> > >>> ps.The sample is attached too.
> > >>>
> > >>> Thanks,
> > >>> Gu
> > >
> > > I hope this could provide some help for this patch.
> > >
> > > I modify Gu's code like this, and add few test case:
> > >
> > > static void test_bit_search_speed(void)
> > > {
> > > 	unsigned long flags;
> > > 	uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2;
> > > 	int i, j, pos;
> > > 	const void *bit_addr;
> > >
> > > 	local_irq_save(flags);
> > > 	preempt_disable();
> > >
> > > 	printk("find_next_bit	test_bit_le\n");
> > >
> > > 	for (i = 0; i < 24; i++) {
> > >
> > > 		bit_addr = &bitmaps[i];
> > >
> > > 		tsc_s_b1 = rdtsc();
> > >
> > > 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> > > 			while (pos < 216)
> > > 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> > >
> > > 		mb();
> > > 		tsc_s_e1 = rdtsc();
> > > 		tsc_s_e1 -= tsc_s_b1;
> > > 		do_div(tsc_s_e1, 1000);
> > >
> > > 		tsc_s_b2 = rdtsc();
> > >
> > > 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> > > 			while (pos < 216)
> > > 				test_bit_le(pos++, bit_addr);
> > >
> > > 		mb();
> > > 		tsc_s_e2 = rdtsc();
> > > 		tsc_s_e2 -= tsc_s_b2;
> > > 		do_div(tsc_s_e2, 1000);
> > >
> > > 		printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2);
> > > 	}
> > >
> > > 	preempt_enable();
> > > 	local_irq_restore(flags);
> > > }
> > > case: 11111111 11111111
> > > 		{255, 255, 255, 255, 255, 255, 255,
> > > 		 255, 255, 255, 255, 255, 255, 255,
> > > 		 255, 255, 255, 255, 255, 255, 255,
> > > 		 255, 255, 255, 255, 255, 255},
> > > case: 10101010 10101010
> > > 		{170, 170, 170, 170, 170, 170, 170,
> > > 		 170, 170, 170, 170, 170, 170, 170,
> > > 		 170, 170, 170, 170, 170, 170, 170,
> > > 		 170, 170, 170, 170, 170, 170},
> > > case: 11111111 00000000
> > > 		{255, 0, 255, 0, 255, 0, 255,
> > > 		 0, 255, 0, 255, 0, 255, 0,
> > > 		 255, 0, 255, 0, 255, 0, 255,
> > > 		 0, 255, 0, 255, 0, 255},
> > > case: 00001111 00001111
> > > 		 {15, 15, 15, 15, 15, 15, 15,
> > > 		 15, 15, 15, 15, 15, 15, 15,
> > > 		 15, 15, 15, 15, 15, 15, 15,
> > > 		 15, 15, 15, 15, 15, 15}
> > >
> > > and here are test result in my env. (Ubuntu vm, 768MB, i3-3220)
> > > It seems find_next_bit works not so bad as I thought.
> > >
> > > find_next_bit    test_bit_le
> > > 73               4209
> > > 62               1271
> > > 69               1585
> > > 50               2031
> > > 67               2255
> > > 82               2261
> > > 52               4007
> > > 79               2159
> > > 50               2043
> > > 55               2215
> > > 53               2393
> > > 72               3784
> > > 76               1879
> > > 61               2562
> > > 70               2702
> > > 62               2489
> > > 56               2307
> > > 54               2063
> > > 51               2258
> > > 69               2712
> > > 4133             3989  -- case: 11111111 11111111
> > > 2370             3024  -- case: 10101010 10101010
> > > 2608             2413  -- case: 11111111 00000000
> > > 2457             2506  -- case: 00001111 00001111
> >
> > The time cost of test_bit_le shakes violently, it should be
> > very smooth according to the test case, maybe it has relation
> > to your vm env.
> >
> > To Jaegeuk,
> > Following test result is walking a bitmap via find_next_bit_le
> > and test_bit_le.
> > (Front 7 are random bitmaps, last four are cases from Yu, see
> > attached sample for detail):
> >
> > find_next_bit_le    test_bit_le
> >
> >      3615                3492
> >      2640                3492
> >      2431                3492
> >      1957                3494
> >      2160                3492
> >      1933                3495
> >      1096                3492
> >
> >      8078                3492
> >      3732                3492
> >      4237                3493
> >      3824                3492
> >
> > Thanks,
> > Gu
> >
> > >
> > > .
> > >
> >
> >
> 
> > #include <linux/module.h>
> >
> > __u8 bitmaps[11][27] = {
> > 		{1, 123, 2, 3, 127, 200, 100,
> > 		 123, 23, 13, 78, 42, 123, 50,
> > 		 123, 56, 198, 0, 58, 22, 19,
> > 		 123, 23, 13, 78, 42, 123},
> > 		{2, 0, 0, 0, 0, 0, 0,
> > 		 123, 23, 13, 78, 42, 123, 50,
> > 		 123, 56, 198, 0, 58, 22, 19,
> > 		 23, 123, 2, 34, 123, 12},
> > 		{4, 0, 0, 0, 0, 0, 0,
> > 		 123, 56, 198, 0, 58, 22, 19,
> > 		 123, 23, 13, 78, 42, 123,
> > 		 0, 0, 0, 34, 45, 29},
> > 		{8, 0, 0, 0, 0, 0, 0,
> > 		 1, 123, 2, 3, 127, 200, 100,
> > 		 123, 23, 13, 78, 42, 123, 50,
> > 		 0, 0, 0, 0, 0, 0},
> > 		{16, 0, 0, 0, 0, 0, 0,
> > 		 8, 78, 0, 0, 23, 0, 213,
> > 		 0, 12, 0, 45, 0, 109, 111,
> > 		 231, 11, 11, 88, 77, 99},
> > 		{32, 0, 0, 0, 0, 0, 0,
> > 		 8, 78, 0, 0, 23, 0, 213,
> > 		 0, 12, 0, 45, 0, 109, 111,
> > 		 231, 11, 11, 88, 77, 99},
> > 		{64, 0, 0, 0, 0, 0, 0,
> > 		 8, 78, 0, 0, 23, 0, 213,
> > 		 0, 12, 0, 45, 0, 109, 111,
> > 		 0, 0, 0, 0, 0, 0},
> > 		{255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255},
> > 		{170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170},
> > 		{255, 0, 255, 0, 255, 0, 255,
> > 		 0, 255, 0, 255, 0, 255, 0,
> > 		 255, 0, 255, 0, 255, 0, 255,
> > 		 0, 255, 0, 255, 0, 255},
> > 		 {15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15}
> > 		};
> >
> > uint64_t rdtsc(void) {
> > uint32_t lo, hi;
> > __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
> > return (uint64_t)hi << 32 | lo;
> > }
> >
> > static void test_bit_search_speed(void)
> > {
> > 	unsigned long flags;
> > 	uint64_t tsc_s, tsc_diff;
> > 	int i, j, pos;
> > 	const void *bit_addr;
> >
> > 	local_irq_save(flags);
> > 	preempt_disable();
> >
> > 	printk("find_next_bit_le    test_bit_le\n");
> >
> > 	for (i = 0; i < 11; i++) {
> > 		bit_addr = &bitmaps[i];
> >
> > 		pos = 0;
> > 		tsc_s = rdtsc();
> >
> > 		for (j = 0; j < 1000; j++, pos = 0)
> > 			while (pos < 216)
> > 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> > 		mb();
> > 		tsc_diff = (rdtsc() - tsc_s)/1000;
> >
> > 		printk("%8llu", tsc_diff);
> >
> > 		pos = 0;
> > 		tsc_s = rdtsc();
> >
> > 		for (j = 0; j < 1000; j++, pos = 0)
> > 			while (pos < 216)
> > 				test_bit_le(pos++, bit_addr);
> >
> > 		mb();
> > 		tsc_diff = (rdtsc() - tsc_s)/1000;
> >
> > 		printk("%20llu \n", tsc_diff);
> > 	}
> >
> > 	preempt_enable();
> > 	local_irq_restore(flags);
> > }
> >
> > static int __init start_test(void) {
> > 	test_bit_search_speed();
> > 	return 0;
> > }
> >
> > static void __exit exit_test(void) {
> >
> > }
> >
> > module_init(start_test);
> > module_exit(exit_test);
> >
> > MODULE_LICENSE("GPL");
> > MODULE_AUTHOR("Gu Zheng");
> 
> 
> --
> Jaegeuk Kim

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-05  6:25           ` Jaegeuk Kim
  2014-07-05 11:15             ` Chao Yu
@ 2014-07-07  1:45             ` Changman Lee
  2014-07-07  2:13               ` Gu Zheng
  1 sibling, 1 reply; 16+ messages in thread
From: Changman Lee @ 2014-07-07  1:45 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: Gu Zheng, Chao Yu, 'f2fs', 'fsdevel'

Hello,

On Fri, Jul 04, 2014 at 11:25:35PM -0700, Jaegeuk Kim wrote:
> To Changman,
> 
> Just for sure, can you reproduce this issue in the x86 machine with proper
> benchmarks? (i.e., test_bit_le vs. find_next_bit_le)

It shows quite a different result of bit_mod_test between server and desktop.

CPU i5 x86_64 Ubuntu Server - 3.16.0-rc3

[266627.204776] find_next_bit_le    test_bit_le
[266627.205319]     1832                1774
[266627.206223]     1292                1746
[266627.207092]     1205                1746
[266627.207876]      914                1746
[266627.208710]     1082                1746
[266627.209506]      956                1746
[266627.210175]      523                1746

[266627.211839]     3907                1746
[266627.212898]     1850                1746
[266627.214046]     2153                1746
[266627.215118]     1894                1746


CPU i7 x86_64 Mint Desktop - 3.13.0-24-generic

[432284.422356] find_next_bit_le    test_bit_le
[432284.423470]     3771                3878
[432284.425400]     2671                3696
[432284.427221]     2492                3760
[432284.428908]     1971                3696
[432284.430640]     2191                3730
[432284.432323]     1986                3696
[432284.433741]     1123                3698

[432284.437269]     8299                3696
[432284.439487]     3842                3696
[432284.441850]     4334                3696
[432284.444080]     3885                3696

> 
> To all,
> 
> I cautiously suspect that the performances might be different when processing
> f2fs_find_entry, since L1/L2 cache misses due to the intermediate routines like
> matching strings can make some effect on it.
> 
> But, IMO, it is still worth to investigate this issue and contemplate how to
> detect all ones or not.
> 
> Ah, one solution may be using 2 bytes from the reserved space, total 3, to
> indicate how many valid dentries are stored in the dentry block.
> 
> Any ideas?

Agree. In the case of one bits is over than half, test_bit is better
than find_next_bit. So we can decide whether using test_bit or
find_next_bit depending on count of one bits.

When just comparing test_bit and find_next_bit, I think test_bit is more effective
in f2fs because let's think about f2fs's dentry management policy.
One dentry bucket is filled then next dentry bucket is filled from
lower to higher level. If empty slots of lower level exist, they are used first.
So, I guess that one bits are getting more than zero bits as time goes by.

Thanks,

> 
> Thanks,
> 
> On Fri, Jul 04, 2014 at 04:04:09PM +0800, Gu Zheng wrote:
> > Hi Yu,
> > Thanks.
> > On 07/04/2014 02:21 PM, Chao Yu wrote:
> > 
> > > Hi Jaegeuk, Gu, Changman
> > > 
> > >> -----Original Message-----
> > >> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > >> Sent: Friday, July 04, 2014 1:36 PM
> > >> To: Gu Zheng
> > >> Cc: f2fs; fsdevel; 이창만; 俞
> > >> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
> > >>
> > >> Well, how about testing with many ones in the bit streams?
> > >> Thanks,
> > >>
> > >> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
> > >>> Hi Jaegeuk, Changman
> > >>>
> > >>> Just a simple test, not very sure it can address
> > >>> our qualm.
> > >>>
> > >>> Bitmap size:216(the same as f2fs dentry_bits).
> > >>> CPU: Intel i5 x86_64.
> > >>>
> > >>> Time counting based on tsc(the less the fast).
> > >>> [Index of 1]	find_next_bit_le	test_bit_le
> > >>> 0		20			117
> > >>> 1		20			114
> > >>> 2		20			113
> > >>> 3		20			139
> > >>> 4		22			121
> > >>> 5		22			118
> > >>> 6		22			115
> > >>> 8		22			112
> > >>> 9		22			106
> > >>> 10		22			105
> > >>> 11		22			100
> > >>> 16		22			98
> > >>> 48		22			97
> > >>> 80		27			95
> > >>> 104		27			92
> > >>> 136		32			95
> > >>> 160		32			92
> > >>> 184		32			90
> > >>> 200		27			87
> > >>> 208		35			84
> > >>>
> > >>> According to the result, find_next_bit_le is always
> > >>> better than test_bit_le, though there may be some
> > >>> noise, but I think the result is clear.
> > >>> Hope it can help us.:)
> > >>> ps.The sample is attached too.
> > >>>
> > >>> Thanks,
> > >>> Gu
> > > 
> > > I hope this could provide some help for this patch.
> > > 
> > > I modify Gu's code like this, and add few test case:
> > > 
> > > static void test_bit_search_speed(void)
> > > {
> > > 	unsigned long flags;
> > > 	uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2;
> > > 	int i, j, pos;
> > > 	const void *bit_addr;
> > > 
> > > 	local_irq_save(flags);
> > > 	preempt_disable();
> > > 	
> > > 	printk("find_next_bit	test_bit_le\n");
> > > 
> > > 	for (i = 0; i < 24; i++) {
> > > 
> > > 		bit_addr = &bitmaps[i];
> > > 
> > > 		tsc_s_b1 = rdtsc();
> > > 
> > > 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> > > 			while (pos < 216)
> > > 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> > > 
> > > 		mb();
> > > 		tsc_s_e1 = rdtsc();
> > > 		tsc_s_e1 -= tsc_s_b1;
> > > 		do_div(tsc_s_e1, 1000);
> > > 
> > > 		tsc_s_b2 = rdtsc();
> > > 
> > > 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> > > 			while (pos < 216)
> > > 				test_bit_le(pos++, bit_addr);
> > > 
> > > 		mb();
> > > 		tsc_s_e2 = rdtsc();
> > > 		tsc_s_e2 -= tsc_s_b2;
> > > 		do_div(tsc_s_e2, 1000);
> > > 
> > > 		printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2);
> > > 	}
> > > 
> > > 	preempt_enable();
> > > 	local_irq_restore(flags);
> > > }
> > > case: 11111111 11111111
> > > 		{255, 255, 255, 255, 255, 255, 255,
> > > 		 255, 255, 255, 255, 255, 255, 255,
> > > 		 255, 255, 255, 255, 255, 255, 255,
> > > 		 255, 255, 255, 255, 255, 255},
> > > case: 10101010 10101010
> > > 		{170, 170, 170, 170, 170, 170, 170,
> > > 		 170, 170, 170, 170, 170, 170, 170,
> > > 		 170, 170, 170, 170, 170, 170, 170,
> > > 		 170, 170, 170, 170, 170, 170},
> > > case: 11111111 00000000
> > > 		{255, 0, 255, 0, 255, 0, 255,
> > > 		 0, 255, 0, 255, 0, 255, 0,
> > > 		 255, 0, 255, 0, 255, 0, 255,
> > > 		 0, 255, 0, 255, 0, 255},
> > > case: 00001111 00001111
> > > 		 {15, 15, 15, 15, 15, 15, 15,
> > > 		 15, 15, 15, 15, 15, 15, 15,
> > > 		 15, 15, 15, 15, 15, 15, 15,
> > > 		 15, 15, 15, 15, 15, 15}
> > > 
> > > and here are test result in my env. (Ubuntu vm, 768MB, i3-3220)
> > > It seems find_next_bit works not so bad as I thought.
> > > 
> > > find_next_bit    test_bit_le
> > > 73               4209
> > > 62               1271
> > > 69               1585
> > > 50               2031
> > > 67               2255
> > > 82               2261
> > > 52               4007
> > > 79               2159
> > > 50               2043
> > > 55               2215
> > > 53               2393
> > > 72               3784
> > > 76               1879
> > > 61               2562
> > > 70               2702
> > > 62               2489
> > > 56               2307
> > > 54               2063
> > > 51               2258
> > > 69               2712
> > > 4133             3989  -- case: 11111111 11111111
> > > 2370             3024  -- case: 10101010 10101010
> > > 2608             2413  -- case: 11111111 00000000
> > > 2457             2506  -- case: 00001111 00001111
> > 
> > The time cost of test_bit_le shakes violently, it should be
> > very smooth according to the test case, maybe it has relation
> > to your vm env.
> > 
> > To Jaegeuk,
> > Following test result is walking a bitmap via find_next_bit_le
> > and test_bit_le.
> > (Front 7 are random bitmaps, last four are cases from Yu, see
> > attached sample for detail):
> > 
> > find_next_bit_le    test_bit_le
> > 
> >      3615                3492 
> >      2640                3492 
> >      2431                3492 
> >      1957                3494 
> >      2160                3492 
> >      1933                3495 
> >      1096                3492 
> > 
> >      8078                3492 
> >      3732                3492 
> >      4237                3493 
> >      3824                3492
> > 
> > Thanks,
> > Gu 
> > 
> > > 
> > > .
> > > 
> > 
> > 
> 
> > #include <linux/module.h>
> > 
> > __u8 bitmaps[11][27] = {
> > 		{1, 123, 2, 3, 127, 200, 100,
> > 		 123, 23, 13, 78, 42, 123, 50,
> > 		 123, 56, 198, 0, 58, 22, 19,
> > 		 123, 23, 13, 78, 42, 123},
> > 		{2, 0, 0, 0, 0, 0, 0,
> > 		 123, 23, 13, 78, 42, 123, 50,
> > 		 123, 56, 198, 0, 58, 22, 19,
> > 		 23, 123, 2, 34, 123, 12},
> > 		{4, 0, 0, 0, 0, 0, 0,
> > 		 123, 56, 198, 0, 58, 22, 19,
> > 		 123, 23, 13, 78, 42, 123,
> > 		 0, 0, 0, 34, 45, 29},
> > 		{8, 0, 0, 0, 0, 0, 0,
> > 		 1, 123, 2, 3, 127, 200, 100,
> > 		 123, 23, 13, 78, 42, 123, 50,
> > 		 0, 0, 0, 0, 0, 0},
> > 		{16, 0, 0, 0, 0, 0, 0,
> > 		 8, 78, 0, 0, 23, 0, 213,
> > 		 0, 12, 0, 45, 0, 109, 111,
> > 		 231, 11, 11, 88, 77, 99},
> > 		{32, 0, 0, 0, 0, 0, 0,
> > 		 8, 78, 0, 0, 23, 0, 213,
> > 		 0, 12, 0, 45, 0, 109, 111,
> > 		 231, 11, 11, 88, 77, 99},
> > 		{64, 0, 0, 0, 0, 0, 0,
> > 		 8, 78, 0, 0, 23, 0, 213,
> > 		 0, 12, 0, 45, 0, 109, 111,
> > 		 0, 0, 0, 0, 0, 0},
> > 		{255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255, 255,
> > 		 255, 255, 255, 255, 255, 255},
> > 		{170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170, 170,
> > 		 170, 170, 170, 170, 170, 170},
> > 		{255, 0, 255, 0, 255, 0, 255,
> > 		 0, 255, 0, 255, 0, 255, 0,
> > 		 255, 0, 255, 0, 255, 0, 255,
> > 		 0, 255, 0, 255, 0, 255},
> > 		 {15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15, 15,
> > 		 15, 15, 15, 15, 15, 15}
> > 		};
> > 
> > uint64_t rdtsc(void) {
> > uint32_t lo, hi;
> > __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
> > return (uint64_t)hi << 32 | lo;
> > }
> > 
> > static void test_bit_search_speed(void)
> > {
> > 	unsigned long flags;
> > 	uint64_t tsc_s, tsc_diff;
> > 	int i, j, pos;
> > 	const void *bit_addr;
> > 
> > 	local_irq_save(flags);
> > 	preempt_disable();
> > 
> > 	printk("find_next_bit_le    test_bit_le\n");
> > 
> > 	for (i = 0; i < 11; i++) {
> > 		bit_addr = &bitmaps[i];
> > 
> > 		pos = 0;
> > 		tsc_s = rdtsc();
> > 
> > 		for (j = 0; j < 1000; j++, pos = 0)
> > 			while (pos < 216)
> > 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> > 		mb();
> > 		tsc_diff = (rdtsc() - tsc_s)/1000;
> > 
> > 		printk("%8llu", tsc_diff);
> > 
> > 		pos = 0;
> > 		tsc_s = rdtsc();
> > 
> > 		for (j = 0; j < 1000; j++, pos = 0)
> > 			while (pos < 216)
> > 				test_bit_le(pos++, bit_addr);
> > 
> > 		mb();
> > 		tsc_diff = (rdtsc() - tsc_s)/1000;
> > 
> > 		printk("%20llu \n", tsc_diff);
> > 	}
> > 
> > 	preempt_enable();
> > 	local_irq_restore(flags);
> > }
> > 
> > static int __init start_test(void) {
> > 	test_bit_search_speed();
> > 	return 0;
> > }
> > 
> > static void __exit exit_test(void) {
> > 
> > }
> > 
> > module_init(start_test);
> > module_exit(exit_test);
> > 
> > MODULE_LICENSE("GPL");
> > MODULE_AUTHOR("Gu Zheng");
> 
> 
> -- 
> Jaegeuk Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
  2014-07-07  1:45             ` Changman Lee
@ 2014-07-07  2:13               ` Gu Zheng
  0 siblings, 0 replies; 16+ messages in thread
From: Gu Zheng @ 2014-07-07  2:13 UTC (permalink / raw)
  To: Changman Lee; +Cc: Jaegeuk Kim, Chao Yu, 'f2fs', 'fsdevel'

Hi Changman,
On 07/07/2014 09:45 AM, Changman Lee wrote:

> Hello,
> 
> On Fri, Jul 04, 2014 at 11:25:35PM -0700, Jaegeuk Kim wrote:
>> To Changman,
>>
>> Just for sure, can you reproduce this issue in the x86 machine with proper
>> benchmarks? (i.e., test_bit_le vs. find_next_bit_le)
> 
> It shows quite a different result of bit_mod_test between server and desktop.

It is possible. And the trend of the result is correct, so I think it is credible.

> 
> CPU i5 x86_64 Ubuntu Server - 3.16.0-rc3
> 
> [266627.204776] find_next_bit_le    test_bit_le
> [266627.205319]     1832                1774
> [266627.206223]     1292                1746
> [266627.207092]     1205                1746
> [266627.207876]      914                1746
> [266627.208710]     1082                1746
> [266627.209506]      956                1746
> [266627.210175]      523                1746
> 
> [266627.211839]     3907                1746
> [266627.212898]     1850                1746
> [266627.214046]     2153                1746
> [266627.215118]     1894                1746
> 
> 
> CPU i7 x86_64 Mint Desktop - 3.13.0-24-generic
> 
> [432284.422356] find_next_bit_le    test_bit_le
> [432284.423470]     3771                3878
> [432284.425400]     2671                3696
> [432284.427221]     2492                3760
> [432284.428908]     1971                3696
> [432284.430640]     2191                3730
> [432284.432323]     1986                3696
> [432284.433741]     1123                3698
> 
> [432284.437269]     8299                3696
> [432284.439487]     3842                3696
> [432284.441850]     4334                3696
> [432284.444080]     3885                3696
> 
>>
>> To all,
>>
>> I cautiously suspect that the performances might be different when processing
>> f2fs_find_entry, since L1/L2 cache misses due to the intermediate routines like
>> matching strings can make some effect on it.
>>
>> But, IMO, it is still worth to investigate this issue and contemplate how to
>> detect all ones or not.
>>
>> Ah, one solution may be using 2 bytes from the reserved space, total 3, to
>> indicate how many valid dentries are stored in the dentry block.
>>
>> Any ideas?
> 
> Agree. In the case of one bits is over than half, test_bit is better
> than find_next_bit. So we can decide whether using test_bit or
> find_next_bit depending on count of one bits.
> 
> When just comparing test_bit and find_next_bit, I think test_bit is more effective
> in f2fs because let's think about f2fs's dentry management policy.
> One dentry bucket is filled then next dentry bucket is filled from
> lower to higher level. If empty slots of lower level exist, they are used first.
> So, I guess that one bits are getting more than zero bits as time goes by.

Partly agree.
IMO, what we seriously care about is *find a suitable slot*, not counting the
bitmap, we can gain more benefit from find_next_bit rather than test_bit, not only
the efficiency, but also the readability.

Thanks,
Gu

> 
> Thanks,
> 
>>
>> Thanks,
>>



^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2014-07-07  2:25 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-24 10:20 [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Gu Zheng
2014-06-25  2:30 ` [f2fs-dev] " Chao Yu
2014-06-25  2:53   ` Gu Zheng
2014-06-27  9:58 ` [PATCH V2 3/4] f2fs: use find_next_bit_le rather than test_bit_le in find_in_block Gu Zheng
2014-07-02  7:49 ` [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block Changman Lee
2014-07-03  1:05   ` Gu Zheng
2014-07-02 10:30 ` Jaegeuk Kim
2014-07-03  1:13   ` Gu Zheng
2014-07-03 10:14   ` Gu Zheng
2014-07-04  5:36     ` Jaegeuk Kim
2014-07-04  6:21       ` Chao Yu
2014-07-04  8:04         ` Gu Zheng
2014-07-05  6:25           ` Jaegeuk Kim
2014-07-05 11:15             ` Chao Yu
2014-07-07  1:45             ` Changman Lee
2014-07-07  2:13               ` Gu Zheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.