* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
@ 2018-09-21  3:43 Gao Xiang
  2018-09-21  3:43 ` [PREVIEW] [PATCH chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled Gao Xiang
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Gao Xiang @ 2018-09-21  3:43 UTC (permalink / raw)


This patch introduces inode hash function, test and set callbacks,
and iget5_locked to find the right inode for 32-bit platforms.
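
For illustration only (not part of the patch): on 32-bit, two distinct
nids can fold to the same hash value, which is why a hash-only lookup is
not enough there. A minimal userspace sketch, assuming the fold
(nid >> 32) ^ (nid & 0xffffffff) used below:

#include <stdio.h>
#include <stdint.h>

/* the same fold as the 32-bit branch of erofs_inode_hash() below */
static uint32_t fold_nid(uint64_t nid)
{
	return (uint32_t)((nid >> 32) ^ (nid & 0xffffffff));
}

int main(void)
{
	/* two distinct nids that fold to the same 32-bit hash, so the
	 * test callback must compare the full 64-bit nid to pick the
	 * right inode */
	uint64_t a = 0x100000000ULL;	/* high = 1, low = 0 -> folds to 1 */
	uint64_t b = 0x000000001ULL;	/* high = 0, low = 1 -> folds to 1 */

	printf("%u %u\n", (unsigned)fold_nid(a), (unsigned)fold_nid(b));
	return 0;
}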

Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
 drivers/staging/erofs/inode.c    | 37 ++++++++++++++++++++++++++++++++++++-
 drivers/staging/erofs/internal.h | 10 ++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
index da8693a..a2fa2aa 100644
--- a/drivers/staging/erofs/inode.c
+++ b/drivers/staging/erofs/inode.c
@@ -232,10 +232,45 @@ static int fill_inode(struct inode *inode, int isdir)
 	return err;
 }
 
+/*
+ * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
+ * we should do more for 32-bit platform to find the right inode.
+ */
+#if BITS_PER_LONG < 64
+static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	return EROFS_V(inode)->nid == nid;
+}
+
+static int erofs_iget_set_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	inode->i_ino = erofs_inode_hash(nid);
+	return 0;
+}
+#endif
+
+static inline struct inode *erofs_iget_locked(struct super_block *sb,
+					      erofs_nid_t nid)
+{
+	const unsigned long hashval = erofs_inode_hash(nid);
+
+#if BITS_PER_LONG >= 64
+	/* it is safe to use iget_locked for >= 64-bit platform */
+	return iget_locked(sb, hashval);
+#else
+	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
+		erofs_iget_set_actor, &nid);
+#endif
+}
+
 struct inode *erofs_iget(struct super_block *sb,
 	erofs_nid_t nid, bool isdir)
 {
-	struct inode *inode = iget_locked(sb, nid);
+	struct inode *inode = erofs_iget_locked(sb, nid);
 
 	if (unlikely(inode == NULL))
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index c2cc4a016..823929a 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -511,6 +511,16 @@ struct erofs_map_blocks_iter {
 }
 
 /* inode.c */
+static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
+{
+	u64 h = nid;
+
+#if BITS_PER_LONG == 32
+	h = (h >> 32) ^ (h & 0xffffffff);
+#endif
+	return (unsigned long)h;
+}
+
 extern struct inode *erofs_iget(struct super_block *sb,
 	erofs_nid_t nid, bool dir);
 
-- 
1.9.1


* [PREVIEW] [PATCH chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled
  2018-09-21  3:43 [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms Gao Xiang
@ 2018-09-21  3:43 ` Gao Xiang
  2018-10-01  9:49   ` Chao Yu
  2018-09-21  3:43 ` [PREVIEW] [PATCH chao/erofs-dev 3/3] staging: erofs: managed pages could be locked at the time of decompression Gao Xiang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Gao Xiang @ 2018-09-21  3:43 UTC (permalink / raw)


When the managed cache is enabled, the last reference count
of a workgroup must be used for its workstation.

Otherwise, it could lead to incorrect (un)freezes in
the reclaim path, and it would be harmful.

Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
 drivers/staging/erofs/utils.c | 131 ++++++++++++++++++++++++++++++------------
 1 file changed, 95 insertions(+), 36 deletions(-)

diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index ddd220a..8ef13c8 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -87,12 +87,28 @@ int erofs_register_workgroup(struct super_block *sb,
 		grp = (void *)((unsigned long)grp |
 			1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
 
+	/*
+	 * If managed cache is enabled, the reclaim path assumes
+	 * that the last reference count is used for its workstation.
+	 * Therefore we should bump up reference count before
+	 * making this workgroup visible to other users.
+	 */
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+	/* refcount should be at least 2 to get on well with reclaim path */
+	__erofs_workgroup_get(grp);
+#endif
+
 	err = radix_tree_insert(&sbi->workstn_tree,
 		grp->index, grp);
 
-	if (!err) {
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+	if (unlikely(err))
+		/* it is safe to decrease for refcount >= 2 */
+		atomic_dec(&grp->refcount);
+#else
+	if (!err)
 		__erofs_workgroup_get(grp);
-	}
+#endif
 
 	erofs_workstn_unlock(sbi);
 	radix_tree_preload_end();
@@ -101,19 +117,90 @@ int erofs_register_workgroup(struct super_block *sb,
 
 extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
 
+static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
+{
+	atomic_long_dec(&erofs_global_shrink_cnt);
+	erofs_workgroup_free_rcu(grp);
+}
+
 int erofs_workgroup_put(struct erofs_workgroup *grp)
 {
 	int count = atomic_dec_return(&grp->refcount);
 
 	if (count == 1)
 		atomic_long_inc(&erofs_global_shrink_cnt);
-	else if (!count) {
-		atomic_long_dec(&erofs_global_shrink_cnt);
-		erofs_workgroup_free_rcu(grp);
-	}
+	else if (!count)
+		__erofs_workgroup_free(grp);
 	return count;
 }
 
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+
+static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
+{
+	erofs_workgroup_unfreeze(grp, 0);
+	__erofs_workgroup_free(grp);
+}
+
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	/*
+	 * for managed cache enabled, the refcount of workgroups
+	 * themselves could be < 0 (frozen). So there is no guarantee
+	 * that all refcount > 0 if managed cache is enabled.
+	 */
+	if (!erofs_workgroup_try_to_freeze(grp, 1))
+		return false;
+
+	/*
+	 * note that all cached pages should be unlinked
+	 * before delete it from the radix tree.
+	 * Otherwise some cached pages of an orphan old workgroup
+	 * could be still linked after the new one is available.
+	 */
+	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
+		erofs_workgroup_unfreeze(grp, 1);
+		return false;
+	}
+
+	/* it is impossible to fail after we freeze the workgroup */
+	if (radix_tree_delete(&sbi->workstn_tree, grp->index) != grp)
+		BUG();	/* should never happen */
+
+	/*
+	 * if managed cache is enabled, the last refcount
+	 * should indicate the related workstation.
+	 */
+	erofs_workgroup_unfreeze_final(grp);
+	return true;
+}
+
+#else
+
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	int cnt = atomic_read(&grp->refcount);
+
+	DBG_BUGON(cnt <= 0);
+	DBG_BUGON(cleanup && cnt != 1);
+
+	if (cnt > 1)
+		return false;
+
+	if (radix_tree_delete(&sbi->workstn_tree, grp->index) != grp)
+		return false;
+
+	/* (rarely) could be grabbed again when freeing */
+	erofs_workgroup_put(grp);
+	return true;
+}
+
+#endif
+
 unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 				       unsigned long nr_shrink,
 				       bool cleanup)
@@ -130,43 +217,15 @@ unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 		batch, first_index, PAGEVEC_SIZE);
 
 	for (i = 0; i < found; ++i) {
-		int cnt;
 		struct erofs_workgroup *grp = (void *)
 			((unsigned long)batch[i] &
 				~RADIX_TREE_EXCEPTIONAL_ENTRY);
 
 		first_index = grp->index + 1;
 
-		cnt = atomic_read(&grp->refcount);
-		BUG_ON(cnt <= 0);
-
-		if (cleanup)
-			BUG_ON(cnt != 1);
-
-#ifndef EROFS_FS_HAS_MANAGED_CACHE
-		else if (cnt > 1)
-#else
-		if (!erofs_workgroup_try_to_freeze(grp, 1))
-#endif
-			continue;
-
-		if (radix_tree_delete(&sbi->workstn_tree,
-			grp->index) != grp) {
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-skip:
-			erofs_workgroup_unfreeze(grp, 1);
-#endif
+		/* try to shrink each workgroup */
+		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
 			continue;
-		}
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-		if (erofs_try_to_free_all_cached_pages(sbi, grp))
-			goto skip;
-
-		erofs_workgroup_unfreeze(grp, 1);
-#endif
-		/* (rarely) grabbed again when freeing */
-		erofs_workgroup_put(grp);
 
 		++freed;
 		if (unlikely(!--nr_shrink))
-- 
1.9.1


* [PREVIEW] [PATCH chao/erofs-dev 3/3] staging: erofs: managed pages could be locked at the time of decompression
  2018-09-21  3:43 [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms Gao Xiang
  2018-09-21  3:43 ` [PREVIEW] [PATCH chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled Gao Xiang
@ 2018-09-21  3:43 ` Gao Xiang
  2018-10-01 10:03   ` Chao Yu
  2018-09-30 16:22 ` [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms Gao Xiang
  2018-10-01  1:57 ` Chao Yu
  3 siblings, 1 reply; 19+ messages in thread
From: Gao Xiang @ 2018-09-21  3:43 UTC (permalink / raw)


Managed compressed pages should be unlocked when IO ends by design.
Therefore, these pages will be up-to-date and unlocked at the time
of decompression in general.

However managed pages could be re-locked in some other paths, such as
the reclaim path. Let's remove the related assertion in decompression.
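
A rough picture of the situation (sketch; assuming the reclaim side locks
cached pages while trying to unlink them, as described above):

z_erofs_vle_unzip()                    reclaim path
                                       lock_page(page) in order to free
                                       the cached page
page->mapping == mngda,
page is still up-to-date, yet
BUG_ON(PageLocked(page)) trips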

Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
 drivers/staging/erofs/unzip_vle.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 337a82d..3b14126 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -872,7 +872,6 @@ static int z_erofs_vle_unzip(struct super_block *sb,
 			continue;
 #ifdef EROFS_FS_HAS_MANAGED_CACHE
 		else if (page->mapping == mngda) {
-			BUG_ON(PageLocked(page));
 			BUG_ON(!PageUptodate(page));
 			continue;
 		}
-- 
1.9.1


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-09-21  3:43 [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms Gao Xiang
  2018-09-21  3:43 ` [PREVIEW] [PATCH chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled Gao Xiang
  2018-09-21  3:43 ` [PREVIEW] [PATCH chao/erofs-dev 3/3] staging: erofs: managed pages could be locked at the time of decompression Gao Xiang
@ 2018-09-30 16:22 ` Gao Xiang
  2018-10-01  0:37   ` Chao Yu
  2018-10-01  1:57 ` Chao Yu
  3 siblings, 1 reply; 19+ messages in thread
From: Gao Xiang @ 2018-09-30 16:22 UTC (permalink / raw)


Hi Chao,

Could you help merge these patches into erofs-dev (rebased on the latest erofs-master)?
I need to formally fix several issues reported these days.

If you have time, could you also take some time to review these patches?

Thanks,
Gao Xiang

On 2018/9/21 11:43, Gao Xiang wrote:
> This patch introduces inode hash function, test and set callbacks,
> and iget5_locked to find the right inode for 32-bit platforms.
> 
> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-09-30 16:22 ` [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms Gao Xiang
@ 2018-10-01  0:37   ` Chao Yu
  2018-10-01  2:08     ` Gao Xiang
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2018-10-01  0:37 UTC (permalink / raw)


Hi Xiang,

Sorry for the late response to those patches.

On 2018-10-1 0:22, Gao Xiang wrote:
> Hi Chao,
> 
> Could you help merge these patches into erofs-dev (rebased on the latest erofs-master)?
> I need to formally fix several issues reported these days.

No problem.

> 
> If you have time, could you also take some time to review these patches?

Okay, will review them soon. :)

Thanks,

> 
> Thanks,
> Gao Xiang
> 
> On 2018/9/21 11:43, Gao Xiang wrote:
>> This patch introduces inode hash function, test and set callbacks,
>> and iget5_locked to find the right inode for 32-bit platforms.
>>
>> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-09-21  3:43 [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms Gao Xiang
                   ` (2 preceding siblings ...)
  2018-09-30 16:22 ` [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms Gao Xiang
@ 2018-10-01  1:57 ` Chao Yu
  2018-10-01  2:40   ` Gao Xiang
  3 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2018-10-01  1:57 UTC (permalink / raw)


On 2018-9-21 11:43, Gao Xiang wrote:
> This patch introduces inode hash function, test and set callbacks,
> and iget5_locked to find the right inode for 32-bit platforms.
> 
> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
> ---
>  drivers/staging/erofs/inode.c    | 37 ++++++++++++++++++++++++++++++++++++-
>  drivers/staging/erofs/internal.h | 10 ++++++++++
>  2 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
> index da8693a..a2fa2aa 100644
> --- a/drivers/staging/erofs/inode.c
> +++ b/drivers/staging/erofs/inode.c
> @@ -232,10 +232,45 @@ static int fill_inode(struct inode *inode, int isdir)
>  	return err;
>  }
>  
> +/*
> + * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
> + * we should do more for 32-bit platform to find the right inode.
> + */
> +#if BITS_PER_LONG < 64

#if BITS_PER_LONG == 32

> +static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
> +{
> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
> +
> +	return EROFS_V(inode)->nid == nid;
> +}
> +
> +static int erofs_iget_set_actor(struct inode *inode, void *opaque)
> +{
> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
> +
> +	inode->i_ino = erofs_inode_hash(nid);
> +	return 0;
> +}
> +#endif
> +
> +static inline struct inode *erofs_iget_locked(struct super_block *sb,
> +					      erofs_nid_t nid)
> +{
> +	const unsigned long hashval = erofs_inode_hash(nid);
> +
> +#if BITS_PER_LONG >= 64
> +	/* it is safe to use iget_locked for >= 64-bit platform */
> +	return iget_locked(sb, hashval);
> +#else
> +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
> +		erofs_iget_set_actor, &nid);

How about just using iget5_locked? And then we can handle the 32/64-bit
platform difference only in erofs_inode_hash()?

> +#endif
> +}
> +
>  struct inode *erofs_iget(struct super_block *sb,
>  	erofs_nid_t nid, bool isdir)
>  {
> -	struct inode *inode = iget_locked(sb, nid);
> +	struct inode *inode = erofs_iget_locked(sb, nid);
>  
>  	if (unlikely(inode == NULL))
>  		return ERR_PTR(-ENOMEM);
> diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
> index c2cc4a016..823929a 100644
> --- a/drivers/staging/erofs/internal.h
> +++ b/drivers/staging/erofs/internal.h
> @@ -511,6 +511,16 @@ struct erofs_map_blocks_iter {
>  }
>  
>  /* inode.c */
> +static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
> +{
> +	u64 h = nid;

unsigned long h;

> +
> +#if BITS_PER_LONG == 32
> +	h = (h >> 32) ^ (h & 0xffffffff);

h = (nid >> 32) ^ (nid & 0xffffffff);

> +#endif
> +	return (unsigned long)h;

return h;

Thanks,

> +}
> +
>  extern struct inode *erofs_iget(struct super_block *sb,
>  	erofs_nid_t nid, bool dir);
>  
> 


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01  0:37   ` Chao Yu
@ 2018-10-01  2:08     ` Gao Xiang
  2018-10-01 10:25       ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Gao Xiang @ 2018-10-01  2:08 UTC (permalink / raw)


Hi Chao,

On 2018/10/1 8:37, Chao Yu wrote:
> Hi Xiang,
> 
> Sorry for the late response to those patches.

That doesn't matter; I know you are now busy with f2fs stuff. ;)

I have to deal with internally reported issues which could impact our products
and do more cleanup apart from stability fixes.

It seems that Linux-4.20 will be out in a few weeks, and we also need
to prepare more for it.


And I intend to make progress on moving into the fs tree before the next LTS version;
we need to do more...

The source code of the previewed mkfs will be released this month, and it also seems to need a lot of cleanup.

> 
> On 2018-10-1 0:22, Gao Xiang wrote:
>> Hi Chao,
>>
>> Could you help merge these patches into erofs-dev (rebased on the latest erofs-master)?
>> I need to formally fix several issues reported these days.
> 
> No problem.
> 
>>
>> If you have time, could you also take some time to review these patches?
> 
> Okay, will review them soon. :)

Thanks Chao :)

Thanks,
Gao Xiang

> 
> Thanks,
> 
>>
>> Thanks,
>> Gao Xiang
>>
>> On 2018/9/21 11:43, Gao Xiang wrote:
>>> This patch introduces inode hash function, test and set callbacks,
>>> and iget5_locked to find the right inode for 32-bit platforms.
>>>
>>> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01  1:57 ` Chao Yu
@ 2018-10-01  2:40   ` Gao Xiang
  2018-10-01  3:04     ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Gao Xiang @ 2018-10-01  2:40 UTC (permalink / raw)


Hi Chao,

On 2018/10/1 9:57, Chao Yu wrote:
> On 2018-9-21 11:43, Gao Xiang wrote:
>> This patch introduces inode hash function, test and set callbacks,
>> and iget5_locked to find the right inode for 32-bit platforms.
>>
>> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
>> ---
>>  drivers/staging/erofs/inode.c    | 37 ++++++++++++++++++++++++++++++++++++-
>>  drivers/staging/erofs/internal.h | 10 ++++++++++
>>  2 files changed, 46 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
>> index da8693a..a2fa2aa 100644
>> --- a/drivers/staging/erofs/inode.c
>> +++ b/drivers/staging/erofs/inode.c
>> @@ -232,10 +232,45 @@ static int fill_inode(struct inode *inode, int isdir)
>>  	return err;
>>  }
>>  
>> +/*
>> + * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
>> + * we should do more for 32-bit platform to find the right inode.
>> + */
>> +#if BITS_PER_LONG < 64
> 
> #if BITS_PER_LONG == 32

OK.

> 
>> +static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
>> +{
>> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
>> +
>> +	return EROFS_V(inode)->nid == nid;
>> +}
>> +
>> +static int erofs_iget_set_actor(struct inode *inode, void *opaque)
>> +{
>> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
>> +
>> +	inode->i_ino = erofs_inode_hash(nid);
>> +	return 0;
>> +}
>> +#endif
>> +
>> +static inline struct inode *erofs_iget_locked(struct super_block *sb,
>> +					      erofs_nid_t nid)
>> +{
>> +	const unsigned long hashval = erofs_inode_hash(nid);
>> +
>> +#if BITS_PER_LONG >= 64
>> +	/* it is safe to use iget_locked for >= 64-bit platform */
>> +	return iget_locked(sb, hashval);
>> +#else
>> +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
>> +		erofs_iget_set_actor, &nid);
> 
> How about just using iget5_locked? And then we can handle the 32/64-bit
> platform difference only in erofs_inode_hash()?

It seems iget_locked is faster than iget5_locked because it has
more fast paths and no indirect callbacks for each inode...
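
For reference, the two lookup interfaces (as declared in include/linux/fs.h,
parameter names added here for clarity):

struct inode *iget_locked(struct super_block *sb, unsigned long ino);
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
			   int (*test)(struct inode *, void *),
			   int (*set)(struct inode *, void *), void *data);

iget_locked() only compares i_ino while walking the hash chain, whereas
iget5_locked() makes an indirect test() call for every candidate inode.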

> 
>> +#endif
>> +}
>> +
>>  struct inode *erofs_iget(struct super_block *sb,
>>  	erofs_nid_t nid, bool isdir)
>>  {
>> -	struct inode *inode = iget_locked(sb, nid);
>> +	struct inode *inode = erofs_iget_locked(sb, nid);
>>  
>>  	if (unlikely(inode == NULL))
>>  		return ERR_PTR(-ENOMEM);
>> diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
>> index c2cc4a016..823929a 100644
>> --- a/drivers/staging/erofs/internal.h
>> +++ b/drivers/staging/erofs/internal.h
>> @@ -511,6 +511,16 @@ struct erofs_map_blocks_iter {
>>  }
>>  
>>  /* inode.c */
>> +static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
>> +{
>> +	u64 h = nid;
> 
> unsigned long h;
> 
>> +
>> +#if BITS_PER_LONG == 32
>> +	h = (h >> 32) ^ (h & 0xffffffff);
> 
> h = (nid >> 32) ^ (nid & 0xffffffff);
> 
>> +#endif
>> +	return (unsigned long)h;
> 
> return h;

OK, will fix soon.

Thanks,
Gao Xiang

> 
> Thanks,
> 
>> +}
>> +
>>  extern struct inode *erofs_iget(struct super_block *sb,
>>  	erofs_nid_t nid, bool dir);
>>  
>>


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01  2:40   ` Gao Xiang
@ 2018-10-01  3:04     ` Chao Yu
  2018-10-01  4:26       ` Gao Xiang
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2018-10-01  3:04 UTC (permalink / raw)


On 2018-10-1 10:40, Gao Xiang wrote:
> Hi Chao,
> 
> On 2018/10/1 9:57, Chao Yu wrote:
>> On 2018-9-21 11:43, Gao Xiang wrote:
>>> This patch introduces inode hash function, test and set callbacks,
>>> and iget5_locked to find the right inode for 32-bit platforms.
>>>
>>> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
>>> ---
>>>  drivers/staging/erofs/inode.c    | 37 ++++++++++++++++++++++++++++++++++++-
>>>  drivers/staging/erofs/internal.h | 10 ++++++++++
>>>  2 files changed, 46 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
>>> index da8693a..a2fa2aa 100644
>>> --- a/drivers/staging/erofs/inode.c
>>> +++ b/drivers/staging/erofs/inode.c
>>> @@ -232,10 +232,45 @@ static int fill_inode(struct inode *inode, int isdir)
>>>  	return err;
>>>  }
>>>  
>>> +/*
>>> + * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
>>> + * we should do more for 32-bit platform to find the right inode.
>>> + */
>>> +#if BITS_PER_LONG < 64
>>
>> #if BITS_PER_LONG == 32
> 
> OK.
> 
>>
>>> +static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
>>> +{
>>> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
>>> +
>>> +	return EROFS_V(inode)->nid == nid;
>>> +}
>>> +
>>> +static int erofs_iget_set_actor(struct inode *inode, void *opaque)
>>> +{
>>> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
>>> +
>>> +	inode->i_ino = erofs_inode_hash(nid);
>>> +	return 0;
>>> +}
>>> +#endif
>>> +
>>> +static inline struct inode *erofs_iget_locked(struct super_block *sb,
>>> +					      erofs_nid_t nid)
>>> +{
>>> +	const unsigned long hashval = erofs_inode_hash(nid);
>>> +
>>> +#if BITS_PER_LONG >= 64
>>> +	/* it is safe to use iget_locked for >= 64-bit platform */
>>> +	return iget_locked(sb, hashval);
>>> +#else
>>> +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
>>> +		erofs_iget_set_actor, &nid);
>>
>> How about just using iget5_locked? And then we can handle the 32/64-bit
>> platform difference only in erofs_inode_hash()?
> 
> It seems iget_locked is faster than iget5_locked because it has
> more fast paths and no indirect callbacks for each inode...

That's a trivial cleanup. Well, if you are worried about the performance of
erofs_iget(), let's keep it as it is. :)

Thanks,

> 
>>
>>> +#endif
>>> +}
>>> +
>>>  struct inode *erofs_iget(struct super_block *sb,
>>>  	erofs_nid_t nid, bool isdir)
>>>  {
>>> -	struct inode *inode = iget_locked(sb, nid);
>>> +	struct inode *inode = erofs_iget_locked(sb, nid);
>>>  
>>>  	if (unlikely(inode == NULL))
>>>  		return ERR_PTR(-ENOMEM);
>>> diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
>>> index c2cc4a016..823929a 100644
>>> --- a/drivers/staging/erofs/internal.h
>>> +++ b/drivers/staging/erofs/internal.h
>>> @@ -511,6 +511,16 @@ struct erofs_map_blocks_iter {
>>>  }
>>>  
>>>  /* inode.c */
>>> +static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
>>> +{
>>> +	u64 h = nid;
>>
>> unsigned long h;
>>
>>> +
>>> +#if BITS_PER_LONG == 32
>>> +	h = (h >> 32) ^ (h & 0xffffffff);
>>
>> h = (nid >> 32) ^ (nid & 0xffffffff);
>>
>>> +#endif
>>> +	return (unsigned long)h;
>>
>> return h;
> 
> OK, will fix soon.
> 
> Thanks,
> Gao Xiang
> 
>>
>> Thanks,
>>
>>> +}
>>> +
>>>  extern struct inode *erofs_iget(struct super_block *sb,
>>>  	erofs_nid_t nid, bool dir);
>>>  
>>>


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01  3:04     ` Chao Yu
@ 2018-10-01  4:26       ` Gao Xiang
  2018-10-01  4:32         ` [PREVIEW][PATCH v2 chao/erofs-dev] " Gao Xiang
  0 siblings, 1 reply; 19+ messages in thread
From: Gao Xiang @ 2018-10-01  4:26 UTC (permalink / raw)


Hi Chao

On 2018/10/1 11:04, Chao Yu wrote:
>>  /* inode.c */
>> +static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
>> +{
>> +	u64 h = nid;
> unsigned long h;
> 
>> +
>> +#if BITS_PER_LONG == 32
>> +	h = (h >> 32) ^ (h & 0xffffffff);
> h = (nid >> 32) ^ (nid & 0xffffffff);
> 
>> +#endif
>> +	return (unsigned long)h;

The inode hash could be wrong on 64-bit since h is not initialized.
I will fix it in another way.
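
In other words, with the suggested change the function would read (sketch):

static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
{
	unsigned long h;	/* only assigned when BITS_PER_LONG == 32 */

#if BITS_PER_LONG == 32
	h = (nid >> 32) ^ (nid & 0xffffffff);
#endif
	return h;	/* uninitialized on 64-bit builds */
}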

Thanks,
Gao Xiang.


* [PREVIEW][PATCH v2 chao/erofs-dev] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01  4:26       ` Gao Xiang
@ 2018-10-01  4:32         ` Gao Xiang
  2018-10-01  5:24           ` Chao Yu
  0 siblings, 1 reply; 19+ messages in thread
From: Gao Xiang @ 2018-10-01  4:32 UTC (permalink / raw)


From: Gao Xiang <gaoxiang25@huawei.com>

This patch introduces inode hash function, test and set callbacks,
and iget5_locked to find the right inode for 32-bit platforms.

Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
 drivers/staging/erofs/inode.c    | 37 ++++++++++++++++++++++++++++++++++++-
 drivers/staging/erofs/internal.h |  9 +++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
index da8693a..04c61a9 100644
--- a/drivers/staging/erofs/inode.c
+++ b/drivers/staging/erofs/inode.c
@@ -232,10 +232,45 @@ static int fill_inode(struct inode *inode, int isdir)
 	return err;
 }
 
+/*
+ * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
+ * we should do more for 32-bit platform to find the right inode.
+ */
+#if BITS_PER_LONG == 32
+static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	return EROFS_V(inode)->nid == nid;
+}
+
+static int erofs_iget_set_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	inode->i_ino = erofs_inode_hash(nid);
+	return 0;
+}
+#endif
+
+static inline struct inode *erofs_iget_locked(struct super_block *sb,
+					      erofs_nid_t nid)
+{
+	const unsigned long hashval = erofs_inode_hash(nid);
+
+#if BITS_PER_LONG >= 64
+	/* it is safe to use iget_locked for >= 64-bit platform */
+	return iget_locked(sb, hashval);
+#else
+	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
+		erofs_iget_set_actor, &nid);
+#endif
+}
+
 struct inode *erofs_iget(struct super_block *sb,
 	erofs_nid_t nid, bool isdir)
 {
-	struct inode *inode = iget_locked(sb, nid);
+	struct inode *inode = erofs_iget_locked(sb, nid);
 
 	if (unlikely(inode == NULL))
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index c2cc4a016..0e85aae 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -511,6 +511,15 @@ erofs_get_inline_page(struct inode *inode,
 }
 
 /* inode.c */
+static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
+{
+#if BITS_PER_LONG == 32
+	return (nid >> 32) ^ (nid & 0xffffffff);
+#else
+	return nid;
+#endif
+}
+
 extern struct inode *erofs_iget(struct super_block *sb,
 	erofs_nid_t nid, bool dir);
 
-- 
2.7.4


* [PREVIEW][PATCH v2 chao/erofs-dev] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01  4:32         ` [PREVIEW][PATCH v2 chao/erofs-dev] " Gao Xiang
@ 2018-10-01  5:24           ` Chao Yu
  0 siblings, 0 replies; 19+ messages in thread
From: Chao Yu @ 2018-10-01  5:24 UTC (permalink / raw)


On 2018-10-1 12:32, Gao Xiang wrote:
> From: Gao Xiang <gaoxiang25 at huawei.com>
> 
> This patch introduces inode hash function, test and set callbacks,
> and iget5_locked to find the right inode for 32-bit platforms.
> 
> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
> ---
>  drivers/staging/erofs/inode.c    | 37 ++++++++++++++++++++++++++++++++++++-
>  drivers/staging/erofs/internal.h |  9 +++++++++
>  2 files changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
> index da8693a..04c61a9 100644
> --- a/drivers/staging/erofs/inode.c
> +++ b/drivers/staging/erofs/inode.c
> @@ -232,10 +232,45 @@ static int fill_inode(struct inode *inode, int isdir)
>  	return err;
>  }
>  
> +/*
> + * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
> + * we should do more for 32-bit platform to find the right inode.
> + */
> +#if BITS_PER_LONG == 32
> +static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
> +{
> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
> +
> +	return EROFS_V(inode)->nid == nid;
> +}
> +
> +static int erofs_iget_set_actor(struct inode *inode, void *opaque)
> +{
> +	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
> +
> +	inode->i_ino = erofs_inode_hash(nid);
> +	return 0;
> +}
> +#endif
> +
> +static inline struct inode *erofs_iget_locked(struct super_block *sb,
> +					      erofs_nid_t nid)
> +{
> +	const unsigned long hashval = erofs_inode_hash(nid);
> +
> +#if BITS_PER_LONG >= 64
> +	/* it is safe to use iget_locked for >= 64-bit platform */
> +	return iget_locked(sb, hashval);
> +#else
> +	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
> +		erofs_iget_set_actor, &nid);
> +#endif
> +}
> +
>  struct inode *erofs_iget(struct super_block *sb,
>  	erofs_nid_t nid, bool isdir)
>  {
> -	struct inode *inode = iget_locked(sb, nid);
> +	struct inode *inode = erofs_iget_locked(sb, nid);
>  
>  	if (unlikely(inode == NULL))
>  		return ERR_PTR(-ENOMEM);
> diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
> index c2cc4a016..0e85aae 100644
> --- a/drivers/staging/erofs/internal.h
> +++ b/drivers/staging/erofs/internal.h
> @@ -511,6 +511,15 @@ erofs_get_inline_page(struct inode *inode,
>  }
>  
>  /* inode.c */
> +static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
> +{
> +#if BITS_PER_LONG == 32
> +	return (nid >> 32) ^ (nid & 0xffffffff);
> +#else
> +	return nid;
> +#endif

That's the right way. :)

Reviewed-by: Chao Yu <yuchao0 at huawei.com>

Thanks,

> +}
> +
>  extern struct inode *erofs_iget(struct super_block *sb,
>  	erofs_nid_t nid, bool dir);
>  
> 


* [PREVIEW] [PATCH chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled
  2018-09-21  3:43 ` [PREVIEW] [PATCH chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled Gao Xiang
@ 2018-10-01  9:49   ` Chao Yu
  2018-10-01 11:37     ` Gao Xiang
                       ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Chao Yu @ 2018-10-01  9:49 UTC (permalink / raw)


On 2018-9-21 11:43, Gao Xiang wrote:
> When the managed cache is enabled, the last reference count
> of a workgroup must be used for its workstation.
> 
> Otherwise, it could lead to incorrect (un)freezes in
> the reclaim path, and it would be harmful.

Could you add the related race condition stack to demonstrate this problem more clearly?

Thanks,

> 
> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
> ---
>  drivers/staging/erofs/utils.c | 131 ++++++++++++++++++++++++++++++------------
>  1 file changed, 95 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
> index ddd220a..8ef13c8 100644
> --- a/drivers/staging/erofs/utils.c
> +++ b/drivers/staging/erofs/utils.c
> @@ -87,12 +87,28 @@ int erofs_register_workgroup(struct super_block *sb,
>  		grp = (void *)((unsigned long)grp |
>  			1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
>  
> +	/*
> +	 * If managed cache is enabled, the reclaim path assumes
> +	 * that the last reference count is used for its workstation.
> +	 * Therefore we should bump up reference count before
> +	 * making this workgroup visible to other users.
> +	 */
> +#ifdef EROFS_FS_HAS_MANAGED_CACHE
> +	/* refcount should be at least 2 to get on well with reclaim path */
> +	__erofs_workgroup_get(grp);
> +#endif
> +
>  	err = radix_tree_insert(&sbi->workstn_tree,
>  		grp->index, grp);
>  
> -	if (!err) {
> +#ifdef EROFS_FS_HAS_MANAGED_CACHE
> +	if (unlikely(err))
> +		/* it is safe to decrease for refcount >= 2 */
> +		atomic_dec(&grp->refcount);
> +#else
> +	if (!err)
>  		__erofs_workgroup_get(grp);
> -	}
> +#endif
>  
>  	erofs_workstn_unlock(sbi);
>  	radix_tree_preload_end();
> @@ -101,19 +117,90 @@ int erofs_register_workgroup(struct super_block *sb,
>  
>  extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
>  
> +static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
> +{
> +	atomic_long_dec(&erofs_global_shrink_cnt);
> +	erofs_workgroup_free_rcu(grp);
> +}
> +
>  int erofs_workgroup_put(struct erofs_workgroup *grp)
>  {
>  	int count = atomic_dec_return(&grp->refcount);
>  
>  	if (count == 1)
>  		atomic_long_inc(&erofs_global_shrink_cnt);
> -	else if (!count) {
> -		atomic_long_dec(&erofs_global_shrink_cnt);
> -		erofs_workgroup_free_rcu(grp);
> -	}
> +	else if (!count)
> +		__erofs_workgroup_free(grp);
>  	return count;
>  }
>  
> +#ifdef EROFS_FS_HAS_MANAGED_CACHE
> +
> +static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
> +{
> +	erofs_workgroup_unfreeze(grp, 0);
> +	__erofs_workgroup_free(grp);
> +}
> +
> +bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
> +				    struct erofs_workgroup *grp,
> +				    bool cleanup)
> +{
> +	/*
> +	 * for managed cache enabled, the refcount of workgroups
> +	 * themselves could be < 0 (frozen). So there is no guarantee
> +	 * that all refcount > 0 if managed cache is enabled.
> +	 */
> +	if (!erofs_workgroup_try_to_freeze(grp, 1))
> +		return false;
> +
> +	/*
> +	 * note that all cached pages should be unlinked
> +	 * before delete it from the radix tree.
> +	 * Otherwise some cached pages of an orphan old workgroup
> +	 * could be still linked after the new one is available.
> +	 */
> +	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
> +		erofs_workgroup_unfreeze(grp, 1);
> +		return false;
> +	}
> +
> +	/* it is impossible to fail after we freeze the workgroup */
> +	if (radix_tree_delete(&sbi->workstn_tree, grp->index) != grp)
> +		BUG();	/* should never happen */
> +
> +	/*
> +	 * if managed cache is enabled, the last refcount
> +	 * should indicate the related workstation.
> +	 */
> +	erofs_workgroup_unfreeze_final(grp);
> +	return true;
> +}
> +
> +#else
> +
> +bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
> +				    struct erofs_workgroup *grp,
> +				    bool cleanup)
> +{
> +	int cnt = atomic_read(&grp->refcount);
> +
> +	DBG_BUGON(cnt <= 0);
> +	DBG_BUGON(cleanup && cnt != 1);
> +
> +	if (cnt > 1)
> +		return false;
> +
> +	if (radix_tree_delete(&sbi->workstn_tree, grp->index) != grp)
> +		return false;
> +
> +	/* (rarely) could be grabbed again when freeing */
> +	erofs_workgroup_put(grp);
> +	return true;
> +}
> +
> +#endif
> +
>  unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
>  				       unsigned long nr_shrink,
>  				       bool cleanup)
> @@ -130,43 +217,15 @@ unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
>  		batch, first_index, PAGEVEC_SIZE);
>  
>  	for (i = 0; i < found; ++i) {
> -		int cnt;
>  		struct erofs_workgroup *grp = (void *)
>  			((unsigned long)batch[i] &
>  				~RADIX_TREE_EXCEPTIONAL_ENTRY);
>  
>  		first_index = grp->index + 1;
>  
> -		cnt = atomic_read(&grp->refcount);
> -		BUG_ON(cnt <= 0);
> -
> -		if (cleanup)
> -			BUG_ON(cnt != 1);
> -
> -#ifndef EROFS_FS_HAS_MANAGED_CACHE
> -		else if (cnt > 1)
> -#else
> -		if (!erofs_workgroup_try_to_freeze(grp, 1))
> -#endif
> -			continue;
> -
> -		if (radix_tree_delete(&sbi->workstn_tree,
> -			grp->index) != grp) {
> -#ifdef EROFS_FS_HAS_MANAGED_CACHE
> -skip:
> -			erofs_workgroup_unfreeze(grp, 1);
> -#endif
> +		/* try to shrink each workgroup */
> +		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
>  			continue;
> -		}
> -
> -#ifdef EROFS_FS_HAS_MANAGED_CACHE
> -		if (erofs_try_to_free_all_cached_pages(sbi, grp))
> -			goto skip;
> -
> -		erofs_workgroup_unfreeze(grp, 1);
> -#endif
> -		/* (rarely) grabbed again when freeing */
> -		erofs_workgroup_put(grp);
>  
>  		++freed;
>  		if (unlikely(!--nr_shrink))
> 


* [PREVIEW] [PATCH chao/erofs-dev 3/3] staging: erofs: managed pages could be locked at the time of decompression
  2018-09-21  3:43 ` [PREVIEW] [PATCH chao/erofs-dev 3/3] staging: erofs: managed pages could be locked at the time of decompression Gao Xiang
@ 2018-10-01 10:03   ` Chao Yu
  0 siblings, 0 replies; 19+ messages in thread
From: Chao Yu @ 2018-10-01 10:03 UTC (permalink / raw)


On 2018-9-21 11:43, Gao Xiang wrote:
> Managed compressed pages should be unlocked when IO ends by design.
> Therefore, these pages will be up-to-date and unlocked at the time
> of decompression in general.
> 
> However managed pages could be re-locked in some other paths, such as
> the reclaim path. Let's remove the related assertion in decompression.
> 
> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>

Reviewed-by: Chao Yu <yuchao0 at huawei.com>

Thanks,


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01  2:08     ` Gao Xiang
@ 2018-10-01 10:25       ` Chao Yu
  2018-10-01 11:25         ` Gao Xiang
  0 siblings, 1 reply; 19+ messages in thread
From: Chao Yu @ 2018-10-01 10:25 UTC (permalink / raw)


Hi Xiang,

On 2018-10-1 10:08, Gao Xiang wrote:
> Hi Chao,
> 
> On 2018/10/1 8:37, Chao Yu wrote:
>> Hi Xiang,
>>
>> Sorry for the late response to those patches.
> 
> That doesn't matter; I know you are now busy with f2fs stuff. ;)

Yeah, still struggling with fixing quota bugs, and trying to write several test cases
for the xfstests suite.. :P

> 
> I have to deal with internally reported issues which could impact our products
> and do more cleanup apart from stability fixes.
> 
> It seems that Linux-4.20 will be out in a few weeks, and we also need
> to prepare more for it.
> 
> 
> And I intend to make progress on moving into the fs tree before the next LTS version;
> we need to do more...

Sounds like a lot of pending work there. :)

> 
> The source code of the previewed mkfs will be released this month, and it also seems to need a lot of cleanup.

I hope Guifu can help to handle that work.

Thanks,

> 
>>
>> On 2018-10-1 0:22, Gao Xiang wrote:
>>> Hi Chao,
>>>
>>> Could you help merge these patches into erofs-dev (rebased on the latest erofs-master)?
>>> I need to formally fix several issues reported these days.
>>
>> No problem.
>>
>>>
>>> If you have time, could you also take some time to review these patches?
>>
>> Okay, will review them soon. :)
> 
> Thanks Chao :)
> 
> Thanks,
> Gao Xiang
> 
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>> Gao Xiang
>>>
>>> On 2018/9/21 11:43, Gao Xiang wrote:
>>>> This patch introduces inode hash function, test and set callbacks,
>>>> and iget5_locked to find the right inode for 32-bit platforms.
>>>>
>>>> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>


* [PREVIEW] [PATCH chao/erofs-dev 1/3] staging: erofs: harden inode lookup for 32-bit platforms
  2018-10-01 10:25       ` Chao Yu
@ 2018-10-01 11:25         ` Gao Xiang
  0 siblings, 0 replies; 19+ messages in thread
From: Gao Xiang @ 2018-10-01 11:25 UTC (permalink / raw)


Hi Chao,

On 2018/10/1 18:25, Chao Yu wrote:
> Hi Xiang,
> 
> On 2018-10-1 10:08, Gao Xiang wrote:
>> Hi Chao,
>>
>> On 2018/10/1 8:37, Chao Yu wrote:
>>> Hi Xiang,
>>>
>>> Sorry for the late response to those patches.
>> That doesn't matter; I know you are now busy with f2fs stuff. ;)
> Yeah, still struggling with fixing quota bugs, and trying to write several test cases
> for the xfstests suite.. :P
> 
>> I have to deal with internally reported issues which could impact our products
>> and do more cleanup apart from stability fixes.
>>
>> It seems that Linux-4.20 will be out in a few weeks, and we also need
>> to prepare more for it.
>>
>>
>> And I intend to make progress on moving into the fs tree before the next LTS version;
>> we need to do more...
> Sounds like a lot of pending work there. :)
> 
>> The source code of the previewed mkfs will be released this month, and it also seems to need a lot of cleanup.
> I hope Guifu can help to handle that work.

Yes, I confirmed that Guifu, as the original mkfs author, would like to maintain the erofs mkfs related stuff.
Another developer will also take part in it.

If that works out fine, no worries about it ;)

Thanks,
Gao Xiang

> 
> Thanks,
> 
>>> On 2018-10-1 0:22, Gao Xiang wrote:
>>>> Hi Chao,
>>>>
>>>> Could you help merge these patches into erofs-dev (rebased on the latest erofs-master)?
>>>> I need to formally fix several issues reported these days.
>>> No problem.
>>>
>>>> If you have time, could you also take some time to review these patches?
>>> Okay, will review them soon. :)
>> Thanks Chao :)
>>
>> Thanks,
>> Gao Xiang
>>
>>> Thanks,
>>>
>>>> Thanks,
>>>> Gao Xiang
>>>>
>>>> On 2018/9/21 11:43, Gao Xiang wrote:
>>>>> This patch introduces inode hash function, test and set callbacks,
>>>>> and iget5_locked to find the right inode for 32-bit platforms.
>>>>>
>>>>> Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>


* [PREVIEW] [PATCH chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled
  2018-10-01  9:49   ` Chao Yu
@ 2018-10-01 11:37     ` Gao Xiang
  2018-10-22  9:36     ` [PATCH v2 " Gao Xiang
  2018-10-22  9:51     ` [PATCH chao/erofs-dev] staging: erofs: add a full barrier in erofs_workgroup_unfreeze Gao Xiang
  2 siblings, 0 replies; 19+ messages in thread
From: Gao Xiang @ 2018-10-01 11:37 UTC (permalink / raw)


Hi Chao,

On 2018/10/1 17:49, Chao Yu wrote:
> On 2018-9-21 11:43, Gao Xiang wrote:
>> When the managed cache is enabled, the last reference count
>> of a workgroup must be used for its workstation.
>>
>> Otherwise, it could lead to incorrect (un)freezes in
>> the reclaim path, and it would be harmful.
> Could you add the related race condition stack to demonstrate this problem more clearly?
> 

As I remember, the original stack isn't directly related to the code fixed in this patch.
I reviewed the reclaim path at that time and found an issue, hence this patch.

The incorrect reclaim will do harm to other code, but I think I can try to give
a rough race condition (some stack) in the next version.

That is because when the managed cache is enabled, the reclaim path will work on
every decompression workgroup with refcount == 1.

Just like the refcount idea for page cache pages (which are not truncated), the last
refcount should be reserved for the tree itself rather than for other uses, in all cases by design.
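
In other words, the intended lifecycle with the managed cache enabled is
roughly the following (sketch):

erofs_register_workgroup()    refcnt = 2  (workstation ref + caller's ref)
erofs_workgroup_put()         refcnt = 1  (only the workstation ref left,
                                           the workgroup becomes shrinkable)
reclaim path                  freezes at refcnt == 1, deletes the workgroup
                              from workstn_tree, then frees it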

Thanks,
Gao Xiang

> Thanks,
> 


* [PATCH v2 chao/erofs-dev 2/3] staging: erofs: fix race when the managed cache is enabled
  2018-10-01  9:49   ` Chao Yu
  2018-10-01 11:37     ` Gao Xiang
@ 2018-10-22  9:36     ` Gao Xiang
  2018-10-22  9:51     ` [PATCH chao/erofs-dev] staging: erofs: add a full barrier in erofs_workgroup_unfreeze Gao Xiang
  2 siblings, 0 replies; 19+ messages in thread
From: Gao Xiang @ 2018-10-22  9:36 UTC (permalink / raw)


When the managed cache is enabled, the last reference count
of a workgroup must be used for its workstation.

Otherwise, it could lead to incorrect (un)freezes in
the reclaim path, and it would be harmful.

A typical race is as follows:

Thread 1 (In the reclaim path)  Thread 2
workgroup_freeze(grp, 1)                                refcnt = 1
...
workgroup_unfreeze(grp, 1)                              refcnt = 1
                                workgroup_get(grp)      refcnt = 2 (x)
workgroup_put(grp)                                      refcnt = 1 (x)
                                ...unexpected behaviors

* grp is detached but still used, which violates the cache-managed
  freeze constraint.

Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
Change log v2:
 - add the details of the race condition, as pointed out by Chao

 drivers/staging/erofs/utils.c | 130 +++++++++++++++++++++++++++++-------------
 1 file changed, 91 insertions(+), 39 deletions(-)

diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index ddd220ac33fd..467401d36e76 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -87,12 +87,21 @@ int erofs_register_workgroup(struct super_block *sb,
 		grp = (void *)((unsigned long)grp |
 			1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
 
-	err = radix_tree_insert(&sbi->workstn_tree,
-		grp->index, grp);
+	/*
+	 * Bump up reference count before making this workgroup
+	 * visible to other users in order to avoid potential UAF
+	 * without being serialized by erofs_workstn_lock.
+	 */
+	__erofs_workgroup_get(grp);
 
-	if (!err) {
-		__erofs_workgroup_get(grp);
-	}
+	err = radix_tree_insert(&sbi->workstn_tree,
+				grp->index, grp);
+	if (unlikely(err))
+		/*
+		 * it's safe to decrease since the workgroup isn't visible
+		 * and refcount >= 2 (cannot be freezed).
+		 */
+		atomic_dec(&grp->refcount);
 
 	erofs_workstn_unlock(sbi);
 	radix_tree_preload_end();
@@ -101,19 +110,90 @@ int erofs_register_workgroup(struct super_block *sb,
 
 extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
 
+static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
+{
+	atomic_long_dec(&erofs_global_shrink_cnt);
+	erofs_workgroup_free_rcu(grp);
+}
+
 int erofs_workgroup_put(struct erofs_workgroup *grp)
 {
 	int count = atomic_dec_return(&grp->refcount);
 
 	if (count == 1)
 		atomic_long_inc(&erofs_global_shrink_cnt);
-	else if (!count) {
-		atomic_long_dec(&erofs_global_shrink_cnt);
-		erofs_workgroup_free_rcu(grp);
-	}
+	else if (!count)
+		__erofs_workgroup_free(grp);
 	return count;
 }
 
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+/* for cache-managed case, customized reclaim paths exist */
+static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
+{
+	erofs_workgroup_unfreeze(grp, 0);
+	__erofs_workgroup_free(grp);
+}
+
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	/*
+	 * for managed cache enabled, the refcount of workgroups
+	 * themselves could be < 0 (frozen). So there is no guarantee
+	 * that all refcount > 0 if managed cache is enabled.
+	 */
+	if (!erofs_workgroup_try_to_freeze(grp, 1))
+		return false;
+
+	/*
+	 * note that all cached pages should be unlinked
+	 * before delete it from the radix tree.
+	 * Otherwise some cached pages of an orphan old workgroup
+	 * could be still linked after the new one is available.
+	 */
+	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
+		erofs_workgroup_unfreeze(grp, 1);
+		return false;
+	}
+
+	/* it is impossible to fail after we freeze the workgroup */
+	if (radix_tree_delete(&sbi->workstn_tree, grp->index) != grp)
+		BUG();	/* should never happen */
+
+	/*
+	 * if managed cache is enabled, the last refcount
+	 * should indicate the related workstation.
+	 */
+	erofs_workgroup_unfreeze_final(grp);
+	return true;
+}
+
+#else
+/* for nocache case, no customized reclaim path at all */
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	int cnt = atomic_read(&grp->refcount);
+
+	DBG_BUGON(cnt <= 0);
+	DBG_BUGON(cleanup && cnt != 1);
+
+	if (cnt > 1)
+		return false;
+
+	if (radix_tree_delete(&sbi->workstn_tree, grp->index) != grp)
+		return false;
+
+	/* (rarely) could be grabbed again when freeing */
+	erofs_workgroup_put(grp);
+	return true;
+}
+
+#endif
+
 unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 				       unsigned long nr_shrink,
 				       bool cleanup)
@@ -130,43 +210,15 @@ unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 		batch, first_index, PAGEVEC_SIZE);
 
 	for (i = 0; i < found; ++i) {
-		int cnt;
 		struct erofs_workgroup *grp = (void *)
 			((unsigned long)batch[i] &
 				~RADIX_TREE_EXCEPTIONAL_ENTRY);
 
 		first_index = grp->index + 1;
 
-		cnt = atomic_read(&grp->refcount);
-		BUG_ON(cnt <= 0);
-
-		if (cleanup)
-			BUG_ON(cnt != 1);
-
-#ifndef EROFS_FS_HAS_MANAGED_CACHE
-		else if (cnt > 1)
-#else
-		if (!erofs_workgroup_try_to_freeze(grp, 1))
-#endif
-			continue;
-
-		if (radix_tree_delete(&sbi->workstn_tree,
-			grp->index) != grp) {
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-skip:
-			erofs_workgroup_unfreeze(grp, 1);
-#endif
+		/* try to shrink each valid workgroup */
+		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
 			continue;
-		}
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-		if (erofs_try_to_free_all_cached_pages(sbi, grp))
-			goto skip;
-
-		erofs_workgroup_unfreeze(grp, 1);
-#endif
-		/* (rarely) grabbed again when freeing */
-		erofs_workgroup_put(grp);
 
 		++freed;
 		if (unlikely(!--nr_shrink))
-- 
2.14.4


* [PATCH chao/erofs-dev] staging: erofs: add a full barrier in erofs_workgroup_unfreeze
  2018-10-01  9:49   ` Chao Yu
  2018-10-01 11:37     ` Gao Xiang
  2018-10-22  9:36     ` [PATCH v2 " Gao Xiang
@ 2018-10-22  9:51     ` Gao Xiang
  2 siblings, 0 replies; 19+ messages in thread
From: Gao Xiang @ 2018-10-22  9:51 UTC (permalink / raw)


Just like other generic locks, insert a full barrier
in case of memory reordering.
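
A minimal sketch of the reordering this prevents, given that atomic_set()
itself implies no memory barrier:

CPU 0 (reclaim path)                   CPU 1
erofs_workgroup_try_to_freeze()
modifies workgroup state
erofs_workgroup_unfreeze()
  atomic_set(&grp->refcount, orig)
  ^ without smp_mb(), the state
    update may become visible after
    the refcount is restored           observes the unfrozen refcount,
                                       then reads stale workgroup state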

Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
 drivers/staging/erofs/internal.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index 9d7ed3d130e3..bec439e2b428 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -218,6 +218,8 @@ erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
 			 int orig_val)
 {
 #if defined(CONFIG_SMP)
+	smp_mb();
+
 	atomic_set(&grp->refcount, orig_val);
 #endif
 	preempt_enable();
-- 
2.14.4

