All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gao Xiang <hsiangkao@aol.com>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	linux-erofs@lists.ozlabs.org, Chao Yu <yuchao0@huawei.com>,
	Gao Xiang <gaoxiang25@huawei.com>
Subject: [PATCH for-4.19 2/2] staging: erofs: fix race when the managed cache is enabled
Date: Tue, 26 Feb 2019 01:58:09 +0800	[thread overview]
Message-ID: <20190225175809.13427-2-hsiangkao@aol.com> (raw)
In-Reply-To: <20190225175809.13427-1-hsiangkao@aol.com>

From: Gao Xiang <gaoxiang25@huawei.com>

commit 51232df5e4b268936beccde5248f312a316800be upstream.

When the managed cache is enabled, the last reference count
of a workgroup must be used for its workstation.

Otherwise, it could lead to incorrect (un)freezes in
the reclaim path, and it would be harmful.

A typical race is as follows:

Thread 1 (In the reclaim path)  Thread 2
workgroup_freeze(grp, 1)                                refcnt = 1
...
workgroup_unfreeze(grp, 1)                              refcnt = 1
                                workgroup_get(grp)      refcnt = 2 (x)
workgroup_put(grp)                                      refcnt = 1 (x)
                                ...unexpected behaviors

* grp is detached but still in use, which violates the cache-managed
  freeze constraint.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
 drivers/staging/erofs/internal.h |   1 +
 drivers/staging/erofs/utils.c    | 134 +++++++++++++++++++++++++++------------
 2 files changed, 96 insertions(+), 39 deletions(-)

diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index e6313c54e3ad..122ea5016f3b 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -240,6 +240,7 @@ static inline bool erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt)
 }
 
 #define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
+#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
 
 extern int erofs_workgroup_put(struct erofs_workgroup *grp);
 
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index bdee9bd09f11..bcabbb85d40e 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -83,12 +83,21 @@ int erofs_register_workgroup(struct super_block *sb,
 
 	grp = xa_tag_pointer(grp, tag);
 
-	err = radix_tree_insert(&sbi->workstn_tree,
-		grp->index, grp);
+	/*
+	 * Bump up reference count before making this workgroup
+	 * visible to other users in order to avoid potential UAF
+	 * without being serialized by erofs_workstn_lock.
+	 */
+	__erofs_workgroup_get(grp);
 
-	if (!err) {
-		__erofs_workgroup_get(grp);
-	}
+	err = radix_tree_insert(&sbi->workstn_tree,
+				grp->index, grp);
+	if (unlikely(err))
+		/*
+		 * it's safe to decrease since the workgroup isn't visible
+		 * and refcount >= 2 (cannot be freezed).
+		 */
+		__erofs_workgroup_put(grp);
 
 	erofs_workstn_unlock(sbi);
 	radix_tree_preload_end();
@@ -97,19 +106,94 @@ int erofs_register_workgroup(struct super_block *sb,
 
 extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
 
+static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
+{
+	atomic_long_dec(&erofs_global_shrink_cnt);
+	erofs_workgroup_free_rcu(grp);
+}
+
 int erofs_workgroup_put(struct erofs_workgroup *grp)
 {
 	int count = atomic_dec_return(&grp->refcount);
 
 	if (count == 1)
 		atomic_long_inc(&erofs_global_shrink_cnt);
-	else if (!count) {
-		atomic_long_dec(&erofs_global_shrink_cnt);
-		erofs_workgroup_free_rcu(grp);
-	}
+	else if (!count)
+		__erofs_workgroup_free(grp);
 	return count;
 }
 
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+/* for cache-managed case, customized reclaim paths exist */
+static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
+{
+	erofs_workgroup_unfreeze(grp, 0);
+	__erofs_workgroup_free(grp);
+}
+
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	/*
+	 * for managed cache enabled, the refcount of workgroups
+	 * themselves could be < 0 (freezed). So there is no guarantee
+	 * that all refcount > 0 if managed cache is enabled.
+	 */
+	if (!erofs_workgroup_try_to_freeze(grp, 1))
+		return false;
+
+	/*
+	 * note that all cached pages should be unlinked
+	 * before delete it from the radix tree.
+	 * Otherwise some cached pages of an orphan old workgroup
+	 * could be still linked after the new one is available.
+	 */
+	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
+		erofs_workgroup_unfreeze(grp, 1);
+		return false;
+	}
+
+	/*
+	 * it is impossible to fail after the workgroup is freezed,
+	 * however in order to avoid some race conditions, add a
+	 * DBG_BUGON to observe this in advance.
+	 */
+	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
+						     grp->index)) != grp);
+
+	/*
+	 * if managed cache is enabled, the last refcount
+	 * should indicate the related workstation.
+	 */
+	erofs_workgroup_unfreeze_final(grp);
+	return true;
+}
+
+#else
+/* for nocache case, no customized reclaim path at all */
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	int cnt = atomic_read(&grp->refcount);
+
+	DBG_BUGON(cnt <= 0);
+	DBG_BUGON(cleanup && cnt != 1);
+
+	if (cnt > 1)
+		return false;
+
+	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
+						     grp->index)) != grp);
+
+	/* (rarely) could be grabbed again when freeing */
+	erofs_workgroup_put(grp);
+	return true;
+}
+
+#endif
+
 unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 				       unsigned long nr_shrink,
 				       bool cleanup)
@@ -126,42 +210,14 @@ unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 		batch, first_index, PAGEVEC_SIZE);
 
 	for (i = 0; i < found; ++i) {
-		int cnt;
 		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
 
 		first_index = grp->index + 1;
 
-		cnt = atomic_read(&grp->refcount);
-		BUG_ON(cnt <= 0);
-
-		if (cleanup)
-			BUG_ON(cnt != 1);
-
-#ifndef EROFS_FS_HAS_MANAGED_CACHE
-		else if (cnt > 1)
-#else
-		if (!erofs_workgroup_try_to_freeze(grp, 1))
-#endif
+		/* try to shrink each valid workgroup */
+		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
 			continue;
 
-		if (xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
-			grp->index)) != grp) {
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-skip:
-			erofs_workgroup_unfreeze(grp, 1);
-#endif
-			continue;
-		}
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-		if (erofs_try_to_free_all_cached_pages(sbi, grp))
-			goto skip;
-
-		erofs_workgroup_unfreeze(grp, 1);
-#endif
-		/* (rarely) grabbed again when freeing */
-		erofs_workgroup_put(grp);
-
 		++freed;
 		if (unlikely(!--nr_shrink))
 			break;
-- 
2.11.0


WARNING: multiple messages have this Message-ID (diff)
From: hsiangkao@aol.com (Gao Xiang)
Subject: [PATCH for-4.19 2/2] staging: erofs: fix race when the managed cache is enabled
Date: Tue, 26 Feb 2019 01:58:09 +0800	[thread overview]
Message-ID: <20190225175809.13427-2-hsiangkao@aol.com> (raw)
In-Reply-To: <20190225175809.13427-1-hsiangkao@aol.com>

From: Gao Xiang <gaoxiang25@huawei.com>

commit 51232df5e4b268936beccde5248f312a316800be upstream.

When the managed cache is enabled, the last reference count
of a workgroup must be used for its workstation.

Otherwise, it could lead to incorrect (un)freezes in
the reclaim path, and it would be harmful.

A typical race is as follows:

Thread 1 (In the reclaim path)  Thread 2
workgroup_freeze(grp, 1)                                refcnt = 1
...
workgroup_unfreeze(grp, 1)                              refcnt = 1
                                workgroup_get(grp)      refcnt = 2 (x)
workgroup_put(grp)                                      refcnt = 1 (x)
                                ...unexpected behaviors

* grp is detached but still in use, which violates the cache-managed
  freeze constraint.

Reviewed-by: Chao Yu <yuchao0 at huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25 at huawei.com>
---
 drivers/staging/erofs/internal.h |   1 +
 drivers/staging/erofs/utils.c    | 134 +++++++++++++++++++++++++++------------
 2 files changed, 96 insertions(+), 39 deletions(-)

diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index e6313c54e3ad..122ea5016f3b 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -240,6 +240,7 @@ static inline bool erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt)
 }
 
 #define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
+#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
 
 extern int erofs_workgroup_put(struct erofs_workgroup *grp);
 
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index bdee9bd09f11..bcabbb85d40e 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -83,12 +83,21 @@ int erofs_register_workgroup(struct super_block *sb,
 
 	grp = xa_tag_pointer(grp, tag);
 
-	err = radix_tree_insert(&sbi->workstn_tree,
-		grp->index, grp);
+	/*
+	 * Bump up reference count before making this workgroup
+	 * visible to other users in order to avoid potential UAF
+	 * without being serialized by erofs_workstn_lock.
+	 */
+	__erofs_workgroup_get(grp);
 
-	if (!err) {
-		__erofs_workgroup_get(grp);
-	}
+	err = radix_tree_insert(&sbi->workstn_tree,
+				grp->index, grp);
+	if (unlikely(err))
+		/*
+		 * it's safe to decrease since the workgroup isn't visible
+		 * and refcount >= 2 (cannot be freezed).
+		 */
+		__erofs_workgroup_put(grp);
 
 	erofs_workstn_unlock(sbi);
 	radix_tree_preload_end();
@@ -97,19 +106,94 @@ int erofs_register_workgroup(struct super_block *sb,
 
 extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
 
+static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
+{
+	atomic_long_dec(&erofs_global_shrink_cnt);
+	erofs_workgroup_free_rcu(grp);
+}
+
 int erofs_workgroup_put(struct erofs_workgroup *grp)
 {
 	int count = atomic_dec_return(&grp->refcount);
 
 	if (count == 1)
 		atomic_long_inc(&erofs_global_shrink_cnt);
-	else if (!count) {
-		atomic_long_dec(&erofs_global_shrink_cnt);
-		erofs_workgroup_free_rcu(grp);
-	}
+	else if (!count)
+		__erofs_workgroup_free(grp);
 	return count;
 }
 
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+/* for cache-managed case, customized reclaim paths exist */
+static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
+{
+	erofs_workgroup_unfreeze(grp, 0);
+	__erofs_workgroup_free(grp);
+}
+
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	/*
+	 * for managed cache enabled, the refcount of workgroups
+	 * themselves could be < 0 (freezed). So there is no guarantee
+	 * that all refcount > 0 if managed cache is enabled.
+	 */
+	if (!erofs_workgroup_try_to_freeze(grp, 1))
+		return false;
+
+	/*
+	 * note that all cached pages should be unlinked
+	 * before delete it from the radix tree.
+	 * Otherwise some cached pages of an orphan old workgroup
+	 * could be still linked after the new one is available.
+	 */
+	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
+		erofs_workgroup_unfreeze(grp, 1);
+		return false;
+	}
+
+	/*
+	 * it is impossible to fail after the workgroup is freezed,
+	 * however in order to avoid some race conditions, add a
+	 * DBG_BUGON to observe this in advance.
+	 */
+	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
+						     grp->index)) != grp);
+
+	/*
+	 * if managed cache is enabled, the last refcount
+	 * should indicate the related workstation.
+	 */
+	erofs_workgroup_unfreeze_final(grp);
+	return true;
+}
+
+#else
+/* for nocache case, no customized reclaim path at all */
+bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+				    struct erofs_workgroup *grp,
+				    bool cleanup)
+{
+	int cnt = atomic_read(&grp->refcount);
+
+	DBG_BUGON(cnt <= 0);
+	DBG_BUGON(cleanup && cnt != 1);
+
+	if (cnt > 1)
+		return false;
+
+	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
+						     grp->index)) != grp);
+
+	/* (rarely) could be grabbed again when freeing */
+	erofs_workgroup_put(grp);
+	return true;
+}
+
+#endif
+
 unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 				       unsigned long nr_shrink,
 				       bool cleanup)
@@ -126,42 +210,14 @@ unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 		batch, first_index, PAGEVEC_SIZE);
 
 	for (i = 0; i < found; ++i) {
-		int cnt;
 		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
 
 		first_index = grp->index + 1;
 
-		cnt = atomic_read(&grp->refcount);
-		BUG_ON(cnt <= 0);
-
-		if (cleanup)
-			BUG_ON(cnt != 1);
-
-#ifndef EROFS_FS_HAS_MANAGED_CACHE
-		else if (cnt > 1)
-#else
-		if (!erofs_workgroup_try_to_freeze(grp, 1))
-#endif
+		/* try to shrink each valid workgroup */
+		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
 			continue;
 
-		if (xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
-			grp->index)) != grp) {
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-skip:
-			erofs_workgroup_unfreeze(grp, 1);
-#endif
-			continue;
-		}
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-		if (erofs_try_to_free_all_cached_pages(sbi, grp))
-			goto skip;
-
-		erofs_workgroup_unfreeze(grp, 1);
-#endif
-		/* (rarely) grabbed again when freeing */
-		erofs_workgroup_put(grp);
-
 		++freed;
 		if (unlikely(!--nr_shrink))
 			break;
-- 
2.11.0

  reply	other threads:[~2019-02-25 17:58 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-20  9:18 [PATCH for-4.19 00/12] erofs fixes for linux-4.19.y Gao Xiang
2019-02-20  9:18 ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 01/12] staging: erofs: fix a bug when appling cache strategy Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 02/12] staging: erofs: complete error handing of z_erofs_do_read_page Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-25 14:59   ` Greg Kroah-Hartman
2019-02-25 14:59     ` Greg Kroah-Hartman
2019-02-25 15:04     ` Gao Xiang
2019-02-25 15:04       ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 03/12] staging: erofs: replace BUG_ON with DBG_BUGON in data.c Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 04/12] staging: erofs: drop multiref support temporarily Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 05/12] staging: erofs: remove the redundant d_rehash() for the root dentry Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 06/12] staging: erofs: fix race when the managed cache is enabled Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-25 15:04   ` Greg Kroah-Hartman
2019-02-25 15:04     ` Greg Kroah-Hartman
2019-02-25 15:07     ` Gao Xiang
2019-02-25 15:07       ` Gao Xiang
2019-02-25 15:51       ` Greg Kroah-Hartman
2019-02-25 15:51         ` Greg Kroah-Hartman
2019-02-25 15:57         ` Gao Xiang
2019-02-25 15:57           ` Gao Xiang
2019-02-25 17:58         ` [PATCH for-4.19 1/2] xarray: Replace exceptional entries Gao Xiang
2019-02-25 17:58           ` Gao Xiang
2019-02-25 17:58           ` Gao Xiang [this message]
2019-02-25 17:58             ` [PATCH for-4.19 2/2] staging: erofs: fix race when the managed cache is enabled Gao Xiang
2019-02-25 18:27           ` [PATCH for-4.19 1/2] xarray: Replace exceptional entries Matthew Wilcox
2019-02-25 18:27             ` Matthew Wilcox
2019-02-26  1:21             ` Gao Xiang
2019-02-26  1:21               ` Gao Xiang
2019-02-26  5:14             ` [PATCH v2 " Gao Xiang
2019-02-26  5:14               ` Gao Xiang
2019-02-26  5:14               ` [PATCH v2 for-4.19 2/2] staging: erofs: fix race when the managed cache is enabled Gao Xiang
2019-02-26  5:14                 ` Gao Xiang
2019-02-26 12:43               ` [PATCH v2 for-4.19 1/2] xarray: Replace exceptional entries Gao Xiang
2019-02-26 12:43                 ` Gao Xiang
2019-03-04  5:13                 ` Gao Xiang
2019-03-04  5:13                   ` Gao Xiang
2019-03-13  9:18                   ` Gao Xiang
2019-03-13  9:18                     ` Gao Xiang
2019-02-25 15:25     ` [PATCH for-4.19 06/12] staging: erofs: fix race when the managed cache is enabled Matthew Wilcox
2019-02-25 15:25       ` Matthew Wilcox
2019-02-25 15:52       ` Greg Kroah-Hartman
2019-02-25 15:52         ` Greg Kroah-Hartman
2019-02-25 16:04         ` Matthew Wilcox
2019-02-25 16:04           ` Matthew Wilcox
2019-02-20  9:18 ` [PATCH for-4.19 07/12] staging: erofs: atomic_cond_read_relaxed on ref-locked workgroup Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 08/12] staging: erofs: fix `erofs_workgroup_{try_to_freeze, unfreeze}' Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 09/12] staging: erofs: add a full barrier in erofs_workgroup_unfreeze Gao Xiang
2019-02-20  9:18   ` Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 10/12] staging: erofs: {dir,inode,super}.c: rectify BUG_ONs Gao Xiang
2019-02-20  9:18   ` [PATCH for-4.19 10/12] staging: erofs: {dir, inode, super}.c: " Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 11/12] staging: erofs: unzip_{pagevec.h,vle.c}: " Gao Xiang
2019-02-20  9:18   ` [PATCH for-4.19 11/12] staging: erofs: unzip_{pagevec.h, vle.c}: " Gao Xiang
2019-02-20  9:18 ` [PATCH for-4.19 12/12] staging: erofs: unzip_vle_lz4.c,utils.c: " Gao Xiang
2019-02-20  9:18   ` [PATCH for-4.19 12/12] staging: erofs: unzip_vle_lz4.c, utils.c: " Gao Xiang
2019-02-22  8:35 ` [PATCH for-4.19 00/12] erofs fixes for linux-4.19.y Greg Kroah-Hartman
2019-02-22  8:35   ` Greg Kroah-Hartman
2019-02-22  9:03   ` Gao Xiang
2019-02-22  9:03     ` Gao Xiang
2019-02-25 15:28 ` Greg Kroah-Hartman
2019-02-25 15:28   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190225175809.13427-2-hsiangkao@aol.com \
    --to=hsiangkao@aol.com \
    --cc=gaoxiang25@huawei.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-erofs@lists.ozlabs.org \
    --cc=stable@vger.kernel.org \
    --cc=yuchao0@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.