All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Andrei Vagin <avagin@gmail.com>
Cc: dhowells@redhat.com, Alexander Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org, cgroups@vger.kernel.org,
	Li Zefan <lizefan@huawei.com>
Subject: Re: [PATCH vfs/for-next v2] cgroup: fix top cgroup refcnt leak
Date: Thu, 03 Jan 2019 15:27:09 +0000	[thread overview]
Message-ID: <9531.1546529229@warthog.procyon.org.uk> (raw)
In-Reply-To: <10060.1546522863@warthog.procyon.org.uk>

David Howells <dhowells@redhat.com> wrote:

> 	umount $d/a1
> 	umount $d/a2
> 
> then a cgroup gets leaked at the end.  Just retaining the final:
> 
> 	for i in `seq 2`; do 
> 		umount $d/a$i
> 	done
> 
> in place of the two final expanded umount commands fixes the issue.

The issue also goes away if I set PERCPU_REF_INIT_ATOMIC on the refcount
object when initialising it:-/

For reference, the sequence of css_get/tryget/put calls is:

*** 00000000e2655b5f RIN   1: cgroup1_get_tree+0x5de/0x73b
*** 00000000e2655b5f GET   2: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   1: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f TRYO  2: cgroup1_get_tree+0x4aa/0x73b
*** 00000000e2655b5f PUT   1: cgroup_do_get_tree+0x1c5/0x1fa
*** 00000000e2655b5f GET   2: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   1: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f TRY   2: cgroup_kn_lock_live+0x145/0x18a
*** 00000000e2655b5f GET   3: cgroup_mkdir+0x241/0x454
*** 00000000e2655b5f PUT   2: cgroup_mkdir+0xad/0x454
*** 00000000e2655b5f PUT   1: cgroup_kill_sb+0x134/0x161

*** 00000000e2655b5f TRYO  2: cgroup1_get_tree+0x4aa/0x73b
*** 00000000e2655b5f GET   3: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   2: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f TRYO  3: cgroup1_get_tree+0x4aa/0x73b
*** 00000000e2655b5f PUT   2: cgroup_do_get_tree+0x1c5/0x1fa
*** 00000000e2655b5f GET   3: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   2: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f KIL   2: cgroup_kill_sb+0x146/0x161
*** 00000000e2655b5f PUT   0: css_free_rwork_fn+0x399/0x5ec

As obtained with the attached patch.

I'm not sure the refcount is correct after the second block (at the end of the
second mount).  Should it have one ref per active sb?

David
---
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5e1694fe035b..23d022eb669f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -165,6 +165,8 @@ struct cgroup_subsys_state {
 	 * fields of the containing structure.
 	 */
 	struct cgroup_subsys_state *parent;
+
+	bool debug;
 };
 
 /*
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bb0c7da50ed2..76ca2be4986d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -295,6 +295,8 @@ void css_task_iter_end(struct css_task_iter *it);
  * Inline functions.
  */
 
+extern void css_refcount(struct cgroup_subsys_state *css, const char *op);
+
 /**
  * css_get - obtain a reference on the specified css
  * @css: target css
@@ -303,8 +305,10 @@ void css_task_iter_end(struct css_task_iter *it);
  */
 static inline void css_get(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_get(&css->refcnt);
+		css_refcount(css, "GET");
+	}
 }
 
 /**
@@ -316,8 +320,10 @@ static inline void css_get(struct cgroup_subsys_state *css)
  */
 static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_get_many(&css->refcnt, n);
+		css_refcount(css, "GETM");
+	}
 }
 
 /**
@@ -333,8 +339,11 @@ static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
  */
 static inline bool css_tryget(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
-		return percpu_ref_tryget(&css->refcnt);
+	if (css->flags & CSS_NO_REF)
+		return true;
+	if (!percpu_ref_tryget(&css->refcnt))
+		return false;
+	css_refcount(css, "TRY");
 	return true;
 }
 
@@ -350,8 +359,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
  */
 static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
-		return percpu_ref_tryget_live(&css->refcnt);
+	if (css->flags & CSS_NO_REF)
+		return true;
+	if (!percpu_ref_tryget_live(&css->refcnt))
+		return false;
+	css_refcount(css, "TRYO");
 	return true;
 }
 
@@ -383,8 +395,10 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_put(&css->refcnt);
+		css_refcount(css, "PUT");
+	}
 }
 
 /**
@@ -396,8 +410,10 @@ static inline void css_put(struct cgroup_subsys_state *css)
  */
 static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_put_many(&css->refcnt, n);
+		css_refcount(css, "PUTM");
+	}
 }
 
 static inline void cgroup_get(struct cgroup *cgrp)
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index de7d625ec077..d87a0b8c3441 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1175,7 +1175,7 @@ int cgroup1_get_tree(struct fs_context *fc)
 		    ss->root == &cgrp_dfl_root)
 			continue;
 
-		if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+		if (!css_tryget_online(&ss->root->cgrp.self)) {
 			mutex_unlock(&cgroup_mutex);
 			goto err_restart;
 		}
@@ -1228,7 +1228,7 @@ int cgroup1_get_tree(struct fs_context *fc)
 		 */
 		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
 		if (IS_ERR(pinned_sb) ||
-		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+		    !css_tryget_online(&root->cgrp.self)) {
 			mutex_unlock(&cgroup_mutex);
 			if (!IS_ERR_OR_NULL(pinned_sb))
 				deactivate_super(pinned_sb);
@@ -1284,6 +1284,7 @@ int cgroup1_get_tree(struct fs_context *fc)
 	if (new_root) {
 		mutex_lock(&cgroup_mutex);
 		percpu_ref_reinit(&root->cgrp.self.refcnt);
+		css_refcount(&root->cgrp.self, "RIN");
 		mutex_unlock(&cgroup_mutex);
 	}
 	cgroup_get(&root->cgrp);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 7dbe71b23941..299e3fb30731 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1904,8 +1904,11 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
 	root->flags = ctx->flags;
 	if (ctx->release_agent)
 		strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
-	if (ctx->name)
+	if (ctx->name) {
 		strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
+		if (strcmp(root->name, "xxxxy") == 0)
+			cgrp->self.debug = true;
+	}
 	if (ctx->cpuset_clone_children)
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
@@ -1927,7 +1930,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
 	root_cgrp->ancestor_ids[0] = ret;
 
 	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
-			      ref_flags, GFP_KERNEL);
+			      ref_flags | PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (ret)
 		goto out;
 
@@ -2166,8 +2169,10 @@ static void cgroup_kill_sb(struct super_block *sb)
 	if (!list_empty(&root->cgrp.self.children) ||
 	    root == &cgrp_dfl_root)
 		cgroup_put(&root->cgrp);
-	else
+	else {
+		css_refcount(&root->cgrp.self, "KIL");
 		percpu_ref_kill(&root->cgrp.self.refcnt);
+	}
 
 	kernfs_kill_sb(sb);
 }
@@ -4891,7 +4896,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
 	init_and_link_css(css, ss, cgrp);
 
-	err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
+	err = percpu_ref_init(&css->refcnt, css_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (err)
 		goto err_free_css;
 
@@ -4946,7 +4951,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	if (!cgrp)
 		return ERR_PTR(-ENOMEM);
 
-	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
+	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (ret)
 		goto out_free_cgrp;
 
@@ -5194,6 +5199,7 @@ static void kill_css(struct cgroup_subsys_state *css)
 	 * Use percpu_ref_kill_and_confirm() to get notifications as each
 	 * css is confirmed to be seen as killed on all CPUs.
 	 */
+	css_refcount(css, "KLC");
 	percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
 }
 
@@ -5278,6 +5284,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	cgroup1_check_for_release(parent);
 
 	/* put the base reference */
+	css_refcount(&cgrp->self, "KIL");
 	percpu_ref_kill(&cgrp->self.refcnt);
 
 	return 0;
@@ -6106,3 +6113,11 @@ static int __init cgroup_sysfs_init(void)
 }
 subsys_initcall(cgroup_sysfs_init);
 #endif /* CONFIG_SYSFS */
+
+noinline void css_refcount(struct cgroup_subsys_state *css, const char *op)
+{
+	if (css->debug)
+		printk("*** %p %s %2ld: %pSR\n",
+		       css, op, atomic_long_read(&css->refcnt.count),
+		       __builtin_return_address(0));
+}

  parent reply	other threads:[~2019-01-03 15:27 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-28 23:59 [PATCH] cgroup: fix top cgroup refcnt leak Andrei Vagin
2018-12-29  0:04 ` [PATCH vfs/for-next v2] " Andrei Vagin
2018-12-30 19:41   ` Andrei Vagin
2019-01-02  2:28   ` Al Viro
2019-01-02 18:14     ` [PATCH vfs/for-next v3] " Andrei Vagin
2019-01-02 19:37     ` [PATCH vfs/for-next v2] " Andrei Vagin
2019-01-02 19:37     ` [PATCH vfs/for-next v4] " Andrei Vagin
2019-01-02 20:02       ` Al Viro
2019-01-02 21:06         ` Andrei Vagin
2019-01-03  0:26         ` David Howells
2019-01-03  0:43           ` Andrei Vagin
2019-01-03  1:00             ` Andrei Vagin
2019-01-03  3:54               ` [PATCH vfs/for-next v6] " Andrei Vagin
2019-01-03  8:32                 ` Al Viro
2019-01-03 17:34                   ` Andrei Vagin
2019-01-03 21:54                   ` David Howells
2019-01-02 22:26 ` [PATCH vfs/for-next v2] " David Howells
2019-01-02 23:06   ` Andrei Vagin
2019-01-02 23:31     ` Andrei Vagin
2019-01-03  0:33     ` David Howells
2019-01-03 13:41     ` David Howells
2019-01-03 13:42     ` David Howells
2019-01-03 15:27     ` David Howells [this message]
2019-01-03 15:44     ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9531.1546529229@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=avagin@gmail.com \
    --cc=cgroups@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=lizefan@huawei.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.