linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
@ 2016-08-02 15:00 Vladimir Davydov
  2016-08-02 15:00 ` [PATCH v2 2/3] mm: memcontrol: fix memcg id ref counter on swap charge move Vladimir Davydov
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Vladimir Davydov @ 2016-08-02 15:00 UTC (permalink / raw)
  To: Andrew Morton
  Cc: stable, Johannes Weiner, Michal Hocko, linux-mm, linux-kernel

An offline memory cgroup might have anonymous memory or shmem left
charged to it and no swap. Since only swap entries pin the id of an
offline cgroup, such a cgroup will have no id and so an attempt to
swapout its anon/shmem will not store memory cgroup info in the swap
cgroup map. As a result, memcg->swap or memcg->memsw will never get
uncharged from it and any of its ascendants.

Fix this by always charging swapout to the first ancestor cgroup that
hasn't released its id yet.

Fixes: 73f576c04b941 ("mm: memcontrol: fix cgroup creation failure after many small jobs")
Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: <stable@vger.kernel.org>	[3.19+]
---
Changes in v2:
 - handle !use_hierarchy case properly (Michal)

 mm/memcontrol.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3be791afd372..4ae12effe347 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4036,6 +4036,24 @@ static void mem_cgroup_id_get(struct mem_cgroup *memcg)
 	atomic_inc(&memcg->id.ref);
 }
 
+static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
+{
+	while (!atomic_inc_not_zero(&memcg->id.ref)) {
+		/*
+		 * The root cgroup cannot be destroyed, so it's refcount must
+		 * always be >= 1.
+		 */
+		if (memcg == root_mem_cgroup) {
+			VM_BUG_ON(1);
+			break;
+		}
+		memcg = parent_mem_cgroup(memcg);
+		if (!memcg)
+			memcg = root_mem_cgroup;
+	}
+	return memcg;
+}
+
 static void mem_cgroup_id_put(struct mem_cgroup *memcg)
 {
 	if (atomic_dec_and_test(&memcg->id.ref)) {
@@ -5752,7 +5770,7 @@ subsys_initcall(mem_cgroup_init);
  */
 void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 {
-	struct mem_cgroup *memcg;
+	struct mem_cgroup *memcg, *swap_memcg;
 	unsigned short oldid;
 
 	VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -5767,15 +5785,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	if (!memcg)
 		return;
 
-	mem_cgroup_id_get(memcg);
-	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+	swap_memcg = mem_cgroup_id_get_active(memcg);
+	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
 	VM_BUG_ON_PAGE(oldid, page);
-	mem_cgroup_swap_statistics(memcg, true);
+	mem_cgroup_swap_statistics(swap_memcg, true);
 
 	page->mem_cgroup = NULL;
 
 	if (!mem_cgroup_is_root(memcg))
 		page_counter_uncharge(&memcg->memory, 1);
+	if (memcg != swap_memcg) {
+		if (!mem_cgroup_is_root(swap_memcg))
+			page_counter_charge(&swap_memcg->memsw, 1);
+		page_counter_uncharge(&memcg->memsw, 1);
+	}
 
 	/*
 	 * Interrupts should be disabled here because the caller holds the
@@ -5815,11 +5838,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
 	if (!memcg)
 		return 0;
 
+	memcg = mem_cgroup_id_get_active(memcg);
+
 	if (!mem_cgroup_is_root(memcg) &&
-	    !page_counter_try_charge(&memcg->swap, 1, &counter))
+	    !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+		mem_cgroup_id_put(memcg);
 		return -ENOMEM;
+	}
 
-	mem_cgroup_id_get(memcg);
 	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
 	VM_BUG_ON_PAGE(oldid, page);
 	mem_cgroup_swap_statistics(memcg, true);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 2/3] mm: memcontrol: fix memcg id ref counter on swap charge move
  2016-08-02 15:00 [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Vladimir Davydov
@ 2016-08-02 15:00 ` Vladimir Davydov
  2016-08-02 17:22   ` Johannes Weiner
  2016-08-02 15:00 ` [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put Vladimir Davydov
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 17+ messages in thread
From: Vladimir Davydov @ 2016-08-02 15:00 UTC (permalink / raw)
  To: Andrew Morton
  Cc: stable, Johannes Weiner, Michal Hocko, linux-mm, linux-kernel

Since commit 73f576c04b941 swap entries do not pin memcg->css.refcnt
directly. Instead, they pin memcg->id.ref. So we should adjust the
reference counters accordingly when moving swap charges between cgroups.

Fixes: 73f576c04b941 ("mm: memcontrol: fix cgroup creation failure after many small jobs")
Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>	[3.19+]
---
 mm/memcontrol.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4ae12effe347..67109d556a4a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4031,9 +4031,9 @@ static struct cftype mem_cgroup_legacy_files[] = {
 
 static DEFINE_IDR(mem_cgroup_idr);
 
-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
-	atomic_inc(&memcg->id.ref);
+	atomic_add(n, &memcg->id.ref);
 }
 
 static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
@@ -4054,9 +4054,9 @@ static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
 	return memcg;
 }
 
-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
-	if (atomic_dec_and_test(&memcg->id.ref)) {
+	if (atomic_sub_and_test(n, &memcg->id.ref)) {
 		idr_remove(&mem_cgroup_idr, memcg->id.id);
 		memcg->id.id = 0;
 
@@ -4065,6 +4065,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg)
 	}
 }
 
+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+	mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+	mem_cgroup_id_put_many(memcg, 1);
+}
+
 /**
  * mem_cgroup_from_id - look up a memcg from a memcg id
  * @id: the memcg id to look up
@@ -4699,6 +4709,8 @@ static void __mem_cgroup_clear_mc(void)
 		if (!mem_cgroup_is_root(mc.from))
 			page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
 
+		mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
 		/*
 		 * we charged both to->memory and to->memsw, so we
 		 * should uncharge to->memory.
@@ -4706,9 +4718,9 @@ static void __mem_cgroup_clear_mc(void)
 		if (!mem_cgroup_is_root(mc.to))
 			page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-		css_put_many(&mc.from->css, mc.moved_swap);
+		mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+		css_put_many(&mc.to->css, mc.moved_swap);
 
-		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
 	}
 	memcg_oom_recover(from);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put
  2016-08-02 15:00 [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Vladimir Davydov
  2016-08-02 15:00 ` [PATCH v2 2/3] mm: memcontrol: fix memcg id ref counter on swap charge move Vladimir Davydov
@ 2016-08-02 15:00 ` Vladimir Davydov
  2016-08-02 16:01   ` Michal Hocko
  2016-08-02 17:27   ` Johannes Weiner
  2016-08-02 16:00 ` [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Michal Hocko
  2016-08-02 17:21 ` Johannes Weiner
  3 siblings, 2 replies; 17+ messages in thread
From: Vladimir Davydov @ 2016-08-02 15:00 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Johannes Weiner, Michal Hocko, linux-mm, linux-kernel

Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 mm/memcontrol.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 67109d556a4a..32b2f33865f9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4033,6 +4033,7 @@ static DEFINE_IDR(mem_cgroup_idr);
 
 static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
+	VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
 	atomic_add(n, &memcg->id.ref);
 }
 
@@ -4056,6 +4057,7 @@ static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
 
 static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
+	VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
 	if (atomic_sub_and_test(n, &memcg->id.ref)) {
 		idr_remove(&mem_cgroup_idr, memcg->id.id);
 		memcg->id.id = 0;
@@ -4176,6 +4178,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	INIT_LIST_HEAD(&memcg->cgwb_list);
 #endif
 	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+	atomic_set(&memcg->id.ref, 1);
 	return memcg;
 fail:
 	if (memcg->id.id > 0)
@@ -4245,7 +4248,6 @@ fail:
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	/* Online state pins memcg ID, memcg ID pins CSS */
-	mem_cgroup_id_get(mem_cgroup_from_css(css));
 	css_get(css);
 	return 0;
 }
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-02 15:00 [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Vladimir Davydov
  2016-08-02 15:00 ` [PATCH v2 2/3] mm: memcontrol: fix memcg id ref counter on swap charge move Vladimir Davydov
  2016-08-02 15:00 ` [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put Vladimir Davydov
@ 2016-08-02 16:00 ` Michal Hocko
  2016-08-02 17:33   ` Johannes Weiner
  2016-08-03  9:50   ` Vladimir Davydov
  2016-08-02 17:21 ` Johannes Weiner
  3 siblings, 2 replies; 17+ messages in thread
From: Michal Hocko @ 2016-08-02 16:00 UTC (permalink / raw)
  To: Vladimir Davydov
  Cc: Andrew Morton, stable, Johannes Weiner, linux-mm, linux-kernel

On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> An offline memory cgroup might have anonymous memory or shmem left
> charged to it and no swap. Since only swap entries pin the id of an
> offline cgroup, such a cgroup will have no id and so an attempt to
> swapout its anon/shmem will not store memory cgroup info in the swap
> cgroup map. As a result, memcg->swap or memcg->memsw will never get
> uncharged from it and any of its ascendants.
> 
> Fix this by always charging swapout to the first ancestor cgroup that
> hasn't released its id yet.
> 
> Fixes: 73f576c04b941 ("mm: memcontrol: fix cgroup creation failure after many small jobs")
> Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
> Cc: <stable@vger.kernel.org>	[3.19+]
> ---
> Changes in v2:
>  - handle !use_hierarchy case properly (Michal)
> 
>  mm/memcontrol.c | 38 ++++++++++++++++++++++++++++++++------
>  1 file changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 3be791afd372..4ae12effe347 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4036,6 +4036,24 @@ static void mem_cgroup_id_get(struct mem_cgroup *memcg)
>  	atomic_inc(&memcg->id.ref);
>  }
>  
> +static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
> +{
> +	while (!atomic_inc_not_zero(&memcg->id.ref)) {
> +		/*
> +		 * The root cgroup cannot be destroyed, so it's refcount must
> +		 * always be >= 1.
> +		 */
> +		if (memcg == root_mem_cgroup) {
> +			VM_BUG_ON(1);
> +			break;
> +		}

why not simply VM_BUG_ON(memcg == root_mem_cgroup)?

> +		memcg = parent_mem_cgroup(memcg);
> +		if (!memcg)
> +			memcg = root_mem_cgroup;
> +	}
> +	return memcg;
> +}
> +
>  static void mem_cgroup_id_put(struct mem_cgroup *memcg)
>  {
>  	if (atomic_dec_and_test(&memcg->id.ref)) {
> @@ -5752,7 +5770,7 @@ subsys_initcall(mem_cgroup_init);
>   */
>  void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
>  {
> -	struct mem_cgroup *memcg;
> +	struct mem_cgroup *memcg, *swap_memcg;
>  	unsigned short oldid;
>  
>  	VM_BUG_ON_PAGE(PageLRU(page), page);
> @@ -5767,15 +5785,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
>  	if (!memcg)
>  		return;
>  
> -	mem_cgroup_id_get(memcg);
> -	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> +	swap_memcg = mem_cgroup_id_get_active(memcg);
> +	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
>  	VM_BUG_ON_PAGE(oldid, page);
> -	mem_cgroup_swap_statistics(memcg, true);
> +	mem_cgroup_swap_statistics(swap_memcg, true);
>  
>  	page->mem_cgroup = NULL;
>  
>  	if (!mem_cgroup_is_root(memcg))
>  		page_counter_uncharge(&memcg->memory, 1);
> +	if (memcg != swap_memcg) {
> +		if (!mem_cgroup_is_root(swap_memcg))
> +			page_counter_charge(&swap_memcg->memsw, 1);
> +		page_counter_uncharge(&memcg->memsw, 1);
> +	}
>  
>  	/*
>  	 * Interrupts should be disabled here because the caller holds the

The resulting code is a weird mixture of memcg and swap_memcg usage
which is really confusing and error prone. Do we really have to do
uncharge on an already offline memcg?
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put
  2016-08-02 15:00 ` [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put Vladimir Davydov
@ 2016-08-02 16:01   ` Michal Hocko
  2016-08-02 17:27   ` Johannes Weiner
  1 sibling, 0 replies; 17+ messages in thread
From: Michal Hocko @ 2016-08-02 16:01 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: Andrew Morton, Johannes Weiner, linux-mm, linux-kernel

On Tue 02-08-16 18:00:50, Vladimir Davydov wrote:
> Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/memcontrol.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 67109d556a4a..32b2f33865f9 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4033,6 +4033,7 @@ static DEFINE_IDR(mem_cgroup_idr);
>  
>  static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
>  {
> +	VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
>  	atomic_add(n, &memcg->id.ref);
>  }
>  
> @@ -4056,6 +4057,7 @@ static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
>  
>  static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
>  {
> +	VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
>  	if (atomic_sub_and_test(n, &memcg->id.ref)) {
>  		idr_remove(&mem_cgroup_idr, memcg->id.id);
>  		memcg->id.id = 0;
> @@ -4176,6 +4178,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
>  	INIT_LIST_HEAD(&memcg->cgwb_list);
>  #endif
>  	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
> +	atomic_set(&memcg->id.ref, 1);
>  	return memcg;
>  fail:
>  	if (memcg->id.id > 0)
> @@ -4245,7 +4248,6 @@ fail:
>  static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  {
>  	/* Online state pins memcg ID, memcg ID pins CSS */
> -	mem_cgroup_id_get(mem_cgroup_from_css(css));
>  	css_get(css);
>  	return 0;
>  }
> -- 
> 2.1.4

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-02 15:00 [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Vladimir Davydov
                   ` (2 preceding siblings ...)
  2016-08-02 16:00 ` [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Michal Hocko
@ 2016-08-02 17:21 ` Johannes Weiner
  3 siblings, 0 replies; 17+ messages in thread
From: Johannes Weiner @ 2016-08-02 17:21 UTC (permalink / raw)
  To: Vladimir Davydov
  Cc: Andrew Morton, stable, Michal Hocko, linux-mm, linux-kernel

On Tue, Aug 02, 2016 at 06:00:48PM +0300, Vladimir Davydov wrote:
> An offline memory cgroup might have anonymous memory or shmem left
> charged to it and no swap. Since only swap entries pin the id of an
> offline cgroup, such a cgroup will have no id and so an attempt to
> swapout its anon/shmem will not store memory cgroup info in the swap
> cgroup map. As a result, memcg->swap or memcg->memsw will never get
> uncharged from it and any of its ascendants.
> 
> Fix this by always charging swapout to the first ancestor cgroup that
> hasn't released its id yet.
> 
> Fixes: 73f576c04b941 ("mm: memcontrol: fix cgroup creation failure after many small jobs")
> Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
> Cc: <stable@vger.kernel.org>	[3.19+]

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] mm: memcontrol: fix memcg id ref counter on swap charge move
  2016-08-02 15:00 ` [PATCH v2 2/3] mm: memcontrol: fix memcg id ref counter on swap charge move Vladimir Davydov
@ 2016-08-02 17:22   ` Johannes Weiner
  0 siblings, 0 replies; 17+ messages in thread
From: Johannes Weiner @ 2016-08-02 17:22 UTC (permalink / raw)
  To: Vladimir Davydov
  Cc: Andrew Morton, stable, Michal Hocko, linux-mm, linux-kernel

On Tue, Aug 02, 2016 at 06:00:49PM +0300, Vladimir Davydov wrote:
> Since commit 73f576c04b941 swap entries do not pin memcg->css.refcnt
> directly. Instead, they pin memcg->id.ref. So we should adjust the
> reference counters accordingly when moving swap charges between cgroups.
> 
> Fixes: 73f576c04b941 ("mm: memcontrol: fix cgroup creation failure after many small jobs")
> Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
> Acked-by: Michal Hocko <mhocko@suse.com>
> Cc: <stable@vger.kernel.org>	[3.19+]

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put
  2016-08-02 15:00 ` [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put Vladimir Davydov
  2016-08-02 16:01   ` Michal Hocko
@ 2016-08-02 17:27   ` Johannes Weiner
  1 sibling, 0 replies; 17+ messages in thread
From: Johannes Weiner @ 2016-08-02 17:27 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: Andrew Morton, Michal Hocko, linux-mm, linux-kernel

On Tue, Aug 02, 2016 at 06:00:50PM +0300, Vladimir Davydov wrote:
> Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
> ---
>  mm/memcontrol.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 67109d556a4a..32b2f33865f9 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4033,6 +4033,7 @@ static DEFINE_IDR(mem_cgroup_idr);
>  
>  static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
>  {
> +	VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
>  	atomic_add(n, &memcg->id.ref);
>  }
>  
> @@ -4056,6 +4057,7 @@ static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
>  
>  static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
>  {
> +	VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
>  	if (atomic_sub_and_test(n, &memcg->id.ref)) {
>  		idr_remove(&mem_cgroup_idr, memcg->id.id);
>  		memcg->id.id = 0;
> @@ -4176,6 +4178,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
>  	INIT_LIST_HEAD(&memcg->cgwb_list);
>  #endif
>  	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
> +	atomic_set(&memcg->id.ref, 1);
>  	return memcg;
>  fail:
>  	if (memcg->id.id > 0)
> @@ -4245,7 +4248,6 @@ fail:
>  static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  {
>  	/* Online state pins memcg ID, memcg ID pins CSS */
> -	mem_cgroup_id_get(mem_cgroup_from_css(css));
>  	css_get(css);
>  	return 0;

This comment and code is no very, very confusing. Can you move the
atomic_set(&memcg->id.ref, 1) down here instead?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-02 16:00 ` [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Michal Hocko
@ 2016-08-02 17:33   ` Johannes Weiner
  2016-08-02 20:31     ` Michal Hocko
  2016-08-03  9:50   ` Vladimir Davydov
  1 sibling, 1 reply; 17+ messages in thread
From: Johannes Weiner @ 2016-08-02 17:33 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Vladimir Davydov, Andrew Morton, stable, linux-mm, linux-kernel

On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> > @@ -5767,15 +5785,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> >  	if (!memcg)
> >  		return;
> >  
> > -	mem_cgroup_id_get(memcg);
> > -	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> > +	swap_memcg = mem_cgroup_id_get_active(memcg);
> > +	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
> >  	VM_BUG_ON_PAGE(oldid, page);
> > -	mem_cgroup_swap_statistics(memcg, true);
> > +	mem_cgroup_swap_statistics(swap_memcg, true);
> >  
> >  	page->mem_cgroup = NULL;
> >  
> >  	if (!mem_cgroup_is_root(memcg))
> >  		page_counter_uncharge(&memcg->memory, 1);
> > +	if (memcg != swap_memcg) {
> > +		if (!mem_cgroup_is_root(swap_memcg))
> > +			page_counter_charge(&swap_memcg->memsw, 1);
> > +		page_counter_uncharge(&memcg->memsw, 1);
> > +	}
> >  
> >  	/*
> >  	 * Interrupts should be disabled here because the caller holds the
> 
> The resulting code is a weird mixture of memcg and swap_memcg usage
> which is really confusing and error prone. Do we really have to do
> uncharge on an already offline memcg?

The charge is recursive and includes swap_memcg, i.e. live groups, so
the uncharge is necessary. I don't think the code is too bad, though?
swap_memcg is the target that is being charged for swap, memcg is the
origin group from which we swap out. Seems pretty straightforward...?

But maybe a comment above the memcg != swap_memcg check would be nice:

/*
 * In case the memcg owning these pages has been offlined and doesn't
 * have an ID allocated to it anymore, charge the closest online
 * ancestor for the swap instead and transfer the memory+swap charge.
 */

Thinking about it, mem_cgroup_id_get_active() is a little strange; the
term we use throughout the cgroup code is "online". It might be good
to rename this mem_cgroup_id_get_online().

Thanks

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-02 17:33   ` Johannes Weiner
@ 2016-08-02 20:31     ` Michal Hocko
  2016-08-03 10:06       ` Vladimir Davydov
  0 siblings, 1 reply; 17+ messages in thread
From: Michal Hocko @ 2016-08-02 20:31 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Vladimir Davydov, Andrew Morton, stable, linux-mm, linux-kernel

On Tue 02-08-16 13:33:37, Johannes Weiner wrote:
> On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> > On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> > > @@ -5767,15 +5785,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > >  	if (!memcg)
> > >  		return;
> > >  
> > > -	mem_cgroup_id_get(memcg);
> > > -	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> > > +	swap_memcg = mem_cgroup_id_get_active(memcg);
> > > +	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
> > >  	VM_BUG_ON_PAGE(oldid, page);
> > > -	mem_cgroup_swap_statistics(memcg, true);
> > > +	mem_cgroup_swap_statistics(swap_memcg, true);
> > >  
> > >  	page->mem_cgroup = NULL;
> > >  
> > >  	if (!mem_cgroup_is_root(memcg))
> > >  		page_counter_uncharge(&memcg->memory, 1);
> > > +	if (memcg != swap_memcg) {
> > > +		if (!mem_cgroup_is_root(swap_memcg))
> > > +			page_counter_charge(&swap_memcg->memsw, 1);
> > > +		page_counter_uncharge(&memcg->memsw, 1);
> > > +	}
> > >  
> > >  	/*
> > >  	 * Interrupts should be disabled here because the caller holds the
> > 
> > The resulting code is a weird mixture of memcg and swap_memcg usage
> > which is really confusing and error prone. Do we really have to do
> > uncharge on an already offline memcg?
> 
> The charge is recursive and includes swap_memcg, i.e. live groups, so
> the uncharge is necessary.

Hmm, the charge is recursive, alraight, but then I see only see only
small sympathy for
               if (!mem_cgroup_is_root(swap_memcg))
                       page_counter_charge(&swap_memcg->memsw, 1);
               page_counter_uncharge(&memcg->memsw, 1);

we first charge up the hierarchy just to uncharge the same balance from
the lower. So the end result should be same, right? The only reason
would be that we uncharge the lower layer as well. I do not remember
details, but I do not remember we would be checking counters being 0 on
exit.
But it is quite late and my brain is quite burnt so I might miss
something easily. So whatever small style issues, I think the patch
is correct and feel free to add

Acked-by: Michal Hocko <mhocko@suse.com>

I just think we can make this easier and more straightforward. See the
diff below (not even compile tested - just for an illustration).

> I don't think the code is too bad, though?
> swap_memcg is the target that is being charged for swap, memcg is the
> origin group from which we swap out. Seems pretty straightforward...?
> 
> But maybe a comment above the memcg != swap_memcg check would be nice:
> 
> /*
>  * In case the memcg owning these pages has been offlined and doesn't
>  * have an ID allocated to it anymore, charge the closest online
>  * ancestor for the swap instead and transfer the memory+swap charge.
>  */

comment would be definitely helpful.
 
> Thinking about it, mem_cgroup_id_get_active() is a little strange; the
> term we use throughout the cgroup code is "online". It might be good
> to rename this mem_cgroup_id_get_online().

yes, that would be better, imho

---
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b6ac01d2b908..66868b2a4c8c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5819,6 +5819,14 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 	VM_BUG_ON_PAGE(page_count(page), page);
 
+	/*
+	 * Interrupts should be disabled here because the caller holds the
+	 * mapping->tree_lock lock which is taken with interrupts-off. It is
+	 * important here to have the interrupts disabled because it is the
+	 * only synchronisation we have for udpating the per-CPU variables.
+	 */
+	VM_BUG_ON(!irqs_disabled());
+
 	if (!do_memsw_account())
 		return;
 
@@ -5828,6 +5836,12 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	if (!memcg)
 		return;
 
+	/*
+	 * In case the memcg owning these pages has been offlined and doesn't
+	 * have an ID allocated to it anymore, charge the closest online
+	 * ancestor for the swap instead. Hierarchical charges will be preserved
+	 * and the offlined one will not cry with some discrepances in statistics
+	 */
 	swap_memcg = mem_cgroup_id_get_active(memcg);
 	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
 	VM_BUG_ON_PAGE(oldid, page);
@@ -5837,21 +5851,11 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 
 	if (!mem_cgroup_is_root(memcg))
 		page_counter_uncharge(&memcg->memory, 1);
-	if (memcg != swap_memcg) {
-		if (!mem_cgroup_is_root(swap_memcg))
-			page_counter_charge(&swap_memcg->memsw, 1);
-		page_counter_uncharge(&memcg->memsw, 1);
-	}
 
-	/*
-	 * Interrupts should be disabled here because the caller holds the
-	 * mapping->tree_lock lock which is taken with interrupts-off. It is
-	 * important here to have the interrupts disabled because it is the
-	 * only synchronisation we have for udpating the per-CPU variables.
-	 */
-	VM_BUG_ON(!irqs_disabled());
-	mem_cgroup_charge_statistics(memcg, page, false, -1);
-	memcg_check_events(memcg, page);
+	if (memcg == swap_memcg) {
+		mem_cgroup_charge_statistics(memcg, page, false, -1);
+		memcg_check_events(memcg, page);
+	}
 
 	if (!mem_cgroup_is_root(memcg))
 		css_put(&memcg->css);
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-02 16:00 ` [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Michal Hocko
  2016-08-02 17:33   ` Johannes Weiner
@ 2016-08-03  9:50   ` Vladimir Davydov
  2016-08-03 11:09     ` Michal Hocko
  1 sibling, 1 reply; 17+ messages in thread
From: Vladimir Davydov @ 2016-08-03  9:50 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Andrew Morton, stable, Johannes Weiner, linux-mm, linux-kernel

On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
...
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 3be791afd372..4ae12effe347 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -4036,6 +4036,24 @@ static void mem_cgroup_id_get(struct mem_cgroup *memcg)
> >  	atomic_inc(&memcg->id.ref);
> >  }
> >  
> > +static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
> > +{
> > +	while (!atomic_inc_not_zero(&memcg->id.ref)) {
> > +		/*
> > +		 * The root cgroup cannot be destroyed, so it's refcount must
> > +		 * always be >= 1.
> > +		 */
> > +		if (memcg == root_mem_cgroup) {
> > +			VM_BUG_ON(1);
> > +			break;
> > +		}
> 
> why not simply VM_BUG_ON(memcg == root_mem_cgroup)?

Because with DEBUG_VM disabled we could wind up looping forever here if
the refcount of the root_mem_cgroup got screwed up. On production
kernels, it's better to break the loop and carry on closing eyes on
diverging counters rather than getting a lockup.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-02 20:31     ` Michal Hocko
@ 2016-08-03 10:06       ` Vladimir Davydov
  0 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2016-08-03 10:06 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Johannes Weiner, Andrew Morton, stable, linux-mm, linux-kernel

On Tue, Aug 02, 2016 at 10:31:16PM +0200, Michal Hocko wrote:
> On Tue 02-08-16 13:33:37, Johannes Weiner wrote:
> > On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> > > On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> > > > @@ -5767,15 +5785,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
> > > >  	if (!memcg)
> > > >  		return;
> > > >  
> > > > -	mem_cgroup_id_get(memcg);
> > > > -	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
> > > > +	swap_memcg = mem_cgroup_id_get_active(memcg);
> > > > +	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
> > > >  	VM_BUG_ON_PAGE(oldid, page);
> > > > -	mem_cgroup_swap_statistics(memcg, true);
> > > > +	mem_cgroup_swap_statistics(swap_memcg, true);
> > > >  
> > > >  	page->mem_cgroup = NULL;
> > > >  
> > > >  	if (!mem_cgroup_is_root(memcg))
> > > >  		page_counter_uncharge(&memcg->memory, 1);
> > > > +	if (memcg != swap_memcg) {
> > > > +		if (!mem_cgroup_is_root(swap_memcg))
> > > > +			page_counter_charge(&swap_memcg->memsw, 1);
> > > > +		page_counter_uncharge(&memcg->memsw, 1);
> > > > +	}
> > > >  
> > > >  	/*
> > > >  	 * Interrupts should be disabled here because the caller holds the
> > > 
> > > The resulting code is a weird mixture of memcg and swap_memcg usage
> > > which is really confusing and error prone. Do we really have to do
> > > uncharge on an already offline memcg?
> > 
> > The charge is recursive and includes swap_memcg, i.e. live groups, so
> > the uncharge is necessary.
> 
> Hmm, the charge is recursive, alraight, but then I see only see only
> small sympathy for
>                if (!mem_cgroup_is_root(swap_memcg))
>                        page_counter_charge(&swap_memcg->memsw, 1);
>                page_counter_uncharge(&memcg->memsw, 1);
> 
> we first charge up the hierarchy just to uncharge the same balance from
> the lower. So the end result should be same, right? The only reason
> would be that we uncharge the lower layer as well. I do not remember
> details, but I do not remember we would be checking counters being 0 on
> exit.

We don't, but I think it would be nice to check the counters on css
free, as it might be helpful for debugging.

I thought about introducing page_counter_uncharge_until() to make this
code look more straightforward, but finally decided to leave it as it is
now, because this code is doomed to die anyway once the unified
hierarchy has settled in.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-03  9:50   ` Vladimir Davydov
@ 2016-08-03 11:09     ` Michal Hocko
  2016-08-03 11:46       ` Vladimir Davydov
  0 siblings, 1 reply; 17+ messages in thread
From: Michal Hocko @ 2016-08-03 11:09 UTC (permalink / raw)
  To: Vladimir Davydov
  Cc: Andrew Morton, stable, Johannes Weiner, linux-mm, linux-kernel

On Wed 03-08-16 12:50:49, Vladimir Davydov wrote:
> On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> > On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> ...
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 3be791afd372..4ae12effe347 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -4036,6 +4036,24 @@ static void mem_cgroup_id_get(struct mem_cgroup *memcg)
> > >  	atomic_inc(&memcg->id.ref);
> > >  }
> > >  
> > > +static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
> > > +{
> > > +	while (!atomic_inc_not_zero(&memcg->id.ref)) {
> > > +		/*
> > > +		 * The root cgroup cannot be destroyed, so it's refcount must
> > > +		 * always be >= 1.
> > > +		 */
> > > +		if (memcg == root_mem_cgroup) {
> > > +			VM_BUG_ON(1);
> > > +			break;
> > > +		}
> > 
> > why not simply VM_BUG_ON(memcg == root_mem_cgroup)?
> 
> Because with DEBUG_VM disabled we could wind up looping forever here if
> the refcount of the root_mem_cgroup got screwed up. On production
> kernels, it's better to break the loop and carry on closing eyes on
> diverging counters rather than getting a lockup.

Wouldn't this just paper over a real bug? Anyway I will not insist but
making the code more complex just to pretend we can handle a situation
gracefully doesn't sound right to me.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-03 11:09     ` Michal Hocko
@ 2016-08-03 11:46       ` Vladimir Davydov
  2016-08-03 12:00         ` Michal Hocko
  2016-08-03 14:12         ` Johannes Weiner
  0 siblings, 2 replies; 17+ messages in thread
From: Vladimir Davydov @ 2016-08-03 11:46 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Andrew Morton, stable, Johannes Weiner, linux-mm, linux-kernel

On Wed, Aug 03, 2016 at 01:09:42PM +0200, Michal Hocko wrote:
> On Wed 03-08-16 12:50:49, Vladimir Davydov wrote:
> > On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> > > On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> > ...
> > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > > index 3be791afd372..4ae12effe347 100644
> > > > --- a/mm/memcontrol.c
> > > > +++ b/mm/memcontrol.c
> > > > @@ -4036,6 +4036,24 @@ static void mem_cgroup_id_get(struct mem_cgroup *memcg)
> > > >  	atomic_inc(&memcg->id.ref);
> > > >  }
> > > >  
> > > > +static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
> > > > +{
> > > > +	while (!atomic_inc_not_zero(&memcg->id.ref)) {
> > > > +		/*
> > > > +		 * The root cgroup cannot be destroyed, so it's refcount must
> > > > +		 * always be >= 1.
> > > > +		 */
> > > > +		if (memcg == root_mem_cgroup) {
> > > > +			VM_BUG_ON(1);
> > > > +			break;
> > > > +		}
> > > 
> > > why not simply VM_BUG_ON(memcg == root_mem_cgroup)?
> > 
> > Because with DEBUG_VM disabled we could wind up looping forever here if
> > the refcount of the root_mem_cgroup got screwed up. On production
> > kernels, it's better to break the loop and carry on closing eyes on
> > diverging counters rather than getting a lockup.
> 
> Wouldn't this just paper over a real bug? Anyway I will not insist but
> making the code more complex just to pretend we can handle a situation
> gracefully doesn't sound right to me.

But we can handle this IMO. AFAICS diverging id refcount will typically
result in leaking swap charges, which aren't even a real resource. At
worst, we can leak an offline mem_cgroup, which is also not critical
enough to crash the production system.

I see your concern of papering over a bug though. What about adding a
warning there?

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c0aa59fd333..8c8e68becee9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4044,7 +4044,7 @@ static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
 		 * The root cgroup cannot be destroyed, so it's refcount must
 		 * always be >= 1.
 		 */
-		if (memcg == root_mem_cgroup) {
+		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
 			VM_BUG_ON(1);
 			break;
 		}

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-03 11:46       ` Vladimir Davydov
@ 2016-08-03 12:00         ` Michal Hocko
  2016-08-03 14:12         ` Johannes Weiner
  1 sibling, 0 replies; 17+ messages in thread
From: Michal Hocko @ 2016-08-03 12:00 UTC (permalink / raw)
  To: Vladimir Davydov
  Cc: Andrew Morton, stable, Johannes Weiner, linux-mm, linux-kernel

On Wed 03-08-16 14:46:40, Vladimir Davydov wrote:
> On Wed, Aug 03, 2016 at 01:09:42PM +0200, Michal Hocko wrote:
> > On Wed 03-08-16 12:50:49, Vladimir Davydov wrote:
> > > On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> > > > On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> > > ...
> > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > > > index 3be791afd372..4ae12effe347 100644
> > > > > --- a/mm/memcontrol.c
> > > > > +++ b/mm/memcontrol.c
> > > > > @@ -4036,6 +4036,24 @@ static void mem_cgroup_id_get(struct mem_cgroup *memcg)
> > > > >  	atomic_inc(&memcg->id.ref);
> > > > >  }
> > > > >  
> > > > > +static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
> > > > > +{
> > > > > +	while (!atomic_inc_not_zero(&memcg->id.ref)) {
> > > > > +		/*
> > > > > +		 * The root cgroup cannot be destroyed, so it's refcount must
> > > > > +		 * always be >= 1.
> > > > > +		 */
> > > > > +		if (memcg == root_mem_cgroup) {
> > > > > +			VM_BUG_ON(1);
> > > > > +			break;
> > > > > +		}
> > > > 
> > > > why not simply VM_BUG_ON(memcg == root_mem_cgroup)?
> > > 
> > > Because with DEBUG_VM disabled we could wind up looping forever here if
> > > the refcount of the root_mem_cgroup got screwed up. On production
> > > kernels, it's better to break the loop and carry on closing eyes on
> > > diverging counters rather than getting a lockup.
> > 
> > Wouldn't this just paper over a real bug? Anyway I will not insist but
> > making the code more complex just to pretend we can handle a situation
> > gracefully doesn't sound right to me.
> 
> But we can handle this IMO. AFAICS diverging id refcount will typically
> result in leaking swap charges, which aren't even a real resource.

Fair enough.

> At
> worst, we can leak an offline mem_cgroup, which is also not critical
> enough to crash the production system.

Agreed.

> I see your concern of papering over a bug though. What about adding a
> warning there?

WARN_ON_ONCE sounds better...
 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1c0aa59fd333..8c8e68becee9 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4044,7 +4044,7 @@ static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
>  		 * The root cgroup cannot be destroyed, so it's refcount must
>  		 * always be >= 1.
>  		 */
> -		if (memcg == root_mem_cgroup) {
> +		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
>  			VM_BUG_ON(1);
>  			break;
>  		}
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-03 11:46       ` Vladimir Davydov
  2016-08-03 12:00         ` Michal Hocko
@ 2016-08-03 14:12         ` Johannes Weiner
  2016-08-03 14:31           ` Vladimir Davydov
  1 sibling, 1 reply; 17+ messages in thread
From: Johannes Weiner @ 2016-08-03 14:12 UTC (permalink / raw)
  To: Vladimir Davydov
  Cc: Michal Hocko, Andrew Morton, stable, linux-mm, linux-kernel

On Wed, Aug 03, 2016 at 02:46:40PM +0300, Vladimir Davydov wrote:
> On Wed, Aug 03, 2016 at 01:09:42PM +0200, Michal Hocko wrote:
> > On Wed 03-08-16 12:50:49, Vladimir Davydov wrote:
> > > On Tue, Aug 02, 2016 at 06:00:26PM +0200, Michal Hocko wrote:
> > > > On Tue 02-08-16 18:00:48, Vladimir Davydov wrote:
> > > ...
> > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > > > index 3be791afd372..4ae12effe347 100644
> > > > > --- a/mm/memcontrol.c
> > > > > +++ b/mm/memcontrol.c
> > > > > @@ -4036,6 +4036,24 @@ static void mem_cgroup_id_get(struct mem_cgroup *memcg)
> > > > >  	atomic_inc(&memcg->id.ref);
> > > > >  }
> > > > >  
> > > > > +static struct mem_cgroup *mem_cgroup_id_get_active(struct mem_cgroup *memcg)
> > > > > +{
> > > > > +	while (!atomic_inc_not_zero(&memcg->id.ref)) {
> > > > > +		/*
> > > > > +		 * The root cgroup cannot be destroyed, so it's refcount must
> > > > > +		 * always be >= 1.
> > > > > +		 */
> > > > > +		if (memcg == root_mem_cgroup) {
> > > > > +			VM_BUG_ON(1);
> > > > > +			break;
> > > > > +		}
> > > > 
> > > > why not simply VM_BUG_ON(memcg == root_mem_cgroup)?
> > > 
> > > Because with DEBUG_VM disabled we could wind up looping forever here if
> > > the refcount of the root_mem_cgroup got screwed up. On production
> > > kernels, it's better to break the loop and carry on closing eyes on
> > > diverging counters rather than getting a lockup.
> > 
> > Wouldn't this just paper over a real bug? Anyway I will not insist but
> > making the code more complex just to pretend we can handle a situation
> > gracefully doesn't sound right to me.
> 
> But we can handle this IMO. AFAICS diverging id refcount will typically
> result in leaking swap charges, which aren't even a real resource. At
> worst, we can leak an offline mem_cgroup, which is also not critical
> enough to crash the production system.

Agreed. If we have the option to detect and warn about the bug, but
can continue to limp along without causing data corruption, then
that's what we should do.

> I see your concern of papering over a bug though. What about adding a
> warning there?
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1c0aa59fd333..8c8e68becee9 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4044,7 +4044,7 @@ static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
>  		 * The root cgroup cannot be destroyed, so it's refcount must
>  		 * always be >= 1.
>  		 */
> -		if (memcg == root_mem_cgroup) {
> +		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
>  			VM_BUG_ON(1);
>  			break;
>  		}

The WARN_ON_ONCE() makes sense to me. But if we warn on all configs
anyway, the VM_BUG_ON() doesn't provide any additional value. Anybody
who is testing new code and enables DEBUG_VM should notice a warning
without requiring the kernel to blow up in their face; it also allows
them to check other state that is not necessarily available in BUG().

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup
  2016-08-03 14:12         ` Johannes Weiner
@ 2016-08-03 14:31           ` Vladimir Davydov
  0 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2016-08-03 14:31 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Michal Hocko, Andrew Morton, stable, linux-mm, linux-kernel

On Wed, Aug 03, 2016 at 10:12:03AM -0400, Johannes Weiner wrote:
> On Wed, Aug 03, 2016 at 02:46:40PM +0300, Vladimir Davydov wrote:
...
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 1c0aa59fd333..8c8e68becee9 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -4044,7 +4044,7 @@ static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
> >  		 * The root cgroup cannot be destroyed, so it's refcount must
> >  		 * always be >= 1.
> >  		 */
> > -		if (memcg == root_mem_cgroup) {
> > +		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
> >  			VM_BUG_ON(1);
> >  			break;
> >  		}
> 
> The WARN_ON_ONCE() makes sense to me. But if we warn on all configs
> anyway, the VM_BUG_ON() doesn't provide any additional value. Anybody
> who is testing new code and enables DEBUG_VM should notice a warning
> without requiring the kernel to blow up in their face; it also allows
> them to check other state that is not necessarily available in BUG().

Personally, I prefer to crash the kernel as early as possible when
debugging to get vmcore for further investigation. Judging by
mem_cgroup_update_lru_size(), I'm not alone.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2016-08-04  3:34 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-02 15:00 [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Vladimir Davydov
2016-08-02 15:00 ` [PATCH v2 2/3] mm: memcontrol: fix memcg id ref counter on swap charge move Vladimir Davydov
2016-08-02 17:22   ` Johannes Weiner
2016-08-02 15:00 ` [PATCH v2 3/3] mm: memcontrol: add sanity checks for memcg->id.ref on get/put Vladimir Davydov
2016-08-02 16:01   ` Michal Hocko
2016-08-02 17:27   ` Johannes Weiner
2016-08-02 16:00 ` [PATCH v2 1/3] mm: memcontrol: fix swap counter leak on swapout from offline cgroup Michal Hocko
2016-08-02 17:33   ` Johannes Weiner
2016-08-02 20:31     ` Michal Hocko
2016-08-03 10:06       ` Vladimir Davydov
2016-08-03  9:50   ` Vladimir Davydov
2016-08-03 11:09     ` Michal Hocko
2016-08-03 11:46       ` Vladimir Davydov
2016-08-03 12:00         ` Michal Hocko
2016-08-03 14:12         ` Johannes Weiner
2016-08-03 14:31           ` Vladimir Davydov
2016-08-02 17:21 ` Johannes Weiner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).