All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-18  5:34 ` Aneesh Kumar K.V
  0 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-18  5:34 UTC (permalink / raw)
  To: linux-mm, kamezawa.hiroyu, mhocko, akpm; +Cc: linux-kernel, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
need to check whether the cgroup has any charge left after that. Also skip
those hstates for which we don't have any charge in this cgroup.

Based on an earlier patch from  Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 mm/hugetlb_cgroup.c |   49 +++++++++++++++++++++----------------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index b834e8d..b355fb4 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -65,18 +65,6 @@ static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
 	return hugetlb_cgroup_from_cgroup(cg->parent);
 }
 
-static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
-{
-	int idx;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
-
-	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
-		if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
-			return true;
-	}
-	return false;
-}
-
 static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
 {
 	int idx;
@@ -159,24 +147,29 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
 {
 	struct hstate *h;
 	struct page *page;
-	int ret = 0, idx = 0;
+	int ret = 0, idx;
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
 
-	do {
-		if (cgroup_task_count(cgroup) ||
-		    !list_empty(&cgroup->children)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		for_each_hstate(h) {
-			spin_lock(&hugetlb_lock);
-			list_for_each_entry(page, &h->hugepage_activelist, lru)
-				hugetlb_cgroup_move_parent(idx, cgroup, page);
 
-			spin_unlock(&hugetlb_lock);
-			idx++;
-		}
-		cond_resched();
-	} while (hugetlb_cgroup_have_usage(cgroup));
+	if (cgroup_task_count(cgroup) ||
+	    !list_empty(&cgroup->children)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	for_each_hstate(h) {
+		/*
+		 * if we don't have any charge, skip this hstate
+		 */
+		idx = hstate_index(h);
+		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
+			continue;
+		spin_lock(&hugetlb_lock);
+		list_for_each_entry(page, &h->hugepage_activelist, lru)
+			hugetlb_cgroup_move_parent(idx, cgroup, page);
+		spin_unlock(&hugetlb_lock);
+		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
+	}
 out:
 	return ret;
 }
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-18  5:34 ` Aneesh Kumar K.V
  0 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-18  5:34 UTC (permalink / raw)
  To: linux-mm, kamezawa.hiroyu, mhocko, akpm; +Cc: linux-kernel, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
need to check whether the cgroup has any charge left after that. Also skip
those hstates for which we don't have any charge in this cgroup.

Based on an earlier patch from  Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 mm/hugetlb_cgroup.c |   49 +++++++++++++++++++++----------------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index b834e8d..b355fb4 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -65,18 +65,6 @@ static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
 	return hugetlb_cgroup_from_cgroup(cg->parent);
 }
 
-static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
-{
-	int idx;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
-
-	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
-		if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
-			return true;
-	}
-	return false;
-}
-
 static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
 {
 	int idx;
@@ -159,24 +147,29 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
 {
 	struct hstate *h;
 	struct page *page;
-	int ret = 0, idx = 0;
+	int ret = 0, idx;
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
 
-	do {
-		if (cgroup_task_count(cgroup) ||
-		    !list_empty(&cgroup->children)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		for_each_hstate(h) {
-			spin_lock(&hugetlb_lock);
-			list_for_each_entry(page, &h->hugepage_activelist, lru)
-				hugetlb_cgroup_move_parent(idx, cgroup, page);
 
-			spin_unlock(&hugetlb_lock);
-			idx++;
-		}
-		cond_resched();
-	} while (hugetlb_cgroup_have_usage(cgroup));
+	if (cgroup_task_count(cgroup) ||
+	    !list_empty(&cgroup->children)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	for_each_hstate(h) {
+		/*
+		 * if we don't have any charge, skip this hstate
+		 */
+		idx = hstate_index(h);
+		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
+			continue;
+		spin_lock(&hugetlb_lock);
+		list_for_each_entry(page, &h->hugepage_activelist, lru)
+			hugetlb_cgroup_move_parent(idx, cgroup, page);
+		spin_unlock(&hugetlb_lock);
+		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
+	}
 out:
 	return ret;
 }
-- 
1.7.10

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-18  5:34 ` Aneesh Kumar K.V
@ 2012-07-18  7:24   ` Wanpeng Li
  -1 siblings, 0 replies; 18+ messages in thread
From: Wanpeng Li @ 2012-07-18  7:24 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, kamezawa.hiroyu, mhocko, akpm
  Cc: linux-kernel, Wanpeng Li

On Wed, Jul 18, 2012 at 11:04:09AM +0530, Aneesh Kumar K.V wrote:
>From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>
>Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>need to check whether cgroup have any change left after that. Also skip
>those hstates for which we don't have any charge in this cgroup.
>
>Based on an earlier patch from  Wanpeng Li <liwanp@linux.vnet.ibm.com>
>Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>

>---
> mm/hugetlb_cgroup.c |   49 +++++++++++++++++++++----------------------------
> 1 file changed, 21 insertions(+), 28 deletions(-)
>
>diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
>index b834e8d..b355fb4 100644
>--- a/mm/hugetlb_cgroup.c
>+++ b/mm/hugetlb_cgroup.c
>@@ -65,18 +65,6 @@ static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
> 	return hugetlb_cgroup_from_cgroup(cg->parent);
> }
>
>-static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
>-{
>-	int idx;
>-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
>-
>-	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
>-		if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
>-			return true;
>-	}
>-	return false;
>-}
>-
> static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
> {
> 	int idx;
>@@ -159,24 +147,29 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
> {
> 	struct hstate *h;
> 	struct page *page;
>-	int ret = 0, idx = 0;
>+	int ret = 0, idx;
>+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
>
>-	do {
>-		if (cgroup_task_count(cgroup) ||
>-		    !list_empty(&cgroup->children)) {
>-			ret = -EBUSY;
>-			goto out;
>-		}
>-		for_each_hstate(h) {
>-			spin_lock(&hugetlb_lock);
>-			list_for_each_entry(page, &h->hugepage_activelist, lru)
>-				hugetlb_cgroup_move_parent(idx, cgroup, page);
>
>-			spin_unlock(&hugetlb_lock);
>-			idx++;
>-		}
>-		cond_resched();
>-	} while (hugetlb_cgroup_have_usage(cgroup));
>+	if (cgroup_task_count(cgroup) ||
>+	    !list_empty(&cgroup->children)) {
>+		ret = -EBUSY;
>+		goto out;
>+	}
>+
>+	for_each_hstate(h) {
>+		/*
>+		 * if we don't have any charge, skip this hstate
>+		 */
>+		idx = hstate_index(h);
>+		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>+			continue;
>+		spin_lock(&hugetlb_lock);
>+		list_for_each_entry(page, &h->hugepage_activelist, lru)
>+			hugetlb_cgroup_move_parent(idx, cgroup, page);
>+		spin_unlock(&hugetlb_lock);
>+		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>+	}
> out:
> 	return ret;
> }
>-- 
>1.7.10
>
>--
>To unsubscribe, send a message with 'unsubscribe linux-mm' in
>the body to majordomo@kvack.org.  For more info on Linux MM,
>see: http://www.linux-mm.org/ .
>Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-18  7:24   ` Wanpeng Li
  0 siblings, 0 replies; 18+ messages in thread
From: Wanpeng Li @ 2012-07-18  7:24 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linux-mm, kamezawa.hiroyu, mhocko, akpm
  Cc: linux-kernel, Wanpeng Li

On Wed, Jul 18, 2012 at 11:04:09AM +0530, Aneesh Kumar K.V wrote:
>From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>
>Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>need to check whether cgroup have any change left after that. Also skip
>those hstates for which we don't have any charge in this cgroup.
>
>Based on an earlier patch from  Wanpeng Li <liwanp@linux.vnet.ibm.com>
>Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>

>---
> mm/hugetlb_cgroup.c |   49 +++++++++++++++++++++----------------------------
> 1 file changed, 21 insertions(+), 28 deletions(-)
>
>diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
>index b834e8d..b355fb4 100644
>--- a/mm/hugetlb_cgroup.c
>+++ b/mm/hugetlb_cgroup.c
>@@ -65,18 +65,6 @@ static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
> 	return hugetlb_cgroup_from_cgroup(cg->parent);
> }
>
>-static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
>-{
>-	int idx;
>-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
>-
>-	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
>-		if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
>-			return true;
>-	}
>-	return false;
>-}
>-
> static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
> {
> 	int idx;
>@@ -159,24 +147,29 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
> {
> 	struct hstate *h;
> 	struct page *page;
>-	int ret = 0, idx = 0;
>+	int ret = 0, idx;
>+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup);
>
>-	do {
>-		if (cgroup_task_count(cgroup) ||
>-		    !list_empty(&cgroup->children)) {
>-			ret = -EBUSY;
>-			goto out;
>-		}
>-		for_each_hstate(h) {
>-			spin_lock(&hugetlb_lock);
>-			list_for_each_entry(page, &h->hugepage_activelist, lru)
>-				hugetlb_cgroup_move_parent(idx, cgroup, page);
>
>-			spin_unlock(&hugetlb_lock);
>-			idx++;
>-		}
>-		cond_resched();
>-	} while (hugetlb_cgroup_have_usage(cgroup));
>+	if (cgroup_task_count(cgroup) ||
>+	    !list_empty(&cgroup->children)) {
>+		ret = -EBUSY;
>+		goto out;
>+	}
>+
>+	for_each_hstate(h) {
>+		/*
>+		 * if we don't have any charge, skip this hstate
>+		 */
>+		idx = hstate_index(h);
>+		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>+			continue;
>+		spin_lock(&hugetlb_lock);
>+		list_for_each_entry(page, &h->hugepage_activelist, lru)
>+			hugetlb_cgroup_move_parent(idx, cgroup, page);
>+		spin_unlock(&hugetlb_lock);
>+		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>+	}
> out:
> 	return ret;
> }
>-- 
>1.7.10
>
>--
>To unsubscribe, send a message with 'unsubscribe linux-mm' in
>the body to majordomo@kvack.org.  For more info on Linux MM,
>see: http://www.linux-mm.org/ .
>Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-18  5:34 ` Aneesh Kumar K.V
@ 2012-07-18 21:26   ` Andrew Morton
  -1 siblings, 0 replies; 18+ messages in thread
From: Andrew Morton @ 2012-07-18 21:26 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

On Wed, 18 Jul 2012 11:04:09 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
> need to check whether cgroup have any change left after that. Also skip
> those hstates for which we don't have any charge in this cgroup.
> 
> ...
>
> +	for_each_hstate(h) {
> +		/*
> +		 * if we don't have any charge, skip this hstate
> +		 */
> +		idx = hstate_index(h);
> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
> +			continue;
> +		spin_lock(&hugetlb_lock);
> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
> +		spin_unlock(&hugetlb_lock);
> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
> +	}
>  out:
>  	return ret;
>  }

This looks fishy.

We test RES_USAGE before taking hugetlb_lock.  What prevents some other
thread from increasing RES_USAGE after that test?

After walking the list we test RES_USAGE after dropping hugetlb_lock. 
What prevents another thread from incrementing RES_USAGE before that
test, triggering the BUG?


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-18 21:26   ` Andrew Morton
  0 siblings, 0 replies; 18+ messages in thread
From: Andrew Morton @ 2012-07-18 21:26 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

On Wed, 18 Jul 2012 11:04:09 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
> need to check whether cgroup have any change left after that. Also skip
> those hstates for which we don't have any charge in this cgroup.
> 
> ...
>
> +	for_each_hstate(h) {
> +		/*
> +		 * if we don't have any charge, skip this hstate
> +		 */
> +		idx = hstate_index(h);
> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
> +			continue;
> +		spin_lock(&hugetlb_lock);
> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
> +		spin_unlock(&hugetlb_lock);
> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
> +	}
>  out:
>  	return ret;
>  }

This looks fishy.

We test RES_USAGE before taking hugetlb_lock.  What prevents some other
thread from increasing RES_USAGE after that test?

After walking the list we test RES_USAGE after dropping hugetlb_lock. 
What prevents another thread from incrementing RES_USAGE before that
test, triggering the BUG?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-18 21:26   ` Andrew Morton
@ 2012-07-19  2:55     ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-19  2:55 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

Andrew Morton <akpm@linux-foundation.org> writes:

> On Wed, 18 Jul 2012 11:04:09 +0530
> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>> need to check whether cgroup have any change left after that. Also skip
>> those hstates for which we don't have any charge in this cgroup.
>> 
>> ...
>>
>> +	for_each_hstate(h) {
>> +		/*
>> +		 * if we don't have any charge, skip this hstate
>> +		 */
>> +		idx = hstate_index(h);
>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>> +			continue;
>> +		spin_lock(&hugetlb_lock);
>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>> +		spin_unlock(&hugetlb_lock);
>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>> +	}
>>  out:
>>  	return ret;
>>  }
>
> This looks fishy.
>
> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
> thread from increasing RES_USAGE after that test?
>
> After walking the list we test RES_USAGE after dropping hugetlb_lock. 
> What prevents another thread from incrementing RES_USAGE before that
> test, triggering the BUG?

IIUC core cgroup will prevent a new task getting added to the cgroup
when we are in pre_destroy. Since we already check that the cgroup doesn't
have any task, the RES_USAGE cannot increase in pre_destroy.

-aneesh


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-19  2:55     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-19  2:55 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

Andrew Morton <akpm@linux-foundation.org> writes:

> On Wed, 18 Jul 2012 11:04:09 +0530
> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>> need to check whether cgroup have any change left after that. Also skip
>> those hstates for which we don't have any charge in this cgroup.
>> 
>> ...
>>
>> +	for_each_hstate(h) {
>> +		/*
>> +		 * if we don't have any charge, skip this hstate
>> +		 */
>> +		idx = hstate_index(h);
>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>> +			continue;
>> +		spin_lock(&hugetlb_lock);
>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>> +		spin_unlock(&hugetlb_lock);
>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>> +	}
>>  out:
>>  	return ret;
>>  }
>
> This looks fishy.
>
> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
> thread from increasing RES_USAGE after that test?
>
> After walking the list we test RES_USAGE after dropping hugetlb_lock. 
> What prevents another thread from incrementing RES_USAGE before that
> test, triggering the BUG?

IIUC core cgroup will prevent a new task getting added to the cgroup
when we are in pre_destroy. Since we already check that the cgroup doesn't
have any task, the RES_USAGE cannot increase in pre_destroy.

-aneesh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-19  2:55     ` Aneesh Kumar K.V
@ 2012-07-19  6:59       ` Li Zefan
  -1 siblings, 0 replies; 18+ messages in thread
From: Li Zefan @ 2012-07-19  6:59 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Andrew Morton, linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

on 2012/7/19 10:55, Aneesh Kumar K.V wrote:

> Andrew Morton <akpm@linux-foundation.org> writes:
> 
>> On Wed, 18 Jul 2012 11:04:09 +0530
>> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>>
>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>>
>>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>>> need to check whether cgroup have any change left after that. Also skip
>>> those hstates for which we don't have any charge in this cgroup.
>>>
>>> ...
>>>
>>> +	for_each_hstate(h) {
>>> +		/*
>>> +		 * if we don't have any charge, skip this hstate
>>> +		 */
>>> +		idx = hstate_index(h);
>>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>>> +			continue;
>>> +		spin_lock(&hugetlb_lock);
>>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>>> +		spin_unlock(&hugetlb_lock);
>>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>>> +	}
>>>  out:
>>>  	return ret;
>>>  }
>>
>> This looks fishy.
>>
>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>> thread from increasing RES_USAGE after that test?
>>
>> After walking the list we test RES_USAGE after dropping hugetlb_lock. 
>> What prevents another thread from incrementing RES_USAGE before that
>> test, triggering the BUG?
> 
> IIUC core cgroup will prevent a new task getting added to the cgroup
> when we are in pre_destroy. Since we already check that the cgroup doesn't
> have any task, the RES_USAGE cannot increase in pre_destroy.
> 


You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
the lock after that, so a task can be attached to the cgroup in this interval.

See 3fa59dfbc3b223f02c26593be69ce6fc9a940405 ("cgroup: fix potential deadlock in pre_destroy")

But I think the memcg->pre_destroy has been reworked and now we can safely hold
cgroup_lock when calling the callback, and this can make the code a bit simpler.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-19  6:59       ` Li Zefan
  0 siblings, 0 replies; 18+ messages in thread
From: Li Zefan @ 2012-07-19  6:59 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Andrew Morton, linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

on 2012/7/19 10:55, Aneesh Kumar K.V wrote:

> Andrew Morton <akpm@linux-foundation.org> writes:
> 
>> On Wed, 18 Jul 2012 11:04:09 +0530
>> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>>
>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>>
>>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>>> need to check whether cgroup have any change left after that. Also skip
>>> those hstates for which we don't have any charge in this cgroup.
>>>
>>> ...
>>>
>>> +	for_each_hstate(h) {
>>> +		/*
>>> +		 * if we don't have any charge, skip this hstate
>>> +		 */
>>> +		idx = hstate_index(h);
>>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>>> +			continue;
>>> +		spin_lock(&hugetlb_lock);
>>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>>> +		spin_unlock(&hugetlb_lock);
>>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>>> +	}
>>>  out:
>>>  	return ret;
>>>  }
>>
>> This looks fishy.
>>
>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>> thread from increasing RES_USAGE after that test?
>>
>> After walking the list we test RES_USAGE after dropping hugetlb_lock. 
>> What prevents another thread from incrementing RES_USAGE before that
>> test, triggering the BUG?
> 
> IIUC core cgroup will prevent a new task getting added to the cgroup
> when we are in pre_destroy. Since we already check that the cgroup doesn't
> have any task, the RES_USAGE cannot increase in pre_destroy.
> 


You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
the lock after that, so a task can be attached to the cgroup in this interval.

See 3fa59dfbc3b223f02c26593be69ce6fc9a940405 ("cgroup: fix potential deadlock in pre_destroy")

But I think the memcg->pre_destroy has been reworked and now we can safely hold
cgroup_lock when calling the callback, and this can make the code a bit simpler.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-19  6:59       ` Li Zefan
@ 2012-07-19  9:41         ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-19  9:41 UTC (permalink / raw)
  To: Li Zefan; +Cc: Andrew Morton, linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

Li Zefan <lizefan@huawei.com> writes:

> on 2012/7/19 10:55, Aneesh Kumar K.V wrote:
>
>> Andrew Morton <akpm@linux-foundation.org> writes:
>> 
>>> On Wed, 18 Jul 2012 11:04:09 +0530
>>> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>>>
>>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>>>
>>>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>>>> need to check whether cgroup have any change left after that. Also skip
>>>> those hstates for which we don't have any charge in this cgroup.
>>>>
>>>> ...
>>>>
>>>> +	for_each_hstate(h) {
>>>> +		/*
>>>> +		 * if we don't have any charge, skip this hstate
>>>> +		 */
>>>> +		idx = hstate_index(h);
>>>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>>>> +			continue;
>>>> +		spin_lock(&hugetlb_lock);
>>>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>>>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>>>> +		spin_unlock(&hugetlb_lock);
>>>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>>>> +	}
>>>>  out:
>>>>  	return ret;
>>>>  }
>>>
>>> This looks fishy.
>>>
>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>>> thread from increasing RES_USAGE after that test?
>>>
>>> After walking the list we test RES_USAGE after dropping hugetlb_lock. 
>>> What prevents another thread from incrementing RES_USAGE before that
>>> test, triggering the BUG?
>> 
>> IIUC core cgroup will prevent a new task getting added to the cgroup
>> when we are in pre_destroy. Since we already check that the cgroup doesn't
>> have any task, the RES_USAGE cannot increase in pre_destroy.
>> 
>
>
> You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
> the lock after that, so a task can be attached to the cgroup in this interval.
>

But that means rmdir can be racy, right? What happens if the task got
added, allocated a few pages and then moved out? We would still have a task
count of 0 but a few pages, which we missed moving to the parent cgroup.

-aneesh


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-19  9:41         ` Aneesh Kumar K.V
  0 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-19  9:41 UTC (permalink / raw)
  To: Li Zefan; +Cc: Andrew Morton, linux-mm, kamezawa.hiroyu, mhocko, linux-kernel

Li Zefan <lizefan@huawei.com> writes:

> on 2012/7/19 10:55, Aneesh Kumar K.V wrote:
>
>> Andrew Morton <akpm@linux-foundation.org> writes:
>> 
>>> On Wed, 18 Jul 2012 11:04:09 +0530
>>> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>>>
>>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>>>
>>>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>>>> need to check whether cgroup have any change left after that. Also skip
>>>> those hstates for which we don't have any charge in this cgroup.
>>>>
>>>> ...
>>>>
>>>> +	for_each_hstate(h) {
>>>> +		/*
>>>> +		 * if we don't have any charge, skip this hstate
>>>> +		 */
>>>> +		idx = hstate_index(h);
>>>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>>>> +			continue;
>>>> +		spin_lock(&hugetlb_lock);
>>>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>>>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>>>> +		spin_unlock(&hugetlb_lock);
>>>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>>>> +	}
>>>>  out:
>>>>  	return ret;
>>>>  }
>>>
>>> This looks fishy.
>>>
>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>>> thread from increasing RES_USAGE after that test?
>>>
>>> After walking the list we test RES_USAGE after dropping hugetlb_lock. 
>>> What prevents another thread from incrementing RES_USAGE before that
>>> test, triggering the BUG?
>> 
>> IIUC core cgroup will prevent a new task getting added to the cgroup
>> when we are in pre_destroy. Since we already check that the cgroup doesn't
>> have any task, the RES_USAGE cannot increase in pre_destroy.
>> 
>
>
> You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
> the lock after that, so a task can be attached to the cgroup in this interval.
>

But that means rmdir can be racy, right? What happens if the task got
added, allocated a few pages and then moved out? We would still have a task
count of 0 but a few pages, which we missed moving to the parent cgroup.

-aneesh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-19  9:41         ` Aneesh Kumar K.V
@ 2012-07-19 10:25           ` Kamezawa Hiroyuki
  -1 siblings, 0 replies; 18+ messages in thread
From: Kamezawa Hiroyuki @ 2012-07-19 10:25 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Li Zefan, Andrew Morton, linux-mm, mhocko, linux-kernel

(2012/07/19 18:41), Aneesh Kumar K.V wrote:
> Li Zefan <lizefan@huawei.com> writes:
>
>> on 2012/7/19 10:55, Aneesh Kumar K.V wrote:
>>
>>> Andrew Morton <akpm@linux-foundation.org> writes:
>>>
>>>> On Wed, 18 Jul 2012 11:04:09 +0530
>>>> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>>>>
>>>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>>>>
>>>>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>>>>> need to check whether cgroup have any change left after that. Also skip
>>>>> those hstates for which we don't have any charge in this cgroup.
>>>>>
>>>>> ...
>>>>>
>>>>> +	for_each_hstate(h) {
>>>>> +		/*
>>>>> +		 * if we don't have any charge, skip this hstate
>>>>> +		 */
>>>>> +		idx = hstate_index(h);
>>>>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>>>>> +			continue;
>>>>> +		spin_lock(&hugetlb_lock);
>>>>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>>>>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>>>>> +		spin_unlock(&hugetlb_lock);
>>>>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>>>>> +	}
>>>>>   out:
>>>>>   	return ret;
>>>>>   }
>>>>
>>>> This looks fishy.
>>>>
>>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>>>> thread from increasing RES_USAGE after that test?
>>>>
>>>> After walking the list we test RES_USAGE after dropping hugetlb_lock.
>>>> What prevents another thread from incrementing RES_USAGE before that
>>>> test, triggering the BUG?
>>>
>>> IIUC core cgroup will prevent a new task getting added to the cgroup
>>> when we are in pre_destroy. Since we already check that the cgroup doesn't
>>> have any task, the RES_USAGE cannot increase in pre_destroy.
>>>
>>
>>
>> You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
>> the lock after that, so a task can be attached to the cgroup in this interval.
>>
>
> But that means rmdir can be racy right ? What happens if the task got
> added, allocated few pages and then moved out ? We still would have task
> count 0 but few pages, which we missed to to move to parent cgroup.
>

That's a problem even if it's verrrry unlikely.
I'd like to look into it and fix the race in cgroup layer.
But I'm sorry I'm a bit busy in these days...

Thanks,
-Kame



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-19 10:25           ` Kamezawa Hiroyuki
  0 siblings, 0 replies; 18+ messages in thread
From: Kamezawa Hiroyuki @ 2012-07-19 10:25 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Li Zefan, Andrew Morton, linux-mm, mhocko, linux-kernel

(2012/07/19 18:41), Aneesh Kumar K.V wrote:
> Li Zefan <lizefan@huawei.com> writes:
>
>> on 2012/7/19 10:55, Aneesh Kumar K.V wrote:
>>
>>> Andrew Morton <akpm@linux-foundation.org> writes:
>>>
>>>> On Wed, 18 Jul 2012 11:04:09 +0530
>>>> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
>>>>
>>>>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>>>>
>>>>> Since we cannot fail in hugetlb_cgroup_move_parent, we don't really
>>>>> need to check whether the cgroup has any charge left after that. Also skip
>>>>> those hstates for which we don't have any charge in this cgroup.
>>>>>
>>>>> ...
>>>>>
>>>>> +	for_each_hstate(h) {
>>>>> +		/*
>>>>> +		 * if we don't have any charge, skip this hstate
>>>>> +		 */
>>>>> +		idx = hstate_index(h);
>>>>> +		if (res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE) == 0)
>>>>> +			continue;
>>>>> +		spin_lock(&hugetlb_lock);
>>>>> +		list_for_each_entry(page, &h->hugepage_activelist, lru)
>>>>> +			hugetlb_cgroup_move_parent(idx, cgroup, page);
>>>>> +		spin_unlock(&hugetlb_lock);
>>>>> +		VM_BUG_ON(res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE));
>>>>> +	}
>>>>>   out:
>>>>>   	return ret;
>>>>>   }
>>>>
>>>> This looks fishy.
>>>>
>>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>>>> thread from increasing RES_USAGE after that test?
>>>>
>>>> After walking the list we test RES_USAGE after dropping hugetlb_lock.
>>>> What prevents another thread from incrementing RES_USAGE before that
>>>> test, triggering the BUG?
>>>
>>> IIUC core cgroup will prevent a new task getting added to the cgroup
>>> when we are in pre_destroy. Since we already check that the cgroup doesn't
>>> have any task, the RES_USAGE cannot increase in pre_destroy.
>>>
>>
>>
>> You're wrong here. We release cgroup_lock before calling pre_destroy and reacquire
>> the lock after that, so a task can be attached to the cgroup in this interval.
>>
>
> But that means rmdir can be racy right ? What happens if the task got
> added, allocated a few pages and then moved out ? We still would have task
> count 0 but a few pages, which we missed to move to the parent cgroup.
>

That's a problem even if it's verrrry unlikely.
I'd like to look into it and fix the race in cgroup layer.
But I'm sorry I'm a bit busy in these days...

Thanks,
-Kame


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-19 10:25           ` Kamezawa Hiroyuki
@ 2012-07-19 11:26             ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-19 11:26 UTC (permalink / raw)
  To: Kamezawa Hiroyuki; +Cc: Li Zefan, Andrew Morton, linux-mm, mhocko, linux-kernel

Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> writes:

>>>>>
>>>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>>>>> thread from increasing RES_USAGE after that test?
>>>>>
>>>>> After walking the list we test RES_USAGE after dropping hugetlb_lock.
>>>>> What prevents another thread from incrementing RES_USAGE before that
>>>>> test, triggering the BUG?
>>>>
>>>> IIUC core cgroup will prevent a new task getting added to the cgroup
>>>> when we are in pre_destroy. Since we already check that the cgroup doesn't
>>>> have any task, the RES_USAGE cannot increase in pre_destroy.
>>>>
>>>
>>>
>>> You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
>>> the lock after that, so a task can be attached to the cgroup in this interval.
>>>
>>
>> But that means rmdir can be racy right ? What happens if the task got
>> added, allocated a few pages and then moved out ? We still would have task
>> count 0 but a few pages, which we missed to move to the parent cgroup.
>>
>
> That's a problem even if it's verrrry unlikely.
> I'd like to look into it and fix the race in cgroup layer.
> But I'm sorry I'm a bit busy in these days...
>

How about moving that mutex_unlock(&cgroup_mutex) to memcg callback ? That
can be a patch for 3.5 ? 

-aneesh
 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-19 11:26             ` Aneesh Kumar K.V
  0 siblings, 0 replies; 18+ messages in thread
From: Aneesh Kumar K.V @ 2012-07-19 11:26 UTC (permalink / raw)
  To: Kamezawa Hiroyuki; +Cc: Li Zefan, Andrew Morton, linux-mm, mhocko, linux-kernel

Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> writes:

>>>>>
>>>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
>>>>> thread from increasing RES_USAGE after that test?
>>>>>
>>>>> After walking the list we test RES_USAGE after dropping hugetlb_lock.
>>>>> What prevents another thread from incrementing RES_USAGE before that
>>>>> test, triggering the BUG?
>>>>
>>>> IIUC core cgroup will prevent a new task getting added to the cgroup
>>>> when we are in pre_destroy. Since we already check that the cgroup doesn't
>>>> have any task, the RES_USAGE cannot increase in pre_destroy.
>>>>
>>>
>>>
>>> You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
>>> the lock after that, so a task can be attached to the cgroup in this interval.
>>>
>>
>> But that means rmdir can be racy right ? What happens if the task got
>> added, allocated a few pages and then moved out ? We still would have task
>> count 0 but a few pages, which we missed to move to the parent cgroup.
>>
>
> That's a problem even if it's verrrry unlikely.
> I'd like to look into it and fix the race in cgroup layer.
> But I'm sorry I'm a bit busy in these days...
>

How about moving that mutex_unlock(&cgroup_mutex) to memcg callback ? That
can be a patch for 3.5 ? 

-aneesh
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
  2012-07-19 11:26             ` Aneesh Kumar K.V
@ 2012-07-19 11:42               ` Michal Hocko
  -1 siblings, 0 replies; 18+ messages in thread
From: Michal Hocko @ 2012-07-19 11:42 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Kamezawa Hiroyuki, Li Zefan, Andrew Morton, linux-mm, linux-kernel

On Thu 19-07-12 16:56:18, Aneesh Kumar K.V wrote:
> Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> writes:
> 
> >>>>>
> >>>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
> >>>>> thread from increasing RES_USAGE after that test?
> >>>>>
> >>>>> After walking the list we test RES_USAGE after dropping hugetlb_lock.
> >>>>> What prevents another thread from incrementing RES_USAGE before that
> >>>>> test, triggering the BUG?
> >>>>
> >>>> IIUC core cgroup will prevent a new task getting added to the cgroup
> >>>> when we are in pre_destroy. Since we already check that the cgroup doesn't
> >>>> have any task, the RES_USAGE cannot increase in pre_destroy.
> >>>>
> >>>
> >>>
> >>> You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
> >>> the lock after that, so a task can be attached to the cgroup in this interval.
> >>>
> >>
> >> But that means rmdir can be racy right ? What happens if the task got
> >> added, allocated a few pages and then moved out ? We still would have task
> >> count 0 but a few pages, which we missed to move to the parent cgroup.
> >>
> >
> > That's a problem even if it's verrrry unlikely.
> > I'd like to look into it and fix the race in cgroup layer.
> > But I'm sorry I'm a bit busy in these days...
> >
> 
> How about moving that mutex_unlock(&cgroup_mutex) to memcg callback ? That
> can be a patch for 3.5 ? 

Bahh, I have just posted a follow up on mm-commits email exactly about
this. Sorry I have missed that the discussion is still ongoing. I have
posted also something I guess should help. Can we follow up on that one
or should I post the patch here as well?

> 
> -aneesh
>  
> 

-- 
Michal Hocko
SUSE Labs
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9    
Czech Republic

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] hugetlb/cgroup: Simplify pre_destroy callback
@ 2012-07-19 11:42               ` Michal Hocko
  0 siblings, 0 replies; 18+ messages in thread
From: Michal Hocko @ 2012-07-19 11:42 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Kamezawa Hiroyuki, Li Zefan, Andrew Morton, linux-mm, linux-kernel

On Thu 19-07-12 16:56:18, Aneesh Kumar K.V wrote:
> Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> writes:
> 
> >>>>>
> >>>>> We test RES_USAGE before taking hugetlb_lock.  What prevents some other
> >>>>> thread from increasing RES_USAGE after that test?
> >>>>>
> >>>>> After walking the list we test RES_USAGE after dropping hugetlb_lock.
> >>>>> What prevents another thread from incrementing RES_USAGE before that
> >>>>> test, triggering the BUG?
> >>>>
> >>>> IIUC core cgroup will prevent a new task getting added to the cgroup
> >>>> when we are in pre_destroy. Since we already check that the cgroup doesn't
> >>>> have any task, the RES_USAGE cannot increase in pre_destroy.
> >>>>
> >>>
> >>>
> >>> You're wrong here. We release cgroup_lock before calling pre_destroy and retrieve
> >>> the lock after that, so a task can be attached to the cgroup in this interval.
> >>>
> >>
> >> But that means rmdir can be racy right ? What happens if the task got
> >> added, allocated a few pages and then moved out ? We still would have task
> >> count 0 but a few pages, which we missed to move to the parent cgroup.
> >>
> >
> > That's a problem even if it's verrrry unlikely.
> > I'd like to look into it and fix the race in cgroup layer.
> > But I'm sorry I'm a bit busy in these days...
> >
> 
> How about moving that mutex_unlock(&cgroup_mutex) to memcg callback ? That
> can be a patch for 3.5 ? 

Bahh, I have just posted a follow up on mm-commits email exactly about
this. Sorry I have missed that the discussion is still ongoing. I have
posted also something I guess should help. Can we follow up on that one
or should I post the patch here as well?

> 
> -aneesh
>  
> 

-- 
Michal Hocko
SUSE Labs
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9    
Czech Republic

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2012-07-19 11:42 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-18  5:34 [PATCH] hugetlb/cgroup: Simplify pre_destroy callback Aneesh Kumar K.V
2012-07-18  5:34 ` Aneesh Kumar K.V
2012-07-18  7:24 ` Wanpeng Li
2012-07-18  7:24   ` Wanpeng Li
2012-07-18 21:26 ` Andrew Morton
2012-07-18 21:26   ` Andrew Morton
2012-07-19  2:55   ` Aneesh Kumar K.V
2012-07-19  2:55     ` Aneesh Kumar K.V
2012-07-19  6:59     ` Li Zefan
2012-07-19  6:59       ` Li Zefan
2012-07-19  9:41       ` Aneesh Kumar K.V
2012-07-19  9:41         ` Aneesh Kumar K.V
2012-07-19 10:25         ` Kamezawa Hiroyuki
2012-07-19 10:25           ` Kamezawa Hiroyuki
2012-07-19 11:26           ` Aneesh Kumar K.V
2012-07-19 11:26             ` Aneesh Kumar K.V
2012-07-19 11:42             ` Michal Hocko
2012-07-19 11:42               ` Michal Hocko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.