* [PATCH 1/6] memcg: simplify move_account() check.
2012-02-06 10:06 [RFC] [PATCH 0/6 v3] memcg: page cgroup diet KAMEZAWA Hiroyuki
@ 2012-02-06 10:07 ` KAMEZAWA Hiroyuki
2012-02-06 22:38 ` Andrew Morton
2012-02-06 10:09 ` [RFC] [PATCH 2/6 v3] memcg: remove EXPORT_SYMBOL(mem_cgroup_update_page_stat) KAMEZAWA Hiroyuki
` (4 subsequent siblings)
5 siblings, 1 reply; 9+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-06 10:07 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han,
Hugh Dickins, akpm
>From c75cc843ca0cb36de97ab814e59fb4ab7b1ffbd1 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 2 Feb 2012 10:02:39 +0900
Subject: [PATCH 1/6] memcg: simplify move_account() check.
In memcg, for avoiding take-lock-irq-off at accessing page_cgroup,
a logic, flag + rcu_read_lock(), is used. This works as following
CPU-A CPU-B
rcu_read_lock()
set flag
if(flag is set)
take heavy lock
do job.
synchronize_rcu() rcu_read_unlock()
In recent discussion, it's argued that using per-cpu value for this
flag just complicates the code because 'set flag' is very rare.
This patch changes 'flag' implementation from percpu to atomic_t.
This will be much simpler.
memcontrol.c | 65 ++++++++++++++++++++++-------------------------------------
1 file changed, 25 insertions(+), 40 deletions(-)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
mm/memcontrol.c | 65 +++++++++++++++++++++---------------------------------
1 files changed, 25 insertions(+), 40 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab315ab..9bf9d1c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -89,7 +89,6 @@ enum mem_cgroup_stat_index {
MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */
MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
- MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */
MEM_CGROUP_STAT_NSTATS,
};
@@ -278,6 +277,10 @@ struct mem_cgroup {
*/
unsigned long move_charge_at_immigrate;
/*
+ * set > 0 if pages under this cgroup are moving to other cgroup.
+ */
+ atomic_t moving_account;
+ /*
* percpu counter.
*/
struct mem_cgroup_stat_cpu *stat;
@@ -1253,35 +1256,31 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
return memcg->swappiness;
}
+/*
+ * memcg->moving_account is used for checking possibility that some thread is
+ * calling move_account(). When a thread on CPU-A starts moving pages under
+ * a memcg, other threads sholud check memcg->moving_account under
+ * rcu_read_lock(), like this:
+ *
+ * CPU-A CPU-B
+ * rcu_read_lock()
+ * memcg->moving_account+1 if (memcg->mocing_account)
+ * take havier locks.
+ * syncronize_rcu() update something.
+ * rcu_read_unlock()
+ * start move here.
+ */
static void mem_cgroup_start_move(struct mem_cgroup *memcg)
{
- int cpu;
-
- get_online_cpus();
- spin_lock(&memcg->pcp_counter_lock);
- for_each_online_cpu(cpu)
- per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
- memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
- spin_unlock(&memcg->pcp_counter_lock);
- put_online_cpus();
-
+ atomic_inc(&memcg->moving_account);
synchronize_rcu();
}
static void mem_cgroup_end_move(struct mem_cgroup *memcg)
{
- int cpu;
-
- if (!memcg)
- return;
- get_online_cpus();
- spin_lock(&memcg->pcp_counter_lock);
- for_each_online_cpu(cpu)
- per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
- memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
- spin_unlock(&memcg->pcp_counter_lock);
- put_online_cpus();
+ if (memcg)
+ atomic_dec(&memcg->moving_account);
}
/*
* 2 routines for checking "mem" is under move_account() or not.
@@ -1298,7 +1297,7 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
{
VM_BUG_ON(!rcu_read_lock_held());
- return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
+ return atomic_read(&memcg->moving_account);
}
static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
@@ -1849,8 +1848,8 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
* by flags.
*
* Considering "move", this is an only case we see a race. To make the race
- * small, we check MEM_CGROUP_ON_MOVE percpu value and detect there are
- * possibility of race condition. If there is, we take a lock.
+ * small, we check memcg->moving_account and detect there is a possibility of
+ * race. If there is, we take a lock.
*/
void mem_cgroup_update_page_stat(struct page *page,
@@ -2068,17 +2067,6 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
per_cpu(memcg->stat->events[i], cpu) = 0;
memcg->nocpu_base.events[i] += x;
}
- /* need to clear ON_MOVE value, works as a kind of lock. */
- per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
- spin_unlock(&memcg->pcp_counter_lock);
-}
-
-static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu)
-{
- int idx = MEM_CGROUP_ON_MOVE;
-
- spin_lock(&memcg->pcp_counter_lock);
- per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx];
spin_unlock(&memcg->pcp_counter_lock);
}
@@ -2090,11 +2078,8 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
struct memcg_stock_pcp *stock;
struct mem_cgroup *iter;
- if ((action == CPU_ONLINE)) {
- for_each_mem_cgroup(iter)
- synchronize_mem_cgroup_on_move(iter, cpu);
+ if ((action == CPU_ONLINE))
return NOTIFY_OK;
- }
if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN)
return NOTIFY_OK;
--
1.7.4.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/6] memcg: simplify move_account() check.
2012-02-06 10:07 ` [PATCH 1/6] memcg: simplify move_account() check KAMEZAWA Hiroyuki
@ 2012-02-06 22:38 ` Andrew Morton
2012-02-07 0:19 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 9+ messages in thread
From: Andrew Morton @ 2012-02-06 22:38 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han, Hugh Dickins
On Mon, 6 Feb 2012 19:07:59 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> >From c75cc843ca0cb36de97ab814e59fb4ab7b1ffbd1 Mon Sep 17 00:00:00 2001
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Date: Thu, 2 Feb 2012 10:02:39 +0900
> Subject: [PATCH 1/6] memcg: simplify move_account() check.
>
> In memcg, for avoiding take-lock-irq-off at accessing page_cgroup,
> a logic, flag + rcu_read_lock(), is used. This works as following
>
> CPU-A CPU-B
> rcu_read_lock()
> set flag
> if(flag is set)
> take heavy lock
> do job.
> synchronize_rcu() rcu_read_unlock()
>
> In recent discussion, it's argued that using per-cpu value for this
> flag just complicates the code because 'set flag' is very rare.
>
> This patch changes 'flag' implementation from percpu to atomic_t.
> This will be much simpler.
>
To me, "RFC" says "might not be ready for merging yet". You're up to
v3 - why is it still RFC? You're still expecting to make significant
changes?
>
> }
> +/*
> + * memcg->moving_account is used for checking possibility that some thread is
> + * calling move_account(). When a thread on CPU-A starts moving pages under
> + * a memcg, other threads sholud check memcg->moving_account under
"should"
> + * rcu_read_lock(), like this:
> + *
> + * CPU-A CPU-B
> + * rcu_read_lock()
> + * memcg->moving_account+1 if (memcg->mocing_account)
> + * take havier locks.
> + * syncronize_rcu() update something.
> + * rcu_read_unlock()
> + * start move here.
> + */
>
> static void mem_cgroup_start_move(struct mem_cgroup *memcg)
> {
> - int cpu;
> -
> - get_online_cpus();
> - spin_lock(&memcg->pcp_counter_lock);
> - for_each_online_cpu(cpu)
> - per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
> - memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
> - spin_unlock(&memcg->pcp_counter_lock);
> - put_online_cpus();
> -
> + atomic_inc(&memcg->moving_account);
> synchronize_rcu();
> }
>
> static void mem_cgroup_end_move(struct mem_cgroup *memcg)
> {
> - int cpu;
> -
> - if (!memcg)
> - return;
> - get_online_cpus();
> - spin_lock(&memcg->pcp_counter_lock);
> - for_each_online_cpu(cpu)
> - per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
> - memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
> - spin_unlock(&memcg->pcp_counter_lock);
> - put_online_cpus();
> + if (memcg)
> + atomic_dec(&memcg->moving_account);
> }
It's strange that end_move handles a NULL memcg but start_move does not.
> /*
> * 2 routines for checking "mem" is under move_account() or not.
> @@ -1298,7 +1297,7 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
> static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
> {
> VM_BUG_ON(!rcu_read_lock_held());
> - return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
> + return atomic_read(&memcg->moving_account);
> }
So a bool-returning function can return something > 1?
I don't know what the compiler would make of that. Presumably "if (b)"
will work OK, but will "if (b1 == b2)"?
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/6] memcg: simplify move_account() check.
2012-02-06 22:38 ` Andrew Morton
@ 2012-02-07 0:19 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 9+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-07 0:19 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han, Hugh Dickins
On Mon, 6 Feb 2012 14:38:53 -0800
Andrew Morton <akpm@linux-foundation.org> wrote:
> On Mon, 6 Feb 2012 19:07:59 +0900
> KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
>
> > >From c75cc843ca0cb36de97ab814e59fb4ab7b1ffbd1 Mon Sep 17 00:00:00 2001
> > From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > Date: Thu, 2 Feb 2012 10:02:39 +0900
> > Subject: [PATCH 1/6] memcg: simplify move_account() check.
> >
> > In memcg, for avoiding take-lock-irq-off at accessing page_cgroup,
> > a logic, flag + rcu_read_lock(), is used. This works as following
> >
> > CPU-A CPU-B
> > rcu_read_lock()
> > set flag
> > if(flag is set)
> > take heavy lock
> > do job.
> > synchronize_rcu() rcu_read_unlock()
> >
> > In recent discussion, it's argued that using per-cpu value for this
> > flag just complicates the code because 'set flag' is very rare.
> >
> > This patch changes 'flag' implementation from percpu to atomic_t.
> > This will be much simpler.
> >
>
> To me, "RFC" says "might not be ready for merging yet". You're up to
> v3 - why is it still RFC? You're still expecting to make significant
> changes?
>
Yes, I made changes discussed in v2. and need to show how it looks.
I'm sorry that changelog wasn't enough.
> >
> > }
> > +/*
> > + * memcg->moving_account is used for checking possibility that some thread is
> > + * calling move_account(). When a thread on CPU-A starts moving pages under
> > + * a memcg, other threads sholud check memcg->moving_account under
>
> "should"
>
Sure..
> > + * rcu_read_lock(), like this:
> > + *
> > + * CPU-A CPU-B
> > + * rcu_read_lock()
> > + * memcg->moving_account+1 if (memcg->mocing_account)
> > + * take havier locks.
> > + * syncronize_rcu() update something.
> > + * rcu_read_unlock()
> > + * start move here.
> > + */
> >
> > static void mem_cgroup_start_move(struct mem_cgroup *memcg)
> > {
> > - int cpu;
> > -
> > - get_online_cpus();
> > - spin_lock(&memcg->pcp_counter_lock);
> > - for_each_online_cpu(cpu)
> > - per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
> > - memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
> > - spin_unlock(&memcg->pcp_counter_lock);
> > - put_online_cpus();
> > -
> > + atomic_inc(&memcg->moving_account);
> > synchronize_rcu();
> > }
> >
> > static void mem_cgroup_end_move(struct mem_cgroup *memcg)
> > {
> > - int cpu;
> > -
> > - if (!memcg)
> > - return;
> > - get_online_cpus();
> > - spin_lock(&memcg->pcp_counter_lock);
> > - for_each_online_cpu(cpu)
> > - per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
> > - memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
> > - spin_unlock(&memcg->pcp_counter_lock);
> > - put_online_cpus();
> > + if (memcg)
> > + atomic_dec(&memcg->moving_account);
> > }
>
> It's strange that end_move handles a NULL memcg but start_move does not.
>
Ah, the reason was that mem_cgroup_end_move() can be called in mem_cgroup_clear_mc().
This mem_cgroup_clear_mc() can call mem_cgroup_end_move(NULL)...
Then, this function has NULL check in callee side.
I'll add comments.
> > /*
> > * 2 routines for checking "mem" is under move_account() or not.
> > @@ -1298,7 +1297,7 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
> > static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
> > {
> > VM_BUG_ON(!rcu_read_lock_held());
> > - return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
> > + return atomic_read(&memcg->moving_account);
> > }
>
> So a bool-returning function can return something > 1?
>
> I don't know what the compiler would make of that. Presumably "if (b)"
> will work OK, but will "if (b1 == b2)"?
>
if (!mem_cgroup_stealed(memcg))
ffffffff8116e278: 85 c0 test %eax,%eax
ffffffff8116e27a: 74 1f je ffffffff8116e29b <__mem_cgroup_begin_update_page_stat+0x7b>
return;
ffffffff8116e29b: 5b pop %rbx
ffffffff8116e29c: 41 5c pop %r12
ffffffff8116e29e: 41 5d pop %r13
ffffffff8116e2a0: 41 5e pop %r14
ffffffff8116e2a2: c9 leaveq
ffffffff8116e2a3: c3 retq
Maybe works as expected but... I'll rewrite..how about this ?.
>From 3cefc03a4da41843ea2439f1c0ca630c64e8cf87 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 2 Feb 2012 10:02:39 +0900
Subject: [PATCH] memcg: simplify move_account() check.
In memcg, for avoiding take-lock-irq-off at accessing page_cgroup,
a logic, flag + rcu_read_lock(), is used. This works as following
CPU-A CPU-B
rcu_read_lock()
set flag
if(flag is set)
take heavy lock
do job.
synchronize_rcu() rcu_read_unlock()
In recent discussion, it's argued that using per-cpu value for this
flag just complicates the code because 'set flag' is very rare.
This patch changes 'flag' implementation from percpu to atomic_t.
This will be much simpler.
Changelog: v4
- fixed many typos.
- check return variables should be bool
- add comments.
Changelog: v3
- this is a new patch since v3.
memcontrol.c | 65 ++++++++++++++++++++++-------------------------------------
1 file changed, 25 insertions(+), 40 deletions(-)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
mm/memcontrol.c | 70 +++++++++++++++++++++++-------------------------------
1 files changed, 30 insertions(+), 40 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab315ab..0359175 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -89,7 +89,6 @@ enum mem_cgroup_stat_index {
MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */
MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
- MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */
MEM_CGROUP_STAT_NSTATS,
};
@@ -278,6 +277,10 @@ struct mem_cgroup {
*/
unsigned long move_charge_at_immigrate;
/*
+ * set > 0 if pages under this cgroup are moving to other cgroup.
+ */
+ atomic_t moving_account;
+ /*
* percpu counter.
*/
struct mem_cgroup_stat_cpu *stat;
@@ -1253,36 +1256,37 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
return memcg->swappiness;
}
+/*
+ * memcg->moving_account is used for checking possibility that some thread is
+ * calling move_account(). When a thread on CPU-A starts moving pages under
+ * a memcg, other threads should check memcg->moving_account under
+ * rcu_read_lock(), like this:
+ *
+ * CPU-A CPU-B
+ * rcu_read_lock()
+ * memcg->moving_account+1 if (memcg->moving_account)
+ * take heavy locks.
+ * synchronize_rcu() update something.
+ * rcu_read_unlock()
+ * start move here.
+ */
static void mem_cgroup_start_move(struct mem_cgroup *memcg)
{
- int cpu;
-
- get_online_cpus();
- spin_lock(&memcg->pcp_counter_lock);
- for_each_online_cpu(cpu)
- per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
- memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
- spin_unlock(&memcg->pcp_counter_lock);
- put_online_cpus();
-
+ atomic_inc(&memcg->moving_account);
synchronize_rcu();
}
static void mem_cgroup_end_move(struct mem_cgroup *memcg)
{
- int cpu;
-
- if (!memcg)
- return;
- get_online_cpus();
- spin_lock(&memcg->pcp_counter_lock);
- for_each_online_cpu(cpu)
- per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
- memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
- spin_unlock(&memcg->pcp_counter_lock);
- put_online_cpus();
+ /*
+ * Now, mem_cgroup_clear_mc() may call this function with NULL.
+ * We check NULL in callee rather than caller.
+ */
+ if (memcg)
+ atomic_dec(&memcg->moving_account);
}
+
/*
* 2 routines for checking "mem" is under move_account() or not.
*
@@ -1298,7 +1302,7 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
{
VM_BUG_ON(!rcu_read_lock_held());
- return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
+ return atomic_read(&memcg->moving_account) > 0;
}
static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
@@ -1849,8 +1853,8 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
* by flags.
*
* Considering "move", this is an only case we see a race. To make the race
- * small, we check MEM_CGROUP_ON_MOVE percpu value and detect there are
- * possibility of race condition. If there is, we take a lock.
+ * small, we check memcg->moving_account and detect there is a possibility of
+ * race. If there is, we take a lock.
*/
void mem_cgroup_update_page_stat(struct page *page,
@@ -2068,17 +2072,6 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
per_cpu(memcg->stat->events[i], cpu) = 0;
memcg->nocpu_base.events[i] += x;
}
- /* need to clear ON_MOVE value, works as a kind of lock. */
- per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
- spin_unlock(&memcg->pcp_counter_lock);
-}
-
-static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu)
-{
- int idx = MEM_CGROUP_ON_MOVE;
-
- spin_lock(&memcg->pcp_counter_lock);
- per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx];
spin_unlock(&memcg->pcp_counter_lock);
}
@@ -2090,11 +2083,8 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
struct memcg_stock_pcp *stock;
struct mem_cgroup *iter;
- if ((action == CPU_ONLINE)) {
- for_each_mem_cgroup(iter)
- synchronize_mem_cgroup_on_move(iter, cpu);
+ if ((action == CPU_ONLINE))
return NOTIFY_OK;
- }
if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN)
return NOTIFY_OK;
--
1.7.4.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [RFC] [PATCH 2/6 v3] memcg: remove EXPORT_SYMBOL(mem_cgroup_update_page_stat)
2012-02-06 10:06 [RFC] [PATCH 0/6 v3] memcg: page cgroup diet KAMEZAWA Hiroyuki
2012-02-06 10:07 ` [PATCH 1/6] memcg: simplify move_account() check KAMEZAWA Hiroyuki
@ 2012-02-06 10:09 ` KAMEZAWA Hiroyuki
2012-02-06 10:09 ` [PATCH 3/6] memcg: remove PCG_MOVE_LOCK flag from page_cgroup KAMEZAWA Hiroyuki
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-06 10:09 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han,
Hugh Dickins, akpm
Michal pointed out this.
>From 78dfe92250046397114148e57bbc5927c59257fe Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 2 Feb 2012 12:05:41 +0900
Subject: [PATCH 2/6] memcg: remove EXPORT_SYMBOL(mem_cgroup_update_page_stat)
>From the log, I guess EXPORT was for preparing dirty accounting.
But _now_, we don't need to export this. Remove this for now.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
mm/memcontrol.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9bf9d1c..4ba0d76 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1896,7 +1896,6 @@ out:
move_unlock_page_cgroup(pc, &flags);
rcu_read_unlock();
}
-EXPORT_SYMBOL(mem_cgroup_update_page_stat);
/*
* size of first charge trial. "32" comes from vmscan.c's magic value.
--
1.7.4.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/6] memcg: remove PCG_MOVE_LOCK flag from page_cgroup.
2012-02-06 10:06 [RFC] [PATCH 0/6 v3] memcg: page cgroup diet KAMEZAWA Hiroyuki
2012-02-06 10:07 ` [PATCH 1/6] memcg: simplify move_account() check KAMEZAWA Hiroyuki
2012-02-06 10:09 ` [RFC] [PATCH 2/6 v3] memcg: remove EXPORT_SYMBOL(mem_cgroup_update_page_stat) KAMEZAWA Hiroyuki
@ 2012-02-06 10:09 ` KAMEZAWA Hiroyuki
2012-02-06 10:10 ` [RFC][PATCH 4/6] memcg: use new logic for page stat accounting KAMEZAWA Hiroyuki
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-06 10:09 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han,
Hugh Dickins, akpm
>From 94b17cbc95e068a0a841c84fb0345f48a2a27d24 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 2 Feb 2012 11:49:59 +0900
Subject: [PATCH 3/6] memcg: remove PCG_MOVE_LOCK flag from page_cgroup.
PCG_MOVE_LOCK is used for bit spinlock to avoid race between overwriting
pc->mem_cgroup and page statistics accounting per memcg.
This lock helps to avoid the race but the race is very rare because moving
tasks between cgroups is not a usual job.
So, it seems using 1bit per page is too costly.
This patch changes this lock as per-memcg spinlock and removes PCG_MOVE_LOCK.
If smaller lock is required, we'll be able to add some hashes but
I'd like to start from this.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/page_cgroup.h | 19 -------------------
mm/memcontrol.c | 34 ++++++++++++++++++++++++++++++++--
2 files changed, 32 insertions(+), 21 deletions(-)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 1060292..7a3af74 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -7,7 +7,6 @@ enum {
PCG_USED, /* this object is in use. */
PCG_MIGRATION, /* under page migration */
/* flags for mem_cgroup and file and I/O status */
- PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
PCG_FILE_MAPPED, /* page is accounted as "mapped" */
__NR_PCG_FLAGS,
};
@@ -89,24 +88,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
bit_spin_unlock(PCG_LOCK, &pc->flags);
}
-static inline void move_lock_page_cgroup(struct page_cgroup *pc,
- unsigned long *flags)
-{
- /*
- * We know updates to pc->flags of page cache's stats are from both of
- * usual context or IRQ context. Disable IRQ to avoid deadlock.
- */
- local_irq_save(*flags);
- bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
-}
-
-static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
- unsigned long *flags)
-{
- bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
- local_irq_restore(*flags);
-}
-
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct page_cgroup;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4ba0d76..083154d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -280,6 +280,8 @@ struct mem_cgroup {
* set > 0 if pages under this cgroup are moving to other cgroup.
*/
atomic_t moving_account;
+ /* taken only while moving_account > 0 */
+ spinlock_t move_lock;
/*
* percpu counter.
*/
@@ -1338,6 +1340,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
return false;
}
+/*
+ * Take this lock when
+ * - a code tries to modify page's memcg while it's USED.
+ * - a code tries to modify page state accounting in a memcg.
+ * see mem_cgroup_stealed(), too.
+ */
+static void move_lock_page_cgroup(struct page_cgroup *pc, unsigned long *flags)
+{
+ struct mem_cgroup *memcg;
+
+again:
+ memcg = pc->mem_cgroup;
+ spin_lock_irqsave(&memcg->move_lock, *flags);
+ if (unlikely(pc->mem_cgroup != memcg)) {
+ spin_unlock_irqrestore(&memcg->move_lock, *flags);
+ goto again;
+ }
+}
+
+static void move_unlock_page_cgroup(struct page_cgroup *pc,
+ unsigned long *flags)
+{
+ struct mem_cgroup *memcg;
+
+ memcg = pc->mem_cgroup;
+ spin_unlock_irqrestore(&memcg->move_lock, *flags);
+}
+
/**
* mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
* @memcg: The memory cgroup that went over limit
@@ -2435,8 +2465,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
- (1 << PCG_MIGRATION))
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -4923,6 +4952,7 @@ mem_cgroup_create(struct cgroup *cont)
atomic_set(&memcg->refcnt, 1);
memcg->move_charge_at_immigrate = 0;
mutex_init(&memcg->thresholds_lock);
+ spin_lock_init(&memcg->move_lock);
return &memcg->css;
free_out:
__mem_cgroup_free(memcg);
--
1.7.4.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [RFC][PATCH 4/6] memcg: use new logic for page stat accounting.
2012-02-06 10:06 [RFC] [PATCH 0/6 v3] memcg: page cgroup diet KAMEZAWA Hiroyuki
` (2 preceding siblings ...)
2012-02-06 10:09 ` [PATCH 3/6] memcg: remove PCG_MOVE_LOCK flag from page_cgroup KAMEZAWA Hiroyuki
@ 2012-02-06 10:10 ` KAMEZAWA Hiroyuki
2012-02-06 10:10 ` [RFC][PATCH 5/6] memcg: remove PCG_FILE_MAPPED KAMEZAWA Hiroyuki
2012-02-06 10:11 ` [RFC] [PATCH 6/6] memcg: fix performance of mem_cgroup_begin_update_page_stat() KAMEZAWA Hiroyuki
5 siblings, 0 replies; 9+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-06 10:10 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han,
Hugh Dickins, akpm
>From 99112a52852adfdecb745028c83838bc6ffbbb47 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 2 Feb 2012 11:49:59 +0900
Subject: [PATCH 4/6] memcg: use new logic for page stat accounting.
Now, page-stat-per-memcg is recorded into per page_cgroup flag by
duplicating page's status into the flag. The reason is that memcg
has a feature to move a page from a group to another group and we
have race between "move" and "page stat accounting",
Under current logic, assume CPU-A and CPU-B. CPU-A does "move"
and CPU-B does "page stat accounting".
When CPU-A goes 1st,
CPU-A CPU-B
update "struct page" info.
move_lock_page_cgroup(pc)
see flags
copy page stat to new group
overwrite pc->mem_cgroup.
move_unlock_page_cgroup(pc)
move_lock_page_cgroup(pc)
set pc->flags
update page stat accounting
move_unlock_page_cgroup(pc)
stat accounting is guarded by move_lock_page_cgroup() and "move"
logic (CPU-A) doesn't see changes in "struct page" information.
But it's costly to have the same information both in 'struct page' and
'struct page_cgroup' and there is a potential problem.
For example, assume we have PG_dirty accounting in memcg.
PG_..is a flag for struct page.
PCG_ is a flag for struct page_cgroup.
(This is just an example. The same problem can be found in any
kind of page stat accounting.)
CPU-A CPU-B
TestSet PG_dirty
(delay) TestClear PG_dirty_
if (TestClear(PCG_dirty))
memcg->nr_dirty--
if (TestSet(PCG_dirty))
memcg->nr_dirty++
Here, memcg->nr_dirty = +1, this is wrong.
This race was reported by Greg Thelen <gthelen@google.com>.
Now, only FILE_MAPPED is supported but fortunately, it's serialized by
page table lock and this is not real bug, _now_,
If this potential problem is caused by having duplicated information in
struct page and struct page_cgroup, we may be able to fix this by using
original 'struct page' information.
But we'll have a problem in "move account"
Assume we use only PG_dirty.
CPU-A CPU-B
TestSet PG_dirty
(delay) move_lock_page_cgroup()
if (PageDirty(page))
new_memcg->nr_dirty++
pc->mem_cgroup = new_memcg;
move_unlock_page_cgroup()
move_lock_page_cgroup()
memcg = pc->mem_cgroup
new_memcg->nr_dirty++
accounting information may be double-counted. This was original
reason to have PCG_xxx flags.
I think we need a bigger lock as
move_lock_page_cgroup(page)
TestSetPageDirty(page)
update page stats (without any checks)
move_unlock_page_cgroup(page)
This fixes both of problems and we don't have to duplicate page flag
into page_cgroup. Please note: move_lock_page_cgroup() is held
only when there are possibility of "account move" under the system.
So, in most path, status update will go without atomic locks.
This patch introduce mem_cgroup_begin_update_page_stat() and
mem_cgroup_end_update_page_stat() both should be called at modifying
'struct page' information if memcg takes care of it. as
mem_cgroup_begin_update_page_stat()
modify page information
mem_cgroup_update_page_stat()
=> never check any 'struct page' info, just update counters.
mem_cgroup_end_update_page_stat().
This patch is slow because we need to call begin_update_page_stat()/
end_update_page_stat() regardless of accounted will be changed or not.
A following patch adds an easy optimization and reduces the cost.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/memcontrol.h | 35 +++++++++++++++++++++++++++++
mm/memcontrol.c | 53 +++++++++++++++++++++++++++----------------
mm/rmap.c | 15 ++++++++++-
3 files changed, 81 insertions(+), 22 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bf4e1f4..3f3ef33 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -141,6 +141,31 @@ static inline bool mem_cgroup_disabled(void)
return false;
}
+void __mem_cgroup_begin_update_page_stat(struct page *page,
+ bool *lock, unsigned long *flags);
+
+static inline void mem_cgroup_begin_update_page_stat(struct page *page,
+ bool *lock, unsigned long *flags)
+{
+ if (mem_cgroup_disabled())
+ return;
+ rcu_read_lock();
+ *lock = false;
+ return __mem_cgroup_begin_update_page_stat(page, lock, flags);
+}
+
+void __mem_cgroup_end_update_page_stat(struct page *page, bool *lock,
+ unsigned long *flags);
+static inline void mem_cgroup_end_update_page_stat(struct page *page,
+ bool *lock, unsigned long *flags)
+{
+ if (mem_cgroup_disabled())
+ return;
+ if (*lock)
+ __mem_cgroup_end_update_page_stat(page, lock, flags);
+ rcu_read_unlock();
+}
+
void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx,
int val);
@@ -356,6 +381,16 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}
+static inline void mem_cgroup_begin_update_page_stat(struct page *page,
+ bool *lock, unsigned long *flags)
+{
+}
+
+static inline void mem_cgroup_end_update_page_stat(struct page *page,
+ bool *lock, unsigned long *flags)
+{
+}
+
static inline void mem_cgroup_inc_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 083154d..6ac33ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1882,30 +1882,48 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
* If there is, we take a lock.
*/
+void __mem_cgroup_begin_update_page_stat(struct page *page,
+ bool *lock, unsigned long *flags)
+{
+ struct mem_cgroup *memcg;
+ struct page_cgroup *pc;
+
+ pc = lookup_page_cgroup(page);
+again:
+ memcg = pc->mem_cgroup;
+ if (unlikely(!memcg || !PageCgroupUsed(pc)))
+ return;
+ if (!mem_cgroup_stealed(memcg))
+ return;
+
+ move_lock_page_cgroup(pc, flags);
+ if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
+ move_unlock_page_cgroup(pc, flags);
+ goto again;
+ }
+ *lock = true;
+}
+
+void __mem_cgroup_end_update_page_stat(struct page *page,
+ bool *lock, unsigned long *flags)
+{
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+ move_unlock_page_cgroup(pc, flags);
+}
+
void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx, int val)
{
struct mem_cgroup *memcg;
- struct page_cgroup *pc = lookup_page_cgroup(page);
- bool need_unlock = false;
- unsigned long uninitialized_var(flags);
+ struct page_cgroup *pc;
if (mem_cgroup_disabled())
return;
- rcu_read_lock();
+ pc = lookup_page_cgroup(page);
+ if (!pc->mem_cgroup || !PageCgroupUsed(pc))
+ return;
memcg = pc->mem_cgroup;
- if (unlikely(!memcg || !PageCgroupUsed(pc)))
- goto out;
- /* pc->mem_cgroup is unstable ? */
- if (unlikely(mem_cgroup_stealed(memcg))) {
- /* take a lock against to access pc->mem_cgroup */
- move_lock_page_cgroup(pc, &flags);
- need_unlock = true;
- memcg = pc->mem_cgroup;
- if (!memcg || !PageCgroupUsed(pc))
- goto out;
- }
switch (idx) {
case MEMCG_NR_FILE_MAPPED:
@@ -1920,11 +1938,6 @@ void mem_cgroup_update_page_stat(struct page *page,
}
this_cpu_add(memcg->stat->count[idx], val);
-
-out:
- if (unlikely(need_unlock))
- move_unlock_page_cgroup(pc, &flags);
- rcu_read_unlock();
}
/*
diff --git a/mm/rmap.c b/mm/rmap.c
index aa547d4..dd193db 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1150,10 +1150,15 @@ void page_add_new_anon_rmap(struct page *page,
*/
void page_add_file_rmap(struct page *page)
{
+ bool locked;
+ unsigned long flags;
+
+ mem_cgroup_begin_update_page_stat(page, &locked, &flags);
if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
}
+ mem_cgroup_end_update_page_stat(page, &locked, &flags);
}
/**
@@ -1164,9 +1169,13 @@ void page_add_file_rmap(struct page *page)
*/
void page_remove_rmap(struct page *page)
{
+ bool locked;
+ unsigned long flags;
+
+ mem_cgroup_begin_update_page_stat(page, &locked, &flags);
/* page still mapped by someone else? */
if (!atomic_add_negative(-1, &page->_mapcount))
- return;
+ goto out;
/*
* Now that the last pte has gone, s390 must transfer dirty
@@ -1183,7 +1192,7 @@ void page_remove_rmap(struct page *page)
* and not charged by memcg for now.
*/
if (unlikely(PageHuge(page)))
- return;
+ goto out;
if (PageAnon(page)) {
mem_cgroup_uncharge_page(page);
if (!PageTransHuge(page))
@@ -1204,6 +1213,8 @@ void page_remove_rmap(struct page *page)
* Leaving it set also helps swapoff to reinstate ptes
* faster for those pages still in swapcache.
*/
+out:
+ mem_cgroup_end_update_page_stat(page, &locked, &flags);
}
/*
--
1.7.4.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [RFC][PATCH 5/6] memcg: remove PCG_FILE_MAPPED
2012-02-06 10:06 [RFC] [PATCH 0/6 v3] memcg: page cgroup diet KAMEZAWA Hiroyuki
` (3 preceding siblings ...)
2012-02-06 10:10 ` [RFC][PATCH 4/6] memcg: use new logic for page stat accounting KAMEZAWA Hiroyuki
@ 2012-02-06 10:10 ` KAMEZAWA Hiroyuki
2012-02-06 10:11 ` [RFC] [PATCH 6/6] memcg: fix performance of mem_cgroup_begin_update_page_stat() KAMEZAWA Hiroyuki
5 siblings, 0 replies; 9+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-06 10:10 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han,
Hugh Dickins, akpm
>From 6edf53b19769c52a6d65e0c8ba0c3000eadbddda Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 2 Feb 2012 15:02:18 +0900
Subject: [PATCH 5/6] memcg: remove PCG_FILE_MAPPED
With the new lock scheme for updating memcg's page stat, we no longer need
the flag PCG_FILE_MAPPED, which duplicated the information of page_mapped().
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/page_cgroup.h | 6 ------
mm/memcontrol.c | 6 +-----
2 files changed, 1 insertions(+), 11 deletions(-)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 7a3af74..a88cdba 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -6,8 +6,6 @@ enum {
PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
PCG_USED, /* this object is in use. */
PCG_MIGRATION, /* under page migration */
- /* flags for mem_cgroup and file and I/O status */
- PCG_FILE_MAPPED, /* page is accounted as "mapped" */
__NR_PCG_FLAGS,
};
@@ -66,10 +64,6 @@ TESTPCGFLAG(Used, USED)
CLEARPCGFLAG(Used, USED)
SETPCGFLAG(Used, USED)
-SETPCGFLAG(FileMapped, FILE_MAPPED)
-CLEARPCGFLAG(FileMapped, FILE_MAPPED)
-TESTPCGFLAG(FileMapped, FILE_MAPPED)
-
SETPCGFLAG(Migration, MIGRATION)
CLEARPCGFLAG(Migration, MIGRATION)
TESTPCGFLAG(Migration, MIGRATION)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6ac33ef..eedabee 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1927,10 +1927,6 @@ void mem_cgroup_update_page_stat(struct page *page,
switch (idx) {
case MEMCG_NR_FILE_MAPPED:
- if (val > 0)
- SetPageCgroupFileMapped(pc);
- else if (!page_mapped(page))
- ClearPageCgroupFileMapped(pc);
idx = MEM_CGROUP_STAT_FILE_MAPPED;
break;
default:
@@ -2551,7 +2547,7 @@ static int mem_cgroup_move_account(struct page *page,
move_lock_page_cgroup(pc, &flags);
- if (PageCgroupFileMapped(pc)) {
+ if (page_mapped(page)) {
/* Update mapped_file data for mem_cgroup */
preempt_disable();
__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
--
1.7.4.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [RFC] [PATCH 6/6] memcg: fix performance of mem_cgroup_begin_update_page_stat()
2012-02-06 10:06 [RFC] [PATCH 0/6 v3] memcg: page cgroup diet KAMEZAWA Hiroyuki
` (4 preceding siblings ...)
2012-02-06 10:10 ` [RFC][PATCH 5/6] memcg: remove PCG_FILE_MAPPED KAMEZAWA Hiroyuki
@ 2012-02-06 10:11 ` KAMEZAWA Hiroyuki
5 siblings, 0 replies; 9+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-06 10:11 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki
Cc: linux-mm, linux-kernel, hannes, Michal Hocko, Ying Han,
Hugh Dickins, akpm
>From 4094b65b9c0a368a34b8c3d42df1ce0ebfa1edf8 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 6 Feb 2012 12:14:47 +0900
Subject: [PATCH 6/6] memcg: fix performance of mem_cgroup_begin_update_page_stat()
mem_cgroup_begin_update_page_stat() should be very fast because
it's called very frequently. Now, it needs to look up page_cgroup
and its memcg... this is slow.
This patch adds a global variable to check "a memcg is moving or not".
With this, the caller doesn't need to visit page_cgroup and memcg.
This improves performance.
Here is a test result. A test program makes page faults onto a file,
MAP_SHARED and makes each page's page_mapcount(page) > 1, and free
the range by madvise() and page fault again. This program causes
26214400 page faults onto a file (size was 1G) and shows
the cost of mem_cgroup_begin_update_page_stat().
(please see 'sys' time and think error range is 0.2 - 0.4sec.)
Before the series of patch (linux-next Feb/06)
[kamezawa@bluextal test]$ time ./mmap 1G
real 0m23.534s
user 0m5.949s
sys 0m17.563s
After the patch for mem_cgroup_begin_update_page_stat()
[kamezawa@bluextal test]$ time ./mmap 1G
real 0m24.268s
user 0m6.059s
sys 0m18.187s
After this patch
[kamezawa@bluextal test]$ time ./mmap 1G
real 0m23.094s
user 0m6.045s
sys 0m17.030s
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/memcontrol.h | 5 +++--
mm/memcontrol.c | 6 +++++-
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3f3ef33..3df9979 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -143,7 +143,7 @@ static inline bool mem_cgroup_disabled(void)
void __mem_cgroup_begin_update_page_stat(struct page *page,
bool *lock, unsigned long *flags);
-
+extern atomic_t memcg_moving;
static inline void mem_cgroup_begin_update_page_stat(struct page *page,
bool *lock, unsigned long *flags)
{
@@ -151,7 +151,8 @@ static inline void mem_cgroup_begin_update_page_stat(struct page *page,
return;
rcu_read_lock();
*lock = false;
- return __mem_cgroup_begin_update_page_stat(page, lock, flags);
+ if (atomic_read(&memcg_moving))
+ return __mem_cgroup_begin_update_page_stat(page, lock, flags);
}
void __mem_cgroup_end_update_page_stat(struct page *page, bool *lock,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index eedabee..322e381 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1273,16 +1273,20 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
* start move here.
*/
+atomic_t memcg_moving __read_mostly;
static void mem_cgroup_start_move(struct mem_cgroup *memcg)
{
+ atomic_inc(&memcg_moving);
atomic_inc(&memcg->moving_account);
synchronize_rcu();
}
static void mem_cgroup_end_move(struct mem_cgroup *memcg)
{
- if (memcg)
+ if (memcg) {
atomic_dec(&memcg->moving_account);
+ atomic_dec(&memcg_moving);
+ }
}
/*
* 2 routines for checking "mem" is under move_account() or not.
--
1.7.4.1
^ permalink raw reply related [flat|nested] 9+ messages in thread