* [RFC][PATCH 0/2] memcg: reduce overhead by coalescing css_get/put
@ 2010-06-03  9:54 KAMEZAWA Hiroyuki
  2010-06-03  9:56 ` [RFC][PATCH 1/2] memcg: coalescing css_get() at charge KAMEZAWA Hiroyuki
  2010-06-03  9:57 ` [RFC][PATCH 2/2] memcg: coalescing css_put KAMEZAWA Hiroyuki
  0 siblings, 2 replies; 3+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-06-03  9:54 UTC (permalink / raw)
  To: linux-mm; +Cc: nishimura, balbir

This patch set is still under development (I can't guarantee it is free of bugs).

The idea is to coalesce multiple css_get()/css_put() calls into
__css_get()/__css_put(), as we already do for res_counter charging.

Here is a result with a multi-threaded page-fault program. The program
triggers page faults continuously for 60 seconds, so the better the kernel
performs, the more page faults we see.
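
(The multi-fault-all-split source is not included here; as a rough idea of
 what such a test does, below is a minimal sketch written for the same
 purpose. The thread count comes from argv, and the 4096-byte page size and
 2MB region size are assumptions. Each thread maps, touches and unmaps
 anonymous memory in a loop for 60 seconds, so the perf page-fault count
 reflects fault-path throughput.)

/* minimal sketch of a multi-threaded page-fault loop (not the original program) */
#include <pthread.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <time.h>

#define LEN  (2UL * 1024 * 1024)	/* 2MB mapped per iteration (assumption) */
#define SECS 60

static void *worker(void *arg)
{
	time_t end = time(NULL) + SECS;

	while (time(NULL) < end) {
		char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			exit(1);
		for (unsigned long off = 0; off < LEN; off += 4096)
			p[off] = 1;	/* each touch is one minor fault */
		munmap(p, LEN);		/* unmap so the next pass faults again */
	}
	return NULL;
}

int main(int argc, char **argv)
{
	int i, nr = (argc > 1) ? atoi(argv[1]) : 1;
	pthread_t th[64];

	if (nr > 64)
		nr = 64;
	for (i = 0; i < nr; i++)
		pthread_create(&th[i], NULL, worker, NULL);
	for (i = 0; i < nr; i++)
		pthread_join(th[i], NULL);
	return 0;
}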

Here is a test result under a memcg (not the root cgroup).

[before Patch]
[root@bluextal test]# /root/bin/perf stat -e page-faults,cache-misses ./multi-fault-all-split 8

 Performance counter stats for './multi-fault-all-split 8':

           12357708  page-faults
          161332057  cache-misses

       60.007931275  seconds time elapsed
    25.31%  multi-fault-all  [kernel.kallsyms]      [k] clear_page_c
     9.24%  multi-fault-all  [kernel.kallsyms]      [k] down_read_trylock
     8.37%  multi-fault-all  [kernel.kallsyms]      [k] try_get_mem_cgroup_from_mm
     5.21%  multi-fault-all  [kernel.kallsyms]      [k] __alloc_pages_nodemask
     5.13%  multi-fault-all  [kernel.kallsyms]      [k] _raw_spin_lock_irqsave
     4.91%  multi-fault-all  [kernel.kallsyms]      [k] __css_put
     4.66%  multi-fault-all  [kernel.kallsyms]      [k] up_read
     3.17%  multi-fault-all  [kernel.kallsyms]      [k] css_put
     2.77%  multi-fault-all  [kernel.kallsyms]      [k] _raw_spin_lock_irq
     2.58%  multi-fault-all  [kernel.kallsyms]      [k] page_fault

[after Patch]
[root@bluextal test]#  /root/bin/perf stat -e page-faults,cache-misses ./multi-fault-all-split 8

 Performance counter stats for './multi-fault-all-split 8':

           13615258  page-faults
          153207110  cache-misses

       60.004117823  seconds time elapsed

# Overhead          Command          Shared Object  Symbol
# ........  ...............  .....................  ......
#
    27.70%  multi-fault-all  [kernel.kallsyms]      [k] clear_page_c
    11.18%  multi-fault-all  [kernel.kallsyms]      [k] down_read_trylock
     7.54%  multi-fault-all  [kernel.kallsyms]      [k] _raw_spin_lock_irqsave
     5.99%  multi-fault-all  [kernel.kallsyms]      [k] up_read
     5.90%  multi-fault-all  [kernel.kallsyms]      [k] __alloc_pages_nodemask
     5.13%  multi-fault-all  [kernel.kallsyms]      [k] _raw_spin_lock_irq
     2.73%  multi-fault-all  [kernel.kallsyms]      [k] __mem_cgroup_commit_charge
     2.71%  multi-fault-all  [kernel.kallsyms]      [k] page_fault
     2.66%  multi-fault-all  [kernel.kallsyms]      [k] handle_mm_fault
     2.35%  multi-fault-all  [kernel.kallsyms]      [k] _raw_spin_lock


You can see that cache-misses per page-fault improve and that css_get/css_put
no longer show up in the overhead profile. Please give it a review if you are
interested.

(I tried to get rid of the per-page css_get()/css_put() entirely, but it
 doesn't seem easy. So, for now, I'm trying to reduce its overhead.)

Thanks,
-Kame


* [RFC][PATCH 1/2] memcg: coalescing css_get() at charge
  2010-06-03  9:54 [RFC][PATCH 0/2] memcg: reduce overhead by coalescing css_get/put KAMEZAWA Hiroyuki
@ 2010-06-03  9:56 ` KAMEZAWA Hiroyuki
  2010-06-03  9:57 ` [RFC][PATCH 2/2] memcg: coalescing css_put KAMEZAWA Hiroyuki
  1 sibling, 0 replies; 3+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-06-03  9:56 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-mm, nishimura, balbir

This is based on a clean-up patch I sent earlier.
==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Coalesce multiple css_get() calls into a single __css_get(count), as
res_counter does. This greatly reduces memcg's cost from refcount
cache-line ping-pong.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 mm/memcontrol.c |   35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

Index: mmotm-2.6.34-May21/mm/memcontrol.c
===================================================================
--- mmotm-2.6.34-May21.orig/mm/memcontrol.c
+++ mmotm-2.6.34-May21/mm/memcontrol.c
@@ -1542,6 +1542,7 @@ static void drain_stock(struct memcg_sto
 		res_counter_uncharge(&old->res, stock->charge);
 		if (do_swap_account)
 			res_counter_uncharge(&old->memsw, stock->charge);
+		__css_put(&old->css, stock->charge/PAGE_SIZE);
 	}
 	stock->cached = NULL;
 	stock->charge = 0;
@@ -1570,6 +1571,7 @@ static void refill_stock(struct mem_cgro
 		stock->cached = mem;
 	}
 	stock->charge += val;
+	__css_get(&mem->css, val/PAGE_SIZE);
 	put_cpu_var(memcg_stock);
 }
 
@@ -1710,6 +1712,7 @@ static int __mem_cgroup_try_charge(struc
 	 * in system level. So, allow to go ahead dying process in addition to
 	 * MEMDIE process.
 	 */
+again:
 	if (unlikely(test_thread_flag(TIF_MEMDIE)
 		     || fatal_signal_pending(current)))
 		goto bypass;
@@ -1720,25 +1723,42 @@ static int __mem_cgroup_try_charge(struc
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
+
+	rcu_read_lock();
 	if (*memcg) {
 		mem = *memcg;
-		css_get(&mem->css);
 	} else {
-		mem = try_get_mem_cgroup_from_mm(mm);
+		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!mem))
 			return 0;
 		*memcg = mem;
 	}
 
-	VM_BUG_ON(css_is_removed(&mem->css));
-	if (mem_cgroup_is_root(mem))
+	/* racy? (but this seems to never happen in practice) */
+	if (unlikely(css_is_removed(&mem->css))) {
+		rcu_read_unlock();
+		mem = NULL;
+		goto bypass;
+	}
+
+	if (mem_cgroup_is_root(mem)) {
+		rcu_read_unlock();
 		goto done;
+	}
 
+	if (consume_stock(mem)) {
+		rcu_read_unlock();
+		goto done;
+	}
+	if (!css_tryget(&mem->css)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+	/* Enter memory reclaim loop */
 	do {
 		bool oom_check;
 
-		if (consume_stock(mem))
-			goto done; /* don't need to fill stock */
 		/* If killed, bypass charge */
 		if (fatal_signal_pending(current))
 			goto bypass;
@@ -1756,7 +1776,8 @@ static int __mem_cgroup_try_charge(struc
 			break;
 		case CHARGE_RETRY: /* not in OOM situation but retry */
 			csize = PAGE_SIZE;
-			break;
+			css_put(&mem->css);
+			goto again;
 		case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
 			goto nomem;
 		case CHARGE_NOMEM: /* OOM routine works */

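To summarize what the hunks above do: refill_stock() now takes the css
references for the whole stock with one __css_get(), consume_stock() hands
out pre-referenced pages without touching the refcount, drain_stock()
returns whatever is left with one __css_put(), and the charge slow path
takes its own temporary reference with css_tryget() under rcu_read_lock().
Below is a rough userspace model of that invariant; the names are invented
stand-ins (not the kernel API), and a plain atomic counter stands in for
the css refcount.

/* rough model: the per-CPU charge stock carries css references with it */
#include <stdatomic.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL

struct group {
	atomic_long refcnt;		/* stands in for css->refcnt */
};

struct stock {
	struct group *cached;		/* which group the stock belongs to */
	unsigned long charge;		/* pre-charged bytes; each page also holds one reference */
};

/* refill: add 'val' bytes to the stock and take the matching refs in one shot */
static void refill(struct stock *s, struct group *g, unsigned long val)
{
	s->cached = g;
	s->charge += val;
	atomic_fetch_add(&g->refcnt, val / PAGE_SIZE);	/* like __css_get() */
}

/* consume: hand out one pre-referenced page without touching the refcount */
static int consume(struct stock *s, struct group *g)
{
	if (s->cached != g || s->charge < PAGE_SIZE)
		return 0;
	s->charge -= PAGE_SIZE;
	return 1;
}

/* drain: return the unused charge and its references together */
static void drain(struct stock *s)
{
	if (s->cached && s->charge)
		atomic_fetch_sub(&s->cached->refcnt, s->charge / PAGE_SIZE);	/* like __css_put() */
	s->cached = NULL;
	s->charge = 0;
}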

* [RFC][PATCH 2/2] memcg: coalescing css_put
  2010-06-03  9:54 [RFC][PATCH 0/2] memcg: reduce overhead by coalescing css_get/put KAMEZAWA Hiroyuki
  2010-06-03  9:56 ` [RFC][PATCH 1/2] memcg: coalescing css_get() at charge KAMEZAWA Hiroyuki
@ 2010-06-03  9:57 ` KAMEZAWA Hiroyuki
  1 sibling, 0 replies; 3+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-06-03  9:57 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-mm, nishimura, balbir

From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

The memory cgroup takes one css refcount per page and drops it at
uncharge(). We now uncharge in batches where possible, so the css_put()
can be batched as well.

This patch reduces the number of atomic_dec() calls and makes memcg
faster on SMP systems. It also needs a small modification to the
SWAPOUT routine.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 mm/memcontrol.c |   14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Index: mmotm-2.6.34-May21/mm/memcontrol.c
===================================================================
--- mmotm-2.6.34-May21.orig/mm/memcontrol.c
+++ mmotm-2.6.34-May21/mm/memcontrol.c
@@ -2304,6 +2304,7 @@ direct_uncharge:
 		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	if (unlikely(batch->memcg != mem))
 		memcg_oom_recover(mem);
+	css_put(&mem->css);
 	return;
 }
 
@@ -2373,9 +2374,6 @@ __mem_cgroup_uncharge_common(struct page
 	unlock_page_cgroup(pc);
 
 	memcg_check_events(mem, page);
-	/* at swapout, this memcg will be accessed to record to swap */
-	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
-		css_put(&mem->css);
 
 	return mem;
 
@@ -2441,6 +2439,7 @@ void mem_cgroup_uncharge_end(void)
 		res_counter_uncharge(&batch->memcg->res, batch->bytes);
 	if (batch->memsw_bytes)
 		res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes);
+	__css_put(&batch->memcg->css, batch->bytes/PAGE_SIZE);
 	memcg_oom_recover(batch->memcg);
 	/* forget this pointer (for sanity check) */
 	batch->memcg = NULL;
@@ -2456,19 +2455,22 @@ mem_cgroup_uncharge_swapcache(struct pag
 {
 	struct mem_cgroup *memcg;
 	int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
+	struct mem_cgroup *keep = NULL;
 
 	if (!swapout) /* this was a swap cache but the swap is unused ! */
 		ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
+	else
+		keep = try_get_mem_cgroup_from_page(page);
 
 	memcg = __mem_cgroup_uncharge_common(page, ctype);
 
 	/* record memcg information */
-	if (do_swap_account && swapout && memcg) {
+	if (do_swap_account && swapout && memcg && keep == memcg) {
 		swap_cgroup_record(ent, css_id(&memcg->css));
 		mem_cgroup_get(memcg);
 	}
-	if (swapout && memcg)
-		css_put(&memcg->css);
+	if (keep)
+		css_put(&keep->css);
 }
 #endif
 

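The uncharge side mirrors the charge-side batching: while a batch is open
(mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end()), each uncharged page
only adds its bytes to the batch, and one __css_put() at uncharge_end drops
all accumulated references at once; the direct_uncharge fallback still does
a single css_put() per page. Below is a rough userspace model with invented
names, assuming the same 4096-byte page as above.

/* rough model: batched css_put at uncharge (names are invented) */
#include <stdatomic.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL

struct group {
	atomic_long refcnt;		/* stands in for css->refcnt */
};

struct uncharge_batch {
	struct group *memcg;		/* group this batch belongs to */
	unsigned long bytes;		/* bytes uncharged so far in the batch */
	int active;			/* between batch_start() and batch_end() */
};

static void batch_start(struct uncharge_batch *b)
{
	b->active = 1;
}

/* one page uncharged: just accumulate, don't touch the refcount yet */
static void uncharge_page(struct uncharge_batch *b, struct group *g)
{
	if (b->active && (b->memcg == NULL || b->memcg == g)) {
		b->memcg = g;
		b->bytes += PAGE_SIZE;
		return;
	}
	atomic_fetch_sub(&g->refcnt, 1);	/* direct_uncharge fallback: one put per page */
}

/* end of the batch: one atomic op drops all accumulated references */
static void batch_end(struct uncharge_batch *b)
{
	if (b->memcg && b->bytes)
		atomic_fetch_sub(&b->memcg->refcnt, b->bytes / PAGE_SIZE);	/* like __css_put() */
	b->memcg = NULL;
	b->bytes = 0;
	b->active = 0;
}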