From mboxrd@z Thu Jan 1 00:00:00 1970
Date: Fri, 19 Nov 2010 10:19:38 +0900
From: KAMEZAWA Hiroyuki
To: Andrea Arcangeli
Cc: linux-mm@kvack.org, Linus Torvalds, Andrew Morton,
	linux-kernel@vger.kernel.org, Marcelo Tosatti, Adam Litke,
	Avi Kivity, Hugh Dickins, Rik van Riel, Mel Gorman, Dave Hansen,
	Benjamin Herrenschmidt, Ingo Molnar, Mike Travis,
	Christoph Lameter, Chris Wright, bpicco@redhat.com,
	KOSAKI Motohiro, Balbir Singh, "Michael S. Tsirkin",
	Peter Zijlstra, Johannes Weiner, Daisuke Nishimura,
	Chris Mason, Borislav Petkov
Subject: Re: [PATCH 39 of 66] memcg huge memory
Message-Id: <20101119101938.2edf889f.kamezawa.hiroyu@jp.fujitsu.com>
In-Reply-To: <877d2f205026b0463450.1288798094@v2.random>
References: <877d2f205026b0463450.1288798094@v2.random>
Organization: FUJITSU Co. LTD.
X-Mailer: Sylpheed 3.0.3 (GTK+ 2.10.14; i686-pc-mingw32)
Mime-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
X-Mailing-List: linux-kernel@vger.kernel.org

On Wed, 03 Nov 2010 16:28:14 +0100
Andrea Arcangeli wrote:

> From: Andrea Arcangeli
>
> Add memcg charge/uncharge to hugepage faults in huge_memory.c.
>
> Signed-off-by: Andrea Arcangeli
> Acked-by: Rik van Riel
> ---
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -233,6 +233,7 @@ static int __do_huge_pmd_anonymous_page(
>  	VM_BUG_ON(!PageCompound(page));
>  	pgtable = pte_alloc_one(mm, haddr);
>  	if (unlikely(!pgtable)) {
> +		mem_cgroup_uncharge_page(page);
>  		put_page(page);
>  		return VM_FAULT_OOM;
>  	}
> @@ -243,6 +244,7 @@ static int __do_huge_pmd_anonymous_page(
>  	spin_lock(&mm->page_table_lock);
>  	if (unlikely(!pmd_none(*pmd))) {
>  		spin_unlock(&mm->page_table_lock);
> +		mem_cgroup_uncharge_page(page);
>  		put_page(page);
>  		pte_free(mm, pgtable);
>  	} else {
> @@ -286,6 +288,10 @@ int do_huge_pmd_anonymous_page(struct mm
>  	page = alloc_hugepage(transparent_hugepage_defrag(vma));
>  	if (unlikely(!page))
>  		goto out;
> +	if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
> +		put_page(page);
> +		goto out;
> +	}
>
>  	return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page);
>  }
> @@ -402,9 +408,15 @@ static int do_huge_pmd_wp_page_fallback(
>  	for (i = 0; i < HPAGE_PMD_NR; i++) {
>  		pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
>  					  vma, address);
> -		if (unlikely(!pages[i])) {
> -			while (--i >= 0)
> +		if (unlikely(!pages[i] ||
> +			     mem_cgroup_newpage_charge(pages[i], mm,
> +						       GFP_KERNEL))) {
> +			if (pages[i])
>  				put_page(pages[i]);
> +			while (--i >= 0) {
> +				mem_cgroup_uncharge_page(pages[i]);
> +				put_page(pages[i]);
> +			}

Maybe you can use batched-uncharge here.
==
	mem_cgroup_uncharge_start()
	{
		do loop;
	}
	mem_cgroup_uncharge_end();
==
Then, many atomic ops can be reduced.

>  			kfree(pages);
>  			ret |= VM_FAULT_OOM;
>  			goto out;
> @@ -455,8 +467,10 @@ out:
>
>  out_free_pages:
>  	spin_unlock(&mm->page_table_lock);
> -	for (i = 0; i < HPAGE_PMD_NR; i++)
> +	for (i = 0; i < HPAGE_PMD_NR; i++) {
> +		mem_cgroup_uncharge_page(pages[i]);
>  		put_page(pages[i]);
> +	}

here, too.
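For example, the out_free_pages path could then look something like this
(just an untested sketch, combining the hunk above with the
mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end() pair):
==
out_free_pages:
	spin_unlock(&mm->page_table_lock);
	mem_cgroup_uncharge_start();
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		mem_cgroup_uncharge_page(pages[i]);
		put_page(pages[i]);
	}
	mem_cgroup_uncharge_end();
	kfree(pages);
	goto out;
==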
>  	kfree(pages);
>  	goto out;
>  }
> @@ -501,14 +515,22 @@ int do_huge_pmd_wp_page(struct mm_struct
>  		goto out;
>  	}
>
> +	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
> +		put_page(new_page);
> +		put_page(page);
> +		ret |= VM_FAULT_OOM;
> +		goto out;
> +	}
> +
>  	copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
>  	__SetPageUptodate(new_page);
>
>  	spin_lock(&mm->page_table_lock);
>  	put_page(page);
> -	if (unlikely(!pmd_same(*pmd, orig_pmd)))
> +	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
> +		mem_cgroup_uncharge_page(new_page);
>  		put_page(new_page);
> -	else {
> +	} else {
>  		pmd_t entry;
>  		VM_BUG_ON(!PageHead(page));
>  		entry = mk_pmd(new_page, vma->vm_page_prot);

Hmm... it seems there is no code in this series for move_account() of hugepages.
I think it will need some complicated work to walk the page tables.

Thanks,
-Kame