From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752565Ab2KFJPc (ORCPT ); Tue, 6 Nov 2012 04:15:32 -0500 Received: from cantor2.suse.de ([195.135.220.15]:36268 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751372Ab2KFJPE (ORCPT ); Tue, 6 Nov 2012 04:15:04 -0500 From: Mel Gorman To: Peter Zijlstra , Andrea Arcangeli , Ingo Molnar Cc: Rik van Riel , Johannes Weiner , Hugh Dickins , Thomas Gleixner , Linus Torvalds , Andrew Morton , Linux-MM , LKML , Mel Gorman Subject: [PATCH 05/19] mm: numa: pte_numa() and pmd_numa() Date: Tue, 6 Nov 2012 09:14:41 +0000 Message-Id: <1352193295-26815-6-git-send-email-mgorman@suse.de> X-Mailer: git-send-email 1.7.9.2 In-Reply-To: <1352193295-26815-1-git-send-email-mgorman@suse.de> References: <1352193295-26815-1-git-send-email-mgorman@suse.de> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Andrea Arcangeli Implement pte_numa and pmd_numa. We must atomically set the numa bit and clear the present bit to define a pte_numa or pmd_numa. Once a pte or pmd has been set as pte_numa or pmd_numa, the next time a thread touches a virtual address in the corresponding virtual range, a NUMA hinting page fault will trigger. The NUMA hinting page fault will clear the NUMA bit and set the present bit again to resolve the page fault. The expectation is that a NUMA hinting page fault is used as part of a placement policy that decides if a page should remain on the current node or migrated to a different node. Acked-by: Rik van Riel Signed-off-by: Andrea Arcangeli Signed-off-by: Mel Gorman --- arch/x86/include/asm/pgtable.h | 65 ++++++++++++++++++++++++++++++++++++++-- include/asm-generic/pgtable.h | 12 ++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a1f780d..e075d57 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -404,7 +404,8 @@ static inline int pte_same(pte_t a, pte_t b) static inline int pte_present(pte_t a) { - return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE | + _PAGE_NUMA); } static inline int pte_hidden(pte_t pte) @@ -420,7 +421,63 @@ static inline int pmd_present(pmd_t pmd) * the _PAGE_PSE flag will remain set at all times while the * _PAGE_PRESENT bit is clear). */ - return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); + return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE | + _PAGE_NUMA); +} + +#ifdef CONFIG_BALANCE_NUMA +/* + * _PAGE_NUMA works identical to _PAGE_PROTNONE (it's actually the + * same bit too). It's set only when _PAGE_PRESET is not set and it's + * never set if _PAGE_PRESENT is set. + * + * pte/pmd_present() returns true if pte/pmd_numa returns true. Page + * fault triggers on those regions if pte/pmd_numa returns true + * (because _PAGE_PRESENT is not set). + */ +static inline int pte_numa(pte_t pte) +{ + return (pte_flags(pte) & + (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; +} + +static inline int pmd_numa(pmd_t pmd) +{ + return (pmd_flags(pmd) & + (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; +} +#endif + +/* + * pte/pmd_mknuma sets the _PAGE_ACCESSED bitflag automatically + * because they're called by the NUMA hinting minor page fault. If we + * wouldn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler + * would be forced to set it later while filling the TLB after we + * return to userland. That would trigger a second write to memory + * that we optimize away by setting _PAGE_ACCESSED here. + */ +static inline pte_t pte_mknonnuma(pte_t pte) +{ + pte = pte_clear_flags(pte, _PAGE_NUMA); + return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); +} + +static inline pmd_t pmd_mknonnuma(pmd_t pmd) +{ + pmd = pmd_clear_flags(pmd, _PAGE_NUMA); + return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); +} + +static inline pte_t pte_mknuma(pte_t pte) +{ + pte = pte_set_flags(pte, _PAGE_NUMA); + return pte_clear_flags(pte, _PAGE_PRESENT); +} + +static inline pmd_t pmd_mknuma(pmd_t pmd) +{ + pmd = pmd_set_flags(pmd, _PAGE_NUMA); + return pmd_clear_flags(pmd, _PAGE_PRESENT); } static inline int pmd_none(pmd_t pmd) @@ -479,6 +536,10 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) static inline int pmd_bad(pmd_t pmd) { +#ifdef CONFIG_BALANCE_NUMA + if (pmd_numa(pmd)) + return 0; +#endif return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b36ce40..896667e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -554,6 +554,18 @@ static inline int pmd_trans_unstable(pmd_t *pmd) #endif } +#ifndef CONFIG_BALANCE_NUMA +static inline int pte_numa(pte_t pte) +{ + return 0; +} + +static inline int pmd_numa(pmd_t pmd) +{ + return 0; +} +#endif /* CONFIG_BALANCE_NUMA */ + #endif /* CONFIG_MMU */ #endif /* !__ASSEMBLY__ */ -- 1.7.9.2