From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1161196AbcFOUQ4 (ORCPT ); Wed, 15 Jun 2016 16:16:56 -0400 Received: from mga04.intel.com ([192.55.52.120]:19464 "EHLO mga04.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933079AbcFOUHC (ORCPT ); Wed, 15 Jun 2016 16:07:02 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.26,477,1459839600"; d="scan'208";a="998413910" From: "Kirill A. Shutemov" To: Hugh Dickins , Andrea Arcangeli , Andrew Morton Cc: Dave Hansen , Vlastimil Babka , Christoph Lameter , Naoya Horiguchi , Jerome Marchand , Yang Shi , Sasha Levin , Andres Lagar-Cavilla , Ning Qu , linux-kernel@vger.kernel.org, linux-mm@kvack.org, linux-fsdevel@vger.kernel.org, Ebru Akagunduz , "Kirill A. Shutemov" Subject: [PATCHv9-rebased2 11/37] mm: introduce do_set_pmd() Date: Wed, 15 Jun 2016 23:06:16 +0300 Message-Id: <1466021202-61880-12-git-send-email-kirill.shutemov@linux.intel.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1466021202-61880-1-git-send-email-kirill.shutemov@linux.intel.com> References: <1465222029-45942-1-git-send-email-kirill.shutemov@linux.intel.com> <1466021202-61880-1-git-send-email-kirill.shutemov@linux.intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org With postponed page table allocation we have a chance to set up huge pages. alloc_set_pte() calls do_set_pmd() if the following criteria are met: - page is compound; - pmd entry is pmd_none(); - vma has suitable size and alignment. Signed-off-by: Kirill A. 
Shutemov --- include/linux/huge_mm.h | 2 ++ mm/huge_memory.c | 5 ---- mm/memory.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++- mm/migrate.c | 3 +-- 4 files changed, 74 insertions(+), 8 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 670ea0e3d138..3ef07cd7730c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -143,6 +143,8 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) struct page *get_huge_zero_page(void); void put_huge_zero_page(void); +#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 05088abe7576..b24b7993c369 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -795,11 +795,6 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) -{ - return pmd_mkhuge(mk_pmd(page, prot)); -} - static inline struct list_head *page_deferred_list(struct page *page) { /* diff --git a/mm/memory.c b/mm/memory.c index 02a5491f0f17..6c0ebbc680d4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2921,6 +2921,66 @@ map_pte: return 0; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) +static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, + unsigned long haddr) +{ + if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != + (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) + return false; + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + return false; + return true; +} + +static int do_set_pmd(struct fault_env *fe, struct page *page) +{ + struct vm_area_struct *vma = fe->vma; + bool write = fe->flags & FAULT_FLAG_WRITE; + unsigned long haddr = fe->address & HPAGE_PMD_MASK; + pmd_t entry; + int i, ret; + + if (!transhuge_vma_suitable(vma, haddr)) + return 
VM_FAULT_FALLBACK; + + ret = VM_FAULT_FALLBACK; + page = compound_head(page); + + fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); + if (unlikely(!pmd_none(*fe->pmd))) + goto out; + + for (i = 0; i < HPAGE_PMD_NR; i++) + flush_icache_page(vma, page + i); + + entry = mk_huge_pmd(page, vma->vm_page_prot); + if (write) + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + + add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); + page_add_file_rmap(page, true); + + set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); + + update_mmu_cache_pmd(vma, haddr, fe->pmd); + + /* fault is handled */ + ret = 0; +out: + spin_unlock(fe->ptl); + return ret; +} +#else +static int do_set_pmd(struct fault_env *fe, struct page *page) +{ + BUILD_BUG(); + return 0; +} +#endif + /** * alloc_set_pte - setup new PTE entry for given page and add reverse page * mapping. If needed, the fucntion allocates page table or use pre-allocated. @@ -2940,9 +3000,19 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, struct vm_area_struct *vma = fe->vma; bool write = fe->flags & FAULT_FLAG_WRITE; pte_t entry; + int ret; + + if (pmd_none(*fe->pmd) && PageTransCompound(page)) { + /* THP on COW? */ + VM_BUG_ON_PAGE(memcg, page); + + ret = do_set_pmd(fe, page); + if (ret != VM_FAULT_FALLBACK) + return ret; + } if (!fe->pte) { - int ret = pte_alloc_one_map(fe); + ret = pte_alloc_one_map(fe); if (ret) return ret; } diff --git a/mm/migrate.c b/mm/migrate.c index 7e6e9375d654..c7531ccf65f4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1984,8 +1984,7 @@ fail_putback: } orig_entry = *pmd; - entry = mk_pmd(new_page, vma->vm_page_prot); - entry = pmd_mkhuge(entry); + entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); /* -- 2.8.1