From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753211AbdEXRU7 (ORCPT ); Wed, 24 May 2017 13:20:59 -0400
Received: from mx1.redhat.com ([209.132.183.28]:52624 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1753103AbdEXRUv (ORCPT ); Wed, 24 May 2017 13:20:51 -0400
DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com B267F80F9C
Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com;
	dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com;
	spf=pass smtp.mailfrom=jglisse@redhat.com
DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com B267F80F9C
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?=
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Dan Williams , "Kirill A . Shutemov" , John Hubbard ,
	=?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?=
Subject: [HMM 15/15] mm/migrate: allow migrate_vma() to alloc new page on empty entry v2
Date: Wed, 24 May 2017 13:20:24 -0400
Message-Id: <20170524172024.30810-16-jglisse@redhat.com>
In-Reply-To: <20170524172024.30810-1-jglisse@redhat.com>
References: <20170524172024.30810-1-jglisse@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
	(mx1.redhat.com [10.5.110.27]); Wed, 24 May 2017 17:20:50 +0000 (UTC)
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org

This allows the caller of migrate_vma() to allocate a new page for an
empty CPU page table entry. It only supports anonymous memory and it
will not allow a new page to be instantiated if userfaultfd is armed.
This is useful for device drivers that want to migrate a range of
virtual addresses and would rather allocate new memory than have to
fault later on.
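For illustration, here is a minimal sketch of the driver side, assuming the
alloc_and_copy() callback layout and MIGRATE_PFN_* encoding introduced
earlier in this series; the my_*() name and the GFP choice are placeholders,
not part of the kernel:

static void my_alloc_and_copy(struct vm_area_struct *vma,
                              const unsigned long *src,
                              unsigned long *dst,
                              unsigned long start,
                              unsigned long end,
                              void *private)
{
        unsigned long addr;
        unsigned long i;

        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *page;

                /* Entries with a source page that cannot migrate are skipped. */
                if (src[i] && !(src[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                /*
                 * src[i] == 0 means the CPU page table entry was empty
                 * (collected by migrate_vma_collect_hole()). With this
                 * patch we may still provide a destination page; the core
                 * instantiates it through migrate_vma_insert_page().
                 */
                page = alloc_page(GFP_HIGHUSER_MOVABLE); /* or a device private page */
                if (!page) {
                        dst[i] = 0;
                        continue;
                }

                /* Copying from the source page would go here when src[i] != 0. */

                lock_page(page);
                dst[i] = migrate_pfn(page_to_pfn(page)) |
                         MIGRATE_PFN_VALID | MIGRATE_PFN_LOCKED;
        }
}

Note that with this patch a hole shows up as src[i] == 0, so a callback that
only considers entries carrying MIGRATE_PFN_MIGRATE would never populate
holes; the empty entries have to be treated as candidates explicitly, as
above.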
Changed since v1:
  - 5 level page table fix

Signed-off-by: Jérôme Glisse
---
 mm/migrate.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 131 insertions(+), 4 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 9e68399..d7c4db6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -2111,9 +2112,10 @@ static int migrate_vma_collect_hole(unsigned long start,
                                    struct mm_walk *walk)
 {
         struct migrate_vma *migrate = walk->private;
-        unsigned long addr, next;
+        unsigned long addr;
 
         for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+                migrate->cpages++;
                 migrate->dst[migrate->npages] = 0;
                 migrate->src[migrate->npages++] = 0;
         }
@@ -2150,6 +2152,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
                 pfn = pte_pfn(pte);
 
                 if (pte_none(pte)) {
+                        migrate->cpages++;
                         mpfn = pfn = 0;
                         goto next;
                 }
@@ -2463,6 +2466,118 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
         }
 }
 
+static void migrate_vma_insert_page(struct migrate_vma *migrate,
+                                    unsigned long addr,
+                                    struct page *page,
+                                    unsigned long *src,
+                                    unsigned long *dst)
+{
+        struct vm_area_struct *vma = migrate->vma;
+        struct mm_struct *mm = vma->vm_mm;
+        struct mem_cgroup *memcg;
+        spinlock_t *ptl;
+        pgd_t *pgdp;
+        p4d_t *p4dp;
+        pud_t *pudp;
+        pmd_t *pmdp;
+        pte_t *ptep;
+        pte_t entry;
+
+        /* Only allow populating anonymous memory */
+        if (!vma_is_anonymous(vma))
+                goto abort;
+
+        pgdp = pgd_offset(mm, addr);
+        p4dp = p4d_alloc(mm, pgdp, addr);
+        if (!p4dp)
+                goto abort;
+        pudp = pud_alloc(mm, p4dp, addr);
+        if (!pudp)
+                goto abort;
+        pmdp = pmd_alloc(mm, pudp, addr);
+        if (!pmdp)
+                goto abort;
+
+        if (pmd_trans_unstable(pmdp) || pmd_devmap(*pmdp))
+                goto abort;
+
+        /*
+         * Use pte_alloc() instead of pte_alloc_map(). We can't run
+         * pte_offset_map() on pmds where a huge pmd might be created
+         * from a different thread.
+         *
+         * pte_alloc_map() is safe to use under down_write(mmap_sem) or when
+         * parallel threads are excluded by other means.
+         *
+         * Here we only have down_read(mmap_sem).
+         */
+        if (pte_alloc(mm, pmdp, addr))
+                goto abort;
+
+        /* See the comment in pte_alloc_one_map() */
+        if (unlikely(pmd_trans_unstable(pmdp)))
+                goto abort;
+
+        if (unlikely(anon_vma_prepare(vma)))
+                goto abort;
+        if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+                goto abort;
+
+        /*
+         * The memory barrier inside __SetPageUptodate makes sure that
+         * preceding stores to the page contents become visible before
+         * the set_pte_at() write.
+         */
+        __SetPageUptodate(page);
+
+        if (is_zone_device_page(page) && is_device_private_page(page)) {
+                swp_entry_t swp_entry;
+
+                swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
+                entry = swp_entry_to_pte(swp_entry);
+        } else {
+                entry = mk_pte(page, vma->vm_page_prot);
+                if (vma->vm_flags & VM_WRITE)
+                        entry = pte_mkwrite(pte_mkdirty(entry));
+        }
+
+        ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+        if (!pte_none(*ptep)) {
+                pte_unmap_unlock(ptep, ptl);
+                mem_cgroup_cancel_charge(page, memcg, false);
+                goto abort;
+        }
+
+        /*
+         * Check for userfaultfd but do not deliver the fault. Instead,
+         * just back off.
+         */
+        if (userfaultfd_missing(vma)) {
+                pte_unmap_unlock(ptep, ptl);
+                mem_cgroup_cancel_charge(page, memcg, false);
+                goto abort;
+        }
+
+        inc_mm_counter(mm, MM_ANONPAGES);
+        page_add_new_anon_rmap(page, vma, addr, false);
+        mem_cgroup_commit_charge(page, memcg, false, false);
+        if (!is_zone_device_page(page))
+                lru_cache_add_active_or_unevictable(page, vma);
+        set_pte_at(mm, addr, ptep, entry);
+
+        /* Take a reference on the page */
+        get_page(page);
+
+        /* No need to invalidate - it was non-present before */
+        update_mmu_cache(vma, addr, ptep);
+        pte_unmap_unlock(ptep, ptl);
+        *src = MIGRATE_PFN_MIGRATE;
+        return;
+
+abort:
+        *src &= ~MIGRATE_PFN_MIGRATE;
+}
+
 /*
  * migrate_vma_pages() - migrate meta-data from src page to dst page
  * @migrate: migrate struct containing all migration information
@@ -2483,10 +2598,16 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
                 struct address_space *mapping;
                 int r;
 
-                if (!page || !newpage)
+                if (!newpage) {
+                        migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
                         continue;
-                if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+                } else if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE)) {
+                        if (!page)
+                                migrate_vma_insert_page(migrate, addr, newpage,
+                                                        &migrate->src[i],
+                                                        &migrate->dst[i]);
                         continue;
+                }
 
                 mapping = page_mapping(page);
 
@@ -2536,8 +2657,14 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
                 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
                 struct page *page = migrate_pfn_to_page(migrate->src[i]);
 
-                if (!page)
+                if (!page) {
+                        if (newpage) {
+                                unlock_page(newpage);
+                                put_page(newpage);
+                        }
                         continue;
+                }
+
                 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
                         if (newpage) {
                                 unlock_page(newpage);
-- 
2.9.4
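As a usage note (sketch only, not part of this patch): a driver would
typically walk the range in fixed-size batches and call migrate_vma()
with mmap_sem held for read, letting empty entries be filled as in the
callback sketch above. The my_*() helpers and the 64-entry batch are
hypothetical, and the migrate_vma() prototype is assumed from earlier
patches in this series:

#define MY_MIGRATE_BATCH 64UL

static void my_finalize_and_map(struct vm_area_struct *vma,
                                const unsigned long *src,
                                const unsigned long *dst,
                                unsigned long start,
                                unsigned long end,
                                void *private)
{
        /* Update device page tables for the entries that did migrate. */
}

static const struct migrate_vma_ops my_migrate_ops = {
        .alloc_and_copy         = my_alloc_and_copy,
        .finalize_and_map       = my_finalize_and_map,
};

static int my_migrate_range(struct vm_area_struct *vma,
                            unsigned long start, unsigned long end)
{
        /* Small batch kept on the stack for simplicity of the sketch. */
        unsigned long src[MY_MIGRATE_BATCH], dst[MY_MIGRATE_BATCH];
        unsigned long addr;

        /* Caller must hold down_read(&vma->vm_mm->mmap_sem). */
        for (addr = start; addr < end; addr += MY_MIGRATE_BATCH << PAGE_SHIFT) {
                unsigned long next = min(end, addr + (MY_MIGRATE_BATCH << PAGE_SHIFT));
                int ret;

                ret = migrate_vma(&my_migrate_ops, vma, addr, next,
                                  src, dst, NULL);
                if (ret)
                        return ret;
        }
        return 0;
}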