From mboxrd@z Thu Jan 1 00:00:00 1970
References: <01998876-7e1e-2b84-cc65-8cda310083f2@siemens.com>
From: Philippe Gerum
Subject: Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
In-reply-to: <01998876-7e1e-2b84-cc65-8cda310083f2@siemens.com>
Date: Sat, 06 Mar 2021 15:58:22 +0100
Message-ID: <87mtvgwcq9.fsf@xenomai.org>
MIME-Version: 1.0
Content-Type: text/plain
List-Id: Discussions about the Xenomai project
To: Jan Kiszka
Cc: Xenomai, Greg Gallagher

Jan Kiszka writes:

> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>> From: Jan Kiszka
>>
>> This is still needed to avoid the real-time parent seeing minor faults
>> on shared pages after forking, until those pages are finally unshared.
>>
>> This missing support became visible in Xenomai when running the complete
>> smokey test suite on certain architecture/config combinations.
>>
>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
>> Signed-off-by: Jan Kiszka
>> ---
>>
>> For discussion. If there is a better pattern in reach, I'm happy to go
>> that path as well.
>>
>> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't
>> in sync with noarch, you will need manual merging). Possibly, it already
>> applies to 5.4 as well, didn't check yet.
>>
>>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 69 insertions(+), 7 deletions(-)
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 4cc7c72a0b7e..f102f4de2213 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>>  
>>  unsigned long highest_memmap_pfn __read_mostly;
>>  
>> +static bool cow_user_page(struct page *dst, struct page *src,
>> +                          struct vm_fault *vmf);
>> +
>>  /*
>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>>   */
>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>>  
>>  static inline unsigned long
>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>> -             pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>> -             unsigned long addr, int *rss)
>> +             pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>> +             unsigned long addr, int *rss, pmd_t *src_pmd,
>> +             struct page *uncow_page)
>>  {
>>          unsigned long vm_flags = vma->vm_flags;
>>          pte_t pte = *src_pte;
>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>           * in the parent and the child
>>           */
>>          if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>> +#ifdef CONFIG_IPIPE
>> +                if (uncow_page) {
>> +                        struct page *old_page = vm_normal_page(vma, addr, pte);
>> +                        struct vm_fault vmf;
>> +
>> +                        vmf.vma = vma;
>> +                        vmf.address = addr;
>> +                        vmf.orig_pte = pte;
>> +                        vmf.pmd = src_pmd;
>> +
>> +                        if (cow_user_page(uncow_page, old_page, &vmf)) {
>> +                                pte = mk_pte(uncow_page, vma->vm_page_prot);
>> +
>> +                                if (vm_flags & VM_SHARED)
>> +                                        pte = pte_mkclean(pte);
>> +                                pte = pte_mkold(pte);
>> +
>> +                                page_add_new_anon_rmap(uncow_page, vma, addr,
>> +                                                       false);
>> +                                rss[!!PageAnon(uncow_page)]++;
>> +                                goto out_set_pte;
>> +                        } else {
>> +                                /* unexpected: source page no longer present */
>> +                                WARN_ON_ONCE(1);
>> +                        }
>> +                }
>> +#endif /* CONFIG_IPIPE */
>>                  ptep_set_wrprotect(src_mm, addr, src_pte);
>>                  pte = pte_wrprotect(pte);
>>          }
>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>          int progress = 0;
>>          int rss[NR_MM_COUNTERS];
>>          swp_entry_t entry = (swp_entry_t){0};
>> -
>> +        struct page *uncow_page = NULL;
>> +#ifdef CONFIG_IPIPE
>> +        int do_cow_break = 0;
>> +again:
>> +        if (do_cow_break) {
>> +                uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
>> +                if (uncow_page == NULL)
>> +                        return -ENOMEM;
>> +                do_cow_break = 0;
>> +        }
>> +#else
>>  again:
>> +#endif
>>          init_rss_vec(rss);
>>  
>>          dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
>> -        if (!dst_pte)
>> +        if (!dst_pte) {
>> +                if (uncow_page)
>> +                        put_page(uncow_page);
>>                  return -ENOMEM;
>> +        }
>>          src_pte = pte_offset_map(src_pmd, addr);
>>          src_ptl = pte_lockptr(src_mm, src_pmd);
>>          spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>                          progress++;
>>                          continue;
>>                  }
>> +#ifdef CONFIG_IPIPE
>> +                if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
>> +                        if (is_cow_mapping(vma->vm_flags) &&
>> +                            test_bit(MMF_VM_PINNED, &src_mm->flags) &&
>> +                            ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
>> +                                arch_leave_lazy_mmu_mode();
>> +                                spin_unlock(src_ptl);
>> +                                pte_unmap(src_pte);
>> +                                add_mm_rss_vec(dst_mm, rss);
>> +                                pte_unmap_unlock(dst_pte, dst_ptl);
>> +                                cond_resched();
>> +                                do_cow_break = 1;
>> +                                goto again;
>> +                        }
>> +                }
>> +#endif
>>                  entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>> -                                         vma, addr, rss);
>> +                                         vma, addr, rss, src_pmd, uncow_page);
>> +                uncow_page = NULL;
>>                  if (entry.val)
>>                          break;
>>                  progress += 8;
>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>>          return same;
>>  }
>>  
>> -static inline bool cow_user_page(struct page *dst, struct page *src,
>> -                                 struct vm_fault *vmf)
>> +static bool cow_user_page(struct page *dst, struct page *src,
>> +                          struct vm_fault *vmf)
>>  {
>>          bool ret;
>>          void *kaddr;
>>
>
> With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see
>
> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>
> I'm inclined to take this for now so that we can move forward with other
> topics. Any comments?
>

Fine with me. This code can be simplified, working on it. I'll channel
this through Dovetail.
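
FWIW, the trigger condition boils down to a single predicate which could
be factored out of the copy loop. A minimal sketch, reusing the names
from the diff above (pte_needs_uncow() is a hypothetical helper, not
part of the patch; locking and rmap details elided):

/*
 * Hypothetical helper: should fork() break COW eagerly for this PTE?
 * True when the source mm is pinned by the real-time core and the
 * mapping is a locked COW mapping, i.e. exactly the case where the
 * RT parent must not take a minor fault on its next write.
 */
static inline bool pte_needs_uncow(struct mm_struct *src_mm,
                                   struct vm_area_struct *vma,
                                   pte_t pte)
{
        if (!pte_present(pte) || !is_cow_mapping(vma->vm_flags))
                return false;
        if (!test_bit(MMF_VM_PINNED, &src_mm->flags))
                return false;
        return !!((vma->vm_flags | src_mm->def_flags) & VM_LOCKED);
}

copy_pte_range() would then only keep the unlock/alloc/retry dance, and
copy_one_pte() the actual page copy via cow_user_page().

-- 
Philippe.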