From mboxrd@z Thu Jan 1 00:00:00 1970 Subject: Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range References: <01998876-7e1e-2b84-cc65-8cda310083f2@siemens.com> <87mtvgwcq9.fsf@xenomai.org> From: Jan Kiszka Message-ID: <7e3057f2-d642-1207-902b-c84cf21e4e91@siemens.com> Date: Mon, 8 Mar 2021 08:32:11 +0100 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset="utf-8" Content-Language: en-US Content-Transfer-Encoding: 8bit List-Id: Discussions about the Xenomai project List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Greg Gallagher Cc: Philippe Gerum , Xenomai On 08.03.21 08:02, Greg Gallagher wrote: > > > On Sun, Mar 7, 2021 at 8:09 AM Greg Gallagher > wrote: > > > > On Sun, Mar 7, 2021 at 7:56 AM Jan Kiszka > wrote: > > On 06.03.21 15:58, Philippe Gerum wrote: > > > > Jan Kiszka > writes: > > > >> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote: > >>> From: Jan Kiszka > > >>> > >>> This is still needed to avoid that a real-time parent seems > minor faults > >>> after forking for shared pages until they are finally unshared. > >>> > >>> This missing support became visible in Xenomai when running > the complete > >>> smokey test suite on certain architectures/config combinations. > >>> > >>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from > copy_pte_range") > >>> Signed-off-by: Jan Kiszka > > >>> --- > >>> > >>> For discussion. If there is a better pattern in reach, I'm > happy to go > >>> that path as well. > >>> > >>> Otherwise, this should go into our 4.19 branches (Greg, your > arm64 isn't > >>> in sync with noarch, you will need manual merging). > Possibly, it already > >>> applies to 5.4 as well, didn't check yet. > >>> > >>>  mm/memory.c | 76 > ++++++++++++++++++++++++++++++++++++++++++++++++----- > >>>  1 file changed, 69 insertions(+), 7 deletions(-) > >>> > >>> diff --git a/mm/memory.c b/mm/memory.c > >>> index 4cc7c72a0b7e..f102f4de2213 100644 > >>> --- a/mm/memory.c > >>> +++ b/mm/memory.c > >>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn); > >>>  > >>>  unsigned long highest_memmap_pfn __read_mostly; > >>>  > >>> +static bool cow_user_page(struct page *dst, struct page *src, > >>> +                     struct vm_fault *vmf); > >>> + > >>>  /* > >>>   * CONFIG_MMU architectures set up ZERO_PAGE in their > paging_init() > >>>   */ > >>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct > vm_area_struct *vma, unsigned long addr, > >>>  > >>>  static inline unsigned long > >>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct > *src_mm, > >>> -           pte_t *dst_pte, pte_t *src_pte, struct > vm_area_struct *vma, > >>> -           unsigned long addr, int *rss) > >>> +        pte_t *dst_pte, pte_t *src_pte, struct > vm_area_struct *vma, > >>> +        unsigned long addr, int *rss, pmd_t *src_pmd, > >>> +        struct page *uncow_page) > >>>  { > >>>     unsigned long vm_flags = vma->vm_flags; > >>>     pte_t pte = *src_pte; > >>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct > *dst_mm, struct mm_struct *src_mm, > >>>      * in the parent and the child > >>>      */ > >>>     if (is_cow_mapping(vm_flags) && pte_write(pte)) { > >>> +#ifdef CONFIG_IPIPE > >>> +           if (uncow_page) { > >>> +                   struct page *old_page = > vm_normal_page(vma, addr, pte); > >>> +                   struct vm_fault vmf; > >>> + > >>> +                   vmf.vma = vma; > >>> +                   vmf.address = addr; > >>> +                   vmf.orig_pte = pte; > >>> +                   vmf.pmd = src_pmd; > >>> + > >>> +                   if (cow_user_page(uncow_page, old_page, > &vmf)) { > >>> +                           pte = mk_pte(uncow_page, > vma->vm_page_prot); > >>> + > >>> +                           if (vm_flags & VM_SHARED) > >>> +                                   pte = pte_mkclean(pte); > >>> +                           pte = pte_mkold(pte); > >>> + > >>> +                          >  page_add_new_anon_rmap(uncow_page, vma, addr, > > > >>> +                                                  false); > >>> +                           rss[!!PageAnon(uncow_page)]++; > >>> +                           goto out_set_pte; > >>> +                   } else { > >>> +                           /* unexpected: source page no > longer present */ > >>> +                           WARN_ON_ONCE(1); > >>> +                   } > >>> +           } > >>> +#endif /* CONFIG_IPIPE */ > >>>             ptep_set_wrprotect(src_mm, addr, src_pte); > >>>             pte = pte_wrprotect(pte); > >>>     } > >>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct > mm_struct *dst_mm, struct mm_struct *src_mm, > >>>     int progress = 0; > >>>     int rss[NR_MM_COUNTERS]; > >>>     swp_entry_t entry = (swp_entry_t){0}; > >>> - > >>> +   struct page *uncow_page = NULL; > >>> +#ifdef CONFIG_IPIPE > >>> +   int do_cow_break = 0; > >>> +again: > >>> +   if (do_cow_break) { > >>> +           uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, > addr); > >>> +           if (uncow_page == NULL) > >>> +                   return -ENOMEM; > >>> +           do_cow_break = 0; > >>> +   } > >>> +#else > >>>  again: > >>> +#endif > >>>     init_rss_vec(rss); > >>>  > >>>     dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, > &dst_ptl); > >>> -   if (!dst_pte) > >>> +   if (!dst_pte) { > >>> +           if (uncow_page) > >>> +                   put_page(uncow_page); > >>>             return -ENOMEM; > >>> +   } > >>>     src_pte = pte_offset_map(src_pmd, addr); > >>>     src_ptl = pte_lockptr(src_mm, src_pmd); > >>>     spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); > >>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct > mm_struct *dst_mm, struct mm_struct *src_mm, > >>>                     progress++; > >>>                     continue; > >>>             } > >>> +#ifdef CONFIG_IPIPE > >>> +           if (likely(uncow_page == NULL) && > likely(pte_present(*src_pte))) { > >>> +                   if (is_cow_mapping(vma->vm_flags) && > >>> +                       test_bit(MMF_VM_PINNED, > &src_mm->flags) && > >>> +                       ((vma->vm_flags|src_mm->def_flags) & > VM_LOCKED)) { > >>> +                           arch_leave_lazy_mmu_mode(); > >>> +                           spin_unlock(src_ptl); > >>> +                           pte_unmap(src_pte); > >>> +                           add_mm_rss_vec(dst_mm, rss); > >>> +                           pte_unmap_unlock(dst_pte, dst_ptl); > >>> +                           cond_resched(); > >>> +                           do_cow_break = 1; > >>> +                           goto again; > >>> +                   } > >>> +           } > >>> +#endif > >>>             entry.val = copy_one_pte(dst_mm, src_mm, > dst_pte, src_pte, > >>> -                                                   vma, > addr, rss); > >>> +                                    vma, addr, rss, > src_pmd, uncow_page); > >>> +           uncow_page = NULL; > >>>             if (entry.val) > >>>                     break; > >>>             progress += 8; > >>> @@ -2348,8 +2410,8 @@ static inline int > pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, > >>>     return same; > >>>  } > >>>  > >>> -static inline bool cow_user_page(struct page *dst, struct > page *src, > >>> -                            struct vm_fault *vmf) > >>> +static bool cow_user_page(struct page *dst, struct page *src, > >>> +                     struct vm_fault *vmf) > >>>  { > >>>     bool ret; > >>>     void *kaddr; > >>> > >> > >> With this applied to ipipe-4.19 and 5.4 queues, related > issues are gone, see > >> > >> > https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670 > > >> > >> I'm inclined to take this for now so that we can move forward > with other > >> topics. Any comments? > >> > > > > Fine with me. This code can be simplified, working on it. I'll > channel > > this through Dovetail. > > > > OK, applied. > > Greg, you can find updated no-arch branches for 4.19 and 5.4. > > > Did this commit go into the no-arch tree? I'll have new arm/arm64 5.4 > and 4.19 releases this week (aiming for Wednesday). > Yes, but not in the form sent to the list for forward-merging trees (like x86). The noarch trees contains the fix folded into the introducing commits, rebased over the respective stable branches. Jan -- Siemens AG, T RDA IOT Corporate Competence Center Embedded Linux