* [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
@ 2021-03-05 11:38 Jan Kiszka
  2021-03-06 13:14 ` Jan Kiszka
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Kiszka @ 2021-03-05 11:38 UTC (permalink / raw)
  To: Xenomai, Philippe Gerum

From: Jan Kiszka <jan.kiszka@siemens.com>

This is still needed to prevent a real-time parent from seeing minor faults
on shared pages after forking, until those pages are finally unshared.

This missing support became visible in Xenomai when running the complete
smokey test suite on certain architectures/config combinations.

Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

For discussion. If there is a better pattern in reach, I'm happy to go 
that path as well.

Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
in sync with noarch, you will need manual merging). Possibly, it already 
applies to 5.4 as well, didn't check yet.
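
To illustrate the scenario described above, here is a minimal userspace
sketch (illustrative only, not one of the smokey tests; buffer size and
flow are made up):

#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	/* Pin all current and future mappings, as an RT process would. */
	if (mlockall(MCL_CURRENT | MCL_FUTURE))
		return 1;

	char *buf = malloc(4096);
	if (!buf)
		return 1;
	memset(buf, 1, 4096);	/* fault the page in up front */

	/* fork() write-protects COW PTEs in parent and child. */
	pid_t pid = fork();
	if (pid == 0)
		_exit(0);

	/*
	 * Without un-COW at fork time, this write hits a write-protected
	 * PTE and takes a COW minor fault in the parent, i.e. unbounded
	 * latency for a real-time thread.
	 */
	buf[0] = 2;

	waitpid(pid, NULL, 0);
	return 0;
}

With MMF_VM_PINNED set and VM_LOCKED in effect, copy_pte_range() instead
breaks COW eagerly: the child gets a private copy of the page, and the
parent keeps its writable PTE.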

 mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 7 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 4cc7c72a0b7e..f102f4de2213 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
 
 unsigned long highest_memmap_pfn __read_mostly;
 
+static bool cow_user_page(struct page *dst, struct page *src,
+			  struct vm_fault *vmf);
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+	     unsigned long addr, int *rss, pmd_t *src_pmd,
+	     struct page *uncow_page)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
+#ifdef CONFIG_IPIPE
+		if (uncow_page) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			struct vm_fault vmf;
+
+			vmf.vma = vma;
+			vmf.address = addr;
+			vmf.orig_pte = pte;
+			vmf.pmd = src_pmd;
+
+			if (cow_user_page(uncow_page, old_page, &vmf)) {
+				pte = mk_pte(uncow_page, vma->vm_page_prot);
+
+				if (vm_flags & VM_SHARED)
+					pte = pte_mkclean(pte);
+				pte = pte_mkold(pte);
+
+				page_add_new_anon_rmap(uncow_page, vma, addr,
+						       false);
+				rss[!!PageAnon(uncow_page)]++;
+				goto out_set_pte;
+			} else {
+				/* unexpected: source page no longer present */
+				WARN_ON_ONCE(1);
+			}
+		}
+#endif /* CONFIG_IPIPE */
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	int progress = 0;
 	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
-
+	struct page *uncow_page = NULL;
+#ifdef CONFIG_IPIPE
+	int do_cow_break = 0;
+again:
+	if (do_cow_break) {
+		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+		if (uncow_page == NULL)
+			return -ENOMEM;
+		do_cow_break = 0;
+	}
+#else
 again:
+#endif
 	init_rss_vec(rss);
 
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-	if (!dst_pte)
+	if (!dst_pte) {
+		if (uncow_page)
+			put_page(uncow_page);
 		return -ENOMEM;
+	}
 	src_pte = pte_offset_map(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			progress++;
 			continue;
 		}
+#ifdef CONFIG_IPIPE
+		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
+			if (is_cow_mapping(vma->vm_flags) &&
+			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
+			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
+				arch_leave_lazy_mmu_mode();
+				spin_unlock(src_ptl);
+				pte_unmap(src_pte);
+				add_mm_rss_vec(dst_mm, rss);
+				pte_unmap_unlock(dst_pte, dst_ptl);
+				cond_resched();
+				do_cow_break = 1;
+				goto again;
+			}
+		}
+#endif
 		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-							vma, addr, rss);
+					 vma, addr, rss, src_pmd, uncow_page);
+		uncow_page = NULL;
 		if (entry.val)
 			break;
 		progress += 8;
@@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 	return same;
 }
 
-static inline bool cow_user_page(struct page *dst, struct page *src,
-				 struct vm_fault *vmf)
+static bool cow_user_page(struct page *dst, struct page *src,
+			  struct vm_fault *vmf)
 {
 	bool ret;
 	void *kaddr;
-- 
2.26.2


-- 
Siemens AG, T RDA IOT
Corporate Competence Center Embedded Linux



* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-05 11:38 [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range Jan Kiszka
@ 2021-03-06 13:14 ` Jan Kiszka
  2021-03-06 13:33   ` Henning Schild
  2021-03-06 14:58   ` Philippe Gerum
  0 siblings, 2 replies; 10+ messages in thread
From: Jan Kiszka @ 2021-03-06 13:14 UTC (permalink / raw)
  To: Xenomai, Philippe Gerum

On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> This is still needed to prevent a real-time parent from seeing minor faults
> on shared pages after forking, until those pages are finally unshared.
> 
> This missing support became visible in Xenomai when running the complete
> smokey test suite on certain architectures/config combinations.
> 
> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
> 
> For discussion. If there is a better pattern in reach, I'm happy to go 
> that path as well.
> 
> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
> in sync with noarch, you will need manual merging). Possibly, it already 
> applies to 5.4 as well, didn't check yet.
> 
>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 69 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index 4cc7c72a0b7e..f102f4de2213 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>  
>  unsigned long highest_memmap_pfn __read_mostly;
>  
> +static bool cow_user_page(struct page *dst, struct page *src,
> +			  struct vm_fault *vmf);
> +
>  /*
>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>   */
> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>  
>  static inline unsigned long
>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> -		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
> -		unsigned long addr, int *rss)
> +	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
> +	     unsigned long addr, int *rss, pmd_t *src_pmd,
> +	     struct page *uncow_page)
>  {
>  	unsigned long vm_flags = vma->vm_flags;
>  	pte_t pte = *src_pte;
> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  	 * in the parent and the child
>  	 */
>  	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
> +#ifdef CONFIG_IPIPE
> +		if (uncow_page) {
> +			struct page *old_page = vm_normal_page(vma, addr, pte);
> +			struct vm_fault vmf;
> +
> +			vmf.vma = vma;
> +			vmf.address = addr;
> +			vmf.orig_pte = pte;
> +			vmf.pmd = src_pmd;
> +
> +			if (cow_user_page(uncow_page, old_page, &vmf)) {
> +				pte = mk_pte(uncow_page, vma->vm_page_prot);
> +
> +				if (vm_flags & VM_SHARED)
> +					pte = pte_mkclean(pte);
> +				pte = pte_mkold(pte);
> +
> +				page_add_new_anon_rmap(uncow_page, vma, addr,
> +						       false);
> +				rss[!!PageAnon(uncow_page)]++;
> +				goto out_set_pte;
> +			} else {
> +				/* unexpected: source page no longer present */
> +				WARN_ON_ONCE(1);
> +			}
> +		}
> +#endif /* CONFIG_IPIPE */
>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>  		pte = pte_wrprotect(pte);
>  	}
> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  	int progress = 0;
>  	int rss[NR_MM_COUNTERS];
>  	swp_entry_t entry = (swp_entry_t){0};
> -
> +	struct page *uncow_page = NULL;
> +#ifdef CONFIG_IPIPE
> +	int do_cow_break = 0;
> +again:
> +	if (do_cow_break) {
> +		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
> +		if (uncow_page == NULL)
> +			return -ENOMEM;
> +		do_cow_break = 0;
> +	}
> +#else
>  again:
> +#endif
>  	init_rss_vec(rss);
>  
>  	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
> -	if (!dst_pte)
> +	if (!dst_pte) {
> +		if (uncow_page)
> +			put_page(uncow_page);
>  		return -ENOMEM;
> +	}
>  	src_pte = pte_offset_map(src_pmd, addr);
>  	src_ptl = pte_lockptr(src_mm, src_pmd);
>  	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  			progress++;
>  			continue;
>  		}
> +#ifdef CONFIG_IPIPE
> +		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
> +			if (is_cow_mapping(vma->vm_flags) &&
> +			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
> +			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
> +				arch_leave_lazy_mmu_mode();
> +				spin_unlock(src_ptl);
> +				pte_unmap(src_pte);
> +				add_mm_rss_vec(dst_mm, rss);
> +				pte_unmap_unlock(dst_pte, dst_ptl);
> +				cond_resched();
> +				do_cow_break = 1;
> +				goto again;
> +			}
> +		}
> +#endif
>  		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
> -							vma, addr, rss);
> +					 vma, addr, rss, src_pmd, uncow_page);
> +		uncow_page = NULL;
>  		if (entry.val)
>  			break;
>  		progress += 8;
> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>  	return same;
>  }
>  
> -static inline bool cow_user_page(struct page *dst, struct page *src,
> -				 struct vm_fault *vmf)
> +static bool cow_user_page(struct page *dst, struct page *src,
> +			  struct vm_fault *vmf)
>  {
>  	bool ret;
>  	void *kaddr;
> 

With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see

https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670

I'm inclined to take this for now so that we can move forward with other
topics. Any comments?

Jan

-- 
Siemens AG, T RDA IOT
Corporate Competence Center Embedded Linux



* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-06 13:14 ` Jan Kiszka
@ 2021-03-06 13:33   ` Henning Schild
  2021-03-06 14:58   ` Philippe Gerum
  1 sibling, 0 replies; 10+ messages in thread
From: Henning Schild @ 2021-03-06 13:33 UTC (permalink / raw)
  To: Jan Kiszka via Xenomai

On Sat, 6 Mar 2021 14:14:30 +0100,
Jan Kiszka via Xenomai <xenomai@xenomai.org> wrote:

> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
> > From: Jan Kiszka <jan.kiszka@siemens.com>
> > 
> > This is still needed to prevent a real-time parent from seeing minor
> > faults on shared pages after forking, until those pages are finally
> > unshared.
> > 
> > This missing support became visible in Xenomai when running the
> > complete smokey test suite on certain architectures/config
> > combinations.
> > 
> > Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
> > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> > ---
> > 
> > For discussion. If there is a better pattern in reach, I'm happy to
> > go that path as well.
> > 
> > Otherwise, this should go into our 4.19 branches (Greg, your arm64
> > isn't in sync with noarch, you will need manual merging). Possibly,
> > it already applies to 5.4 as well, didn't check yet.
> > 
> >  mm/memory.c | 76
> > ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file
> > changed, 69 insertions(+), 7 deletions(-)
> > 
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 4cc7c72a0b7e..f102f4de2213 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
> >  
> >  unsigned long highest_memmap_pfn __read_mostly;
> >  
> > +static bool cow_user_page(struct page *dst, struct page *src,
> > +			  struct vm_fault *vmf);
> > +
> >  /*
> >   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
> >   */
> > @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct
> > vm_area_struct *vma, unsigned long addr, 
> >  static inline unsigned long
> >  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> > -		pte_t *dst_pte, pte_t *src_pte, struct
> > vm_area_struct *vma,
> > -		unsigned long addr, int *rss)
> > +	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct
> > *vma,
> > +	     unsigned long addr, int *rss, pmd_t *src_pmd,
> > +	     struct page *uncow_page)
> >  {
> >  	unsigned long vm_flags = vma->vm_flags;
> >  	pte_t pte = *src_pte;
> > @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm,
> > struct mm_struct *src_mm,
> >  	 * in the parent and the child
> >  	 */
> >  	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
> > +#ifdef CONFIG_IPIPE
> > +		if (uncow_page) {
> > +			struct page *old_page =
> > vm_normal_page(vma, addr, pte);
> > +			struct vm_fault vmf;
> > +
> > +			vmf.vma = vma;
> > +			vmf.address = addr;
> > +			vmf.orig_pte = pte;
> > +			vmf.pmd = src_pmd;
> > +
> > +			if (cow_user_page(uncow_page, old_page,
> > &vmf)) {
> > +				pte = mk_pte(uncow_page,
> > vma->vm_page_prot); +
> > +				if (vm_flags & VM_SHARED)
> > +					pte = pte_mkclean(pte);
> > +				pte = pte_mkold(pte);
> > +
> > +				page_add_new_anon_rmap(uncow_page,
> > vma, addr,
> > +						       false);
> > +				rss[!!PageAnon(uncow_page)]++;
> > +				goto out_set_pte;
> > +			} else {
> > +				/* unexpected: source page no
> > longer present */
> > +				WARN_ON_ONCE(1);
> > +			}
> > +		}
> > +#endif /* CONFIG_IPIPE */
> >  		ptep_set_wrprotect(src_mm, addr, src_pte);
> >  		pte = pte_wrprotect(pte);
> >  	}
> > @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct
> > *dst_mm, struct mm_struct *src_mm, int progress = 0;
> >  	int rss[NR_MM_COUNTERS];
> >  	swp_entry_t entry = (swp_entry_t){0};
> > -
> > +	struct page *uncow_page = NULL;
> > +#ifdef CONFIG_IPIPE
> > +	int do_cow_break = 0;
> > +again:
> > +	if (do_cow_break) {
> > +		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma,
> > addr);
> > +		if (uncow_page == NULL)
> > +			return -ENOMEM;
> > +		do_cow_break = 0;
> > +	}
> > +#else
> >  again:
> > +#endif
> >  	init_rss_vec(rss);
> >  
> >  	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr,
> > &dst_ptl);
> > -	if (!dst_pte)
> > +	if (!dst_pte) {
> > +		if (uncow_page)
> > +			put_page(uncow_page);
> >  		return -ENOMEM;
> > +	}
> >  	src_pte = pte_offset_map(src_pmd, addr);
> >  	src_ptl = pte_lockptr(src_mm, src_pmd);
> >  	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
> > @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct
> > *dst_mm, struct mm_struct *src_mm, progress++;
> >  			continue;
> >  		}
> > +#ifdef CONFIG_IPIPE
> > +		if (likely(uncow_page == NULL) &&
> > likely(pte_present(*src_pte))) {
> > +			if (is_cow_mapping(vma->vm_flags) &&
> > +			    test_bit(MMF_VM_PINNED,
> > &src_mm->flags) &&
> > +			    ((vma->vm_flags|src_mm->def_flags) &
> > VM_LOCKED)) {
> > +				arch_leave_lazy_mmu_mode();
> > +				spin_unlock(src_ptl);
> > +				pte_unmap(src_pte);
> > +				add_mm_rss_vec(dst_mm, rss);
> > +				pte_unmap_unlock(dst_pte, dst_ptl);
> > +				cond_resched();
> > +				do_cow_break = 1;
> > +				goto again;
> > +			}
> > +		}
> > +#endif
> >  		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte,
> > src_pte,
> > -							vma, addr,
> > rss);
> > +					 vma, addr, rss, src_pmd,
> > uncow_page);
> > +		uncow_page = NULL;
> >  		if (entry.val)
> >  			break;
> >  		progress += 8;
> > @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct
> > mm_struct *mm, pmd_t *pmd, return same;
> >  }
> >  
> > -static inline bool cow_user_page(struct page *dst, struct page
> > *src,
> > -				 struct vm_fault *vmf)
> > +static bool cow_user_page(struct page *dst, struct page *src,
> > +			  struct vm_fault *vmf)
> >  {
> >  	bool ret;
> >  	void *kaddr;
> >   
> 
> With this applied to ipipe-4.19 and 5.4 queues, related issues are
> gone, see
> 
> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
> 
> I'm inclined to take this for now so that we can move forward with
> other topics. Any comments?

I did not test it myself yet, but this looks like it brings back the
patch that my bisect found. So I assume it will work and that the code
is well known and tested.

So I would say: go ahead. This will allow me to release for my customer.

Thanks,
Henning

> Jan
> 




* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-06 13:14 ` Jan Kiszka
  2021-03-06 13:33   ` Henning Schild
@ 2021-03-06 14:58   ` Philippe Gerum
  2021-03-07 12:46     ` Jan Kiszka
  2021-03-15 10:00     ` Jan Kiszka
  1 sibling, 2 replies; 10+ messages in thread
From: Philippe Gerum @ 2021-03-06 14:58 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai, Greg Gallagher


Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>> 
>> This is still needed to prevent a real-time parent from seeing minor faults
>> on shared pages after forking, until those pages are finally unshared.
>> 
>> This missing support became visible in Xenomai when running the complete
>> smokey test suite on certain architectures/config combinations.
>> 
>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>> 
>> For discussion. If there is a better pattern in reach, I'm happy to go 
>> that path as well.
>> 
>> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
>> in sync with noarch, you will need manual merging). Possibly, it already 
>> applies to 5.4 as well, didn't check yet.
>> 
>>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 69 insertions(+), 7 deletions(-)
>> 
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 4cc7c72a0b7e..f102f4de2213 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>>  
>>  unsigned long highest_memmap_pfn __read_mostly;
>>  
>> +static bool cow_user_page(struct page *dst, struct page *src,
>> +			  struct vm_fault *vmf);
>> +
>>  /*
>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>>   */
>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>>  
>>  static inline unsigned long
>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>> -		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>> -		unsigned long addr, int *rss)
>> +	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>> +	     unsigned long addr, int *rss, pmd_t *src_pmd,
>> +	     struct page *uncow_page)
>>  {
>>  	unsigned long vm_flags = vma->vm_flags;
>>  	pte_t pte = *src_pte;
>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>  	 * in the parent and the child
>>  	 */
>>  	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>> +#ifdef CONFIG_IPIPE
>> +		if (uncow_page) {
>> +			struct page *old_page = vm_normal_page(vma, addr, pte);
>> +			struct vm_fault vmf;
>> +
>> +			vmf.vma = vma;
>> +			vmf.address = addr;
>> +			vmf.orig_pte = pte;
>> +			vmf.pmd = src_pmd;
>> +
>> +			if (cow_user_page(uncow_page, old_page, &vmf)) {
>> +				pte = mk_pte(uncow_page, vma->vm_page_prot);
>> +
>> +				if (vm_flags & VM_SHARED)
>> +					pte = pte_mkclean(pte);
>> +				pte = pte_mkold(pte);
>> +
>> +				page_add_new_anon_rmap(uncow_page, vma, addr,
>> +						       false);
>> +				rss[!!PageAnon(uncow_page)]++;
>> +				goto out_set_pte;
>> +			} else {
>> +				/* unexpected: source page no longer present */
>> +				WARN_ON_ONCE(1);
>> +			}
>> +		}
>> +#endif /* CONFIG_IPIPE */
>>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>>  		pte = pte_wrprotect(pte);
>>  	}
>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>  	int progress = 0;
>>  	int rss[NR_MM_COUNTERS];
>>  	swp_entry_t entry = (swp_entry_t){0};
>> -
>> +	struct page *uncow_page = NULL;
>> +#ifdef CONFIG_IPIPE
>> +	int do_cow_break = 0;
>> +again:
>> +	if (do_cow_break) {
>> +		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
>> +		if (uncow_page == NULL)
>> +			return -ENOMEM;
>> +		do_cow_break = 0;
>> +	}
>> +#else
>>  again:
>> +#endif
>>  	init_rss_vec(rss);
>>  
>>  	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
>> -	if (!dst_pte)
>> +	if (!dst_pte) {
>> +		if (uncow_page)
>> +			put_page(uncow_page);
>>  		return -ENOMEM;
>> +	}
>>  	src_pte = pte_offset_map(src_pmd, addr);
>>  	src_ptl = pte_lockptr(src_mm, src_pmd);
>>  	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>  			progress++;
>>  			continue;
>>  		}
>> +#ifdef CONFIG_IPIPE
>> +		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
>> +			if (is_cow_mapping(vma->vm_flags) &&
>> +			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
>> +			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
>> +				arch_leave_lazy_mmu_mode();
>> +				spin_unlock(src_ptl);
>> +				pte_unmap(src_pte);
>> +				add_mm_rss_vec(dst_mm, rss);
>> +				pte_unmap_unlock(dst_pte, dst_ptl);
>> +				cond_resched();
>> +				do_cow_break = 1;
>> +				goto again;
>> +			}
>> +		}
>> +#endif
>>  		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>> -							vma, addr, rss);
>> +					 vma, addr, rss, src_pmd, uncow_page);
>> +		uncow_page = NULL;
>>  		if (entry.val)
>>  			break;
>>  		progress += 8;
>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>>  	return same;
>>  }
>>  
>> -static inline bool cow_user_page(struct page *dst, struct page *src,
>> -				 struct vm_fault *vmf)
>> +static bool cow_user_page(struct page *dst, struct page *src,
>> +			  struct vm_fault *vmf)
>>  {
>>  	bool ret;
>>  	void *kaddr;
>> 
>
> With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see
>
> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>
> I'm inclined to take this for now so that we can move forward with other
> topics. Any comments?
>

Fine with me. This code can be simplified, working on it. I'll channel
this through Dovetail.

-- 
Philippe.



* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-06 14:58   ` Philippe Gerum
@ 2021-03-07 12:46     ` Jan Kiszka
  2021-03-07 13:09       ` Greg Gallagher
  2021-03-15 10:00     ` Jan Kiszka
  1 sibling, 1 reply; 10+ messages in thread
From: Jan Kiszka @ 2021-03-07 12:46 UTC (permalink / raw)
  To: Philippe Gerum, Greg Gallagher; +Cc: Xenomai

On 06.03.21 15:58, Philippe Gerum wrote:
> 
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> This is still needed to prevent a real-time parent from seeing minor faults
>>> on shared pages after forking, until those pages are finally unshared.
>>>
>>> This missing support became visible in Xenomai when running the complete
>>> smokey test suite on certain architectures/config combinations.
>>>
>>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>
>>> For discussion. If there is a better pattern in reach, I'm happy to go 
>>> that path as well.
>>>
>>> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
>>> in sync with noarch, you will need manual merging). Possibly, it already 
>>> applies to 5.4 as well, didn't check yet.
>>>
>>>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>>>  1 file changed, 69 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index 4cc7c72a0b7e..f102f4de2213 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>>>  
>>>  unsigned long highest_memmap_pfn __read_mostly;
>>>  
>>> +static bool cow_user_page(struct page *dst, struct page *src,
>>> +			  struct vm_fault *vmf);
>>> +
>>>  /*
>>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>>>   */
>>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>>>  
>>>  static inline unsigned long
>>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>> -		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>>> -		unsigned long addr, int *rss)
>>> +	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>>> +	     unsigned long addr, int *rss, pmd_t *src_pmd,
>>> +	     struct page *uncow_page)
>>>  {
>>>  	unsigned long vm_flags = vma->vm_flags;
>>>  	pte_t pte = *src_pte;
>>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>  	 * in the parent and the child
>>>  	 */
>>>  	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>>> +#ifdef CONFIG_IPIPE
>>> +		if (uncow_page) {
>>> +			struct page *old_page = vm_normal_page(vma, addr, pte);
>>> +			struct vm_fault vmf;
>>> +
>>> +			vmf.vma = vma;
>>> +			vmf.address = addr;
>>> +			vmf.orig_pte = pte;
>>> +			vmf.pmd = src_pmd;
>>> +
>>> +			if (cow_user_page(uncow_page, old_page, &vmf)) {
>>> +				pte = mk_pte(uncow_page, vma->vm_page_prot);
>>> +
>>> +				if (vm_flags & VM_SHARED)
>>> +					pte = pte_mkclean(pte);
>>> +				pte = pte_mkold(pte);
>>> +
>>> +				page_add_new_anon_rmap(uncow_page, vma, addr,
>>> +						       false);
>>> +				rss[!!PageAnon(uncow_page)]++;
>>> +				goto out_set_pte;
>>> +			} else {
>>> +				/* unexpected: source page no longer present */
>>> +				WARN_ON_ONCE(1);
>>> +			}
>>> +		}
>>> +#endif /* CONFIG_IPIPE */
>>>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>>>  		pte = pte_wrprotect(pte);
>>>  	}
>>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>  	int progress = 0;
>>>  	int rss[NR_MM_COUNTERS];
>>>  	swp_entry_t entry = (swp_entry_t){0};
>>> -
>>> +	struct page *uncow_page = NULL;
>>> +#ifdef CONFIG_IPIPE
>>> +	int do_cow_break = 0;
>>> +again:
>>> +	if (do_cow_break) {
>>> +		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
>>> +		if (uncow_page == NULL)
>>> +			return -ENOMEM;
>>> +		do_cow_break = 0;
>>> +	}
>>> +#else
>>>  again:
>>> +#endif
>>>  	init_rss_vec(rss);
>>>  
>>>  	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
>>> -	if (!dst_pte)
>>> +	if (!dst_pte) {
>>> +		if (uncow_page)
>>> +			put_page(uncow_page);
>>>  		return -ENOMEM;
>>> +	}
>>>  	src_pte = pte_offset_map(src_pmd, addr);
>>>  	src_ptl = pte_lockptr(src_mm, src_pmd);
>>>  	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>  			progress++;
>>>  			continue;
>>>  		}
>>> +#ifdef CONFIG_IPIPE
>>> +		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
>>> +			if (is_cow_mapping(vma->vm_flags) &&
>>> +			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
>>> +			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
>>> +				arch_leave_lazy_mmu_mode();
>>> +				spin_unlock(src_ptl);
>>> +				pte_unmap(src_pte);
>>> +				add_mm_rss_vec(dst_mm, rss);
>>> +				pte_unmap_unlock(dst_pte, dst_ptl);
>>> +				cond_resched();
>>> +				do_cow_break = 1;
>>> +				goto again;
>>> +			}
>>> +		}
>>> +#endif
>>>  		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>>> -							vma, addr, rss);
>>> +					 vma, addr, rss, src_pmd, uncow_page);
>>> +		uncow_page = NULL;
>>>  		if (entry.val)
>>>  			break;
>>>  		progress += 8;
>>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>>>  	return same;
>>>  }
>>>  
>>> -static inline bool cow_user_page(struct page *dst, struct page *src,
>>> -				 struct vm_fault *vmf)
>>> +static bool cow_user_page(struct page *dst, struct page *src,
>>> +			  struct vm_fault *vmf)
>>>  {
>>>  	bool ret;
>>>  	void *kaddr;
>>>
>>
>> With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see
>>
>> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>>
>> I'm inclined to take this for now so that we can move forward with other
>> topics. Any comments?
>>
> 
> Fine with me. This code can be simplified, working on it. I'll channel
> this through Dovetail.
> 

OK, applied.

Greg, you can find updated no-arch branches for 4.19 and 5.4.

Jan

-- 
Siemens AG, T RDA IOT
Corporate Competence Center Embedded Linux



* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-07 12:46     ` Jan Kiszka
@ 2021-03-07 13:09       ` Greg Gallagher
  2021-03-08  7:02         ` Greg Gallagher
  0 siblings, 1 reply; 10+ messages in thread
From: Greg Gallagher @ 2021-03-07 13:09 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Philippe Gerum, Xenomai

On Sun, Mar 7, 2021 at 7:56 AM Jan Kiszka <jan.kiszka@siemens.com> wrote:

> On 06.03.21 15:58, Philippe Gerum wrote:
> >
> > Jan Kiszka <jan.kiszka@siemens.com> writes:
> >
> >> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
> >>> From: Jan Kiszka <jan.kiszka@siemens.com>
> >>>
> >>> This is still needed to prevent a real-time parent from seeing minor
> >>> faults on shared pages after forking, until those pages are finally
> >>> unshared.
> >>>
> >>> This missing support became visible in Xenomai when running the
> complete
> >>> smokey test suite on certain architectures/config combinations.
> >>>
> >>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
> >>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> >>> ---
> >>>
> >>> For discussion. If there is a better pattern in reach, I'm happy to go
> >>> that path as well.
> >>>
> >>> Otherwise, this should go into our 4.19 branches (Greg, your arm64
> isn't
> >>> in sync with noarch, you will need manual merging). Possibly, it
> already
> >>> applies to 5.4 as well, didn't check yet.
> >>>
> >>>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
> >>>  1 file changed, 69 insertions(+), 7 deletions(-)
> >>>
> >>> diff --git a/mm/memory.c b/mm/memory.c
> >>> index 4cc7c72a0b7e..f102f4de2213 100644
> >>> --- a/mm/memory.c
> >>> +++ b/mm/memory.c
> >>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
> >>>
> >>>  unsigned long highest_memmap_pfn __read_mostly;
> >>>
> >>> +static bool cow_user_page(struct page *dst, struct page *src,
> >>> +                     struct vm_fault *vmf);
> >>> +
> >>>  /*
> >>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
> >>>   */
> >>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct
> vm_area_struct *vma, unsigned long addr,
> >>>
> >>>  static inline unsigned long
> >>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> >>> -           pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
> >>> -           unsigned long addr, int *rss)
> >>> +        pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
> >>> +        unsigned long addr, int *rss, pmd_t *src_pmd,
> >>> +        struct page *uncow_page)
> >>>  {
> >>>     unsigned long vm_flags = vma->vm_flags;
> >>>     pte_t pte = *src_pte;
> >>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct
> mm_struct *src_mm,
> >>>      * in the parent and the child
> >>>      */
> >>>     if (is_cow_mapping(vm_flags) && pte_write(pte)) {
> >>> +#ifdef CONFIG_IPIPE
> >>> +           if (uncow_page) {
> >>> +                   struct page *old_page = vm_normal_page(vma, addr,
> pte);
> >>> +                   struct vm_fault vmf;
> >>> +
> >>> +                   vmf.vma = vma;
> >>> +                   vmf.address = addr;
> >>> +                   vmf.orig_pte = pte;
> >>> +                   vmf.pmd = src_pmd;
> >>> +
> >>> +                   if (cow_user_page(uncow_page, old_page, &vmf)) {
> >>> +                           pte = mk_pte(uncow_page,
> vma->vm_page_prot);
> >>> +
> >>> +                           if (vm_flags & VM_SHARED)
> >>> +                                   pte = pte_mkclean(pte);
> >>> +                           pte = pte_mkold(pte);
> >>> +
> >>> +                           page_add_new_anon_rmap(uncow_page, vma,
> addr,
> >>> +                                                  false);
> >>> +                           rss[!!PageAnon(uncow_page)]++;
> >>> +                           goto out_set_pte;
> >>> +                   } else {
> >>> +                           /* unexpected: source page no longer
> present */
> >>> +                           WARN_ON_ONCE(1);
> >>> +                   }
> >>> +           }
> >>> +#endif /* CONFIG_IPIPE */
> >>>             ptep_set_wrprotect(src_mm, addr, src_pte);
> >>>             pte = pte_wrprotect(pte);
> >>>     }
> >>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct
> *dst_mm, struct mm_struct *src_mm,
> >>>     int progress = 0;
> >>>     int rss[NR_MM_COUNTERS];
> >>>     swp_entry_t entry = (swp_entry_t){0};
> >>> -
> >>> +   struct page *uncow_page = NULL;
> >>> +#ifdef CONFIG_IPIPE
> >>> +   int do_cow_break = 0;
> >>> +again:
> >>> +   if (do_cow_break) {
> >>> +           uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
> >>> +           if (uncow_page == NULL)
> >>> +                   return -ENOMEM;
> >>> +           do_cow_break = 0;
> >>> +   }
> >>> +#else
> >>>  again:
> >>> +#endif
> >>>     init_rss_vec(rss);
> >>>
> >>>     dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
> >>> -   if (!dst_pte)
> >>> +   if (!dst_pte) {
> >>> +           if (uncow_page)
> >>> +                   put_page(uncow_page);
> >>>             return -ENOMEM;
> >>> +   }
> >>>     src_pte = pte_offset_map(src_pmd, addr);
> >>>     src_ptl = pte_lockptr(src_mm, src_pmd);
> >>>     spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
> >>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct
> *dst_mm, struct mm_struct *src_mm,
> >>>                     progress++;
> >>>                     continue;
> >>>             }
> >>> +#ifdef CONFIG_IPIPE
> >>> +           if (likely(uncow_page == NULL) &&
> likely(pte_present(*src_pte))) {
> >>> +                   if (is_cow_mapping(vma->vm_flags) &&
> >>> +                       test_bit(MMF_VM_PINNED, &src_mm->flags) &&
> >>> +                       ((vma->vm_flags|src_mm->def_flags) &
> VM_LOCKED)) {
> >>> +                           arch_leave_lazy_mmu_mode();
> >>> +                           spin_unlock(src_ptl);
> >>> +                           pte_unmap(src_pte);
> >>> +                           add_mm_rss_vec(dst_mm, rss);
> >>> +                           pte_unmap_unlock(dst_pte, dst_ptl);
> >>> +                           cond_resched();
> >>> +                           do_cow_break = 1;
> >>> +                           goto again;
> >>> +                   }
> >>> +           }
> >>> +#endif
> >>>             entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
> >>> -                                                   vma, addr, rss);
> >>> +                                    vma, addr, rss, src_pmd,
> uncow_page);
> >>> +           uncow_page = NULL;
> >>>             if (entry.val)
> >>>                     break;
> >>>             progress += 8;
> >>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct
> mm_struct *mm, pmd_t *pmd,
> >>>     return same;
> >>>  }
> >>>
> >>> -static inline bool cow_user_page(struct page *dst, struct page *src,
> >>> -                            struct vm_fault *vmf)
> >>> +static bool cow_user_page(struct page *dst, struct page *src,
> >>> +                     struct vm_fault *vmf)
> >>>  {
> >>>     bool ret;
> >>>     void *kaddr;
> >>>
> >>
> >> With this applied to ipipe-4.19 and 5.4 queues, related issues are
> gone, see
> >>
> >> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
> >>
> >> I'm inclined to take this for now so that we can move forward with other
> >> topics. Any comments?
> >>
> >
> > Fine with me. This code can be simplified, working on it. I'll channel
> > this through Dovetail.
> >
>
> OK, applied.
>
> Greg, you can find updated no-arch branches for 4.19 and 5.4.


Ack


>
> Jan
>
> --
> Siemens AG, T RDA IOT
> Corporate Competence Center Embedded Linux




>


* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-07 13:09       ` Greg Gallagher
@ 2021-03-08  7:02         ` Greg Gallagher
  2021-03-08  7:32           ` Jan Kiszka
  0 siblings, 1 reply; 10+ messages in thread
From: Greg Gallagher @ 2021-03-08  7:02 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Philippe Gerum, Xenomai

On Sun, Mar 7, 2021 at 8:09 AM Greg Gallagher <greg@embeddedgreg.com> wrote:

>
>
> On Sun, Mar 7, 2021 at 7:56 AM Jan Kiszka <jan.kiszka@siemens.com> wrote:
>
>> On 06.03.21 15:58, Philippe Gerum wrote:
>> >
>> > Jan Kiszka <jan.kiszka@siemens.com> writes:
>> >
>> >> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>> >>> From: Jan Kiszka <jan.kiszka@siemens.com>
>> >>>
>> >>> This is still needed to prevent a real-time parent from seeing minor
>> >>> faults on shared pages after forking, until those pages are finally
>> >>> unshared.
>> >>>
>> >>> This missing support became visible in Xenomai when running the
>> complete
>> >>> smokey test suite on certain architectures/config combinations.
>> >>>
>> >>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
>> >>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> >>> ---
>> >>>
>> >>> For discussion. If there is a better pattern in reach, I'm happy to
>> go
>> >>> that path as well.
>> >>>
>> >>> Otherwise, this should go into our 4.19 branches (Greg, your arm64
>> isn't
>> >>> in sync with noarch, you will need manual merging). Possibly, it
>> already
>> >>> applies to 5.4 as well, didn't check yet.
>> >>>
>> >>>  mm/memory.c | 76
>> ++++++++++++++++++++++++++++++++++++++++++++++++-----
>> >>>  1 file changed, 69 insertions(+), 7 deletions(-)
>> >>>
>> >>> diff --git a/mm/memory.c b/mm/memory.c
>> >>> index 4cc7c72a0b7e..f102f4de2213 100644
>> >>> --- a/mm/memory.c
>> >>> +++ b/mm/memory.c
>> >>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>> >>>
>> >>>  unsigned long highest_memmap_pfn __read_mostly;
>> >>>
>> >>> +static bool cow_user_page(struct page *dst, struct page *src,
>> >>> +                     struct vm_fault *vmf);
>> >>> +
>> >>>  /*
>> >>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>> >>>   */
>> >>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct
>> vm_area_struct *vma, unsigned long addr,
>> >>>
>> >>>  static inline unsigned long
>> >>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>> >>> -           pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct
>> *vma,
>> >>> -           unsigned long addr, int *rss)
>> >>> +        pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>> >>> +        unsigned long addr, int *rss, pmd_t *src_pmd,
>> >>> +        struct page *uncow_page)
>> >>>  {
>> >>>     unsigned long vm_flags = vma->vm_flags;
>> >>>     pte_t pte = *src_pte;
>> >>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct
>> mm_struct *src_mm,
>> >>>      * in the parent and the child
>> >>>      */
>> >>>     if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>> >>> +#ifdef CONFIG_IPIPE
>> >>> +           if (uncow_page) {
>> >>> +                   struct page *old_page = vm_normal_page(vma, addr,
>> pte);
>> >>> +                   struct vm_fault vmf;
>> >>> +
>> >>> +                   vmf.vma = vma;
>> >>> +                   vmf.address = addr;
>> >>> +                   vmf.orig_pte = pte;
>> >>> +                   vmf.pmd = src_pmd;
>> >>> +
>> >>> +                   if (cow_user_page(uncow_page, old_page, &vmf)) {
>> >>> +                           pte = mk_pte(uncow_page,
>> vma->vm_page_prot);
>> >>> +
>> >>> +                           if (vm_flags & VM_SHARED)
>> >>> +                                   pte = pte_mkclean(pte);
>> >>> +                           pte = pte_mkold(pte);
>> >>> +
>> >>> +                           page_add_new_anon_rmap(uncow_page, vma,
>> addr,
>> >>> +                                                  false);
>> >>> +                           rss[!!PageAnon(uncow_page)]++;
>> >>> +                           goto out_set_pte;
>> >>> +                   } else {
>> >>> +                           /* unexpected: source page no longer
>> present */
>> >>> +                           WARN_ON_ONCE(1);
>> >>> +                   }
>> >>> +           }
>> >>> +#endif /* CONFIG_IPIPE */
>> >>>             ptep_set_wrprotect(src_mm, addr, src_pte);
>> >>>             pte = pte_wrprotect(pte);
>> >>>     }
>> >>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct
>> *dst_mm, struct mm_struct *src_mm,
>> >>>     int progress = 0;
>> >>>     int rss[NR_MM_COUNTERS];
>> >>>     swp_entry_t entry = (swp_entry_t){0};
>> >>> -
>> >>> +   struct page *uncow_page = NULL;
>> >>> +#ifdef CONFIG_IPIPE
>> >>> +   int do_cow_break = 0;
>> >>> +again:
>> >>> +   if (do_cow_break) {
>> >>> +           uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
>> >>> +           if (uncow_page == NULL)
>> >>> +                   return -ENOMEM;
>> >>> +           do_cow_break = 0;
>> >>> +   }
>> >>> +#else
>> >>>  again:
>> >>> +#endif
>> >>>     init_rss_vec(rss);
>> >>>
>> >>>     dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
>> >>> -   if (!dst_pte)
>> >>> +   if (!dst_pte) {
>> >>> +           if (uncow_page)
>> >>> +                   put_page(uncow_page);
>> >>>             return -ENOMEM;
>> >>> +   }
>> >>>     src_pte = pte_offset_map(src_pmd, addr);
>> >>>     src_ptl = pte_lockptr(src_mm, src_pmd);
>> >>>     spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>> >>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct
>> *dst_mm, struct mm_struct *src_mm,
>> >>>                     progress++;
>> >>>                     continue;
>> >>>             }
>> >>> +#ifdef CONFIG_IPIPE
>> >>> +           if (likely(uncow_page == NULL) &&
>> likely(pte_present(*src_pte))) {
>> >>> +                   if (is_cow_mapping(vma->vm_flags) &&
>> >>> +                       test_bit(MMF_VM_PINNED, &src_mm->flags) &&
>> >>> +                       ((vma->vm_flags|src_mm->def_flags) &
>> VM_LOCKED)) {
>> >>> +                           arch_leave_lazy_mmu_mode();
>> >>> +                           spin_unlock(src_ptl);
>> >>> +                           pte_unmap(src_pte);
>> >>> +                           add_mm_rss_vec(dst_mm, rss);
>> >>> +                           pte_unmap_unlock(dst_pte, dst_ptl);
>> >>> +                           cond_resched();
>> >>> +                           do_cow_break = 1;
>> >>> +                           goto again;
>> >>> +                   }
>> >>> +           }
>> >>> +#endif
>> >>>             entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>> >>> -                                                   vma, addr, rss);
>> >>> +                                    vma, addr, rss, src_pmd,
>> uncow_page);
>> >>> +           uncow_page = NULL;
>> >>>             if (entry.val)
>> >>>                     break;
>> >>>             progress += 8;
>> >>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct
>> mm_struct *mm, pmd_t *pmd,
>> >>>     return same;
>> >>>  }
>> >>>
>> >>> -static inline bool cow_user_page(struct page *dst, struct page *src,
>> >>> -                            struct vm_fault *vmf)
>> >>> +static bool cow_user_page(struct page *dst, struct page *src,
>> >>> +                     struct vm_fault *vmf)
>> >>>  {
>> >>>     bool ret;
>> >>>     void *kaddr;
>> >>>
>> >>
>> >> With this applied to ipipe-4.19 and 5.4 queues, related issues are
>> gone, see
>> >>
>> >> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>> >>
>> >> I'm inclined to take this for now so that we can move forward with
>> other
>> >> topics. Any comments?
>> >>
>> >
>> > Fine with me. This code can be simplified, working on it. I'll channel
>> > this through Dovetail.
>> >
>>
>> OK, applied.
>>
>> Greg, you can find updated no-arch branches for 4.19 and 5.4.
>
>
Did this commit go into the no-arch tree? I'll have new arm/arm64 5.4 and
4.19 releases this week (aiming for Wednesday).

Thanks

Greg


* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-08  7:02         ` Greg Gallagher
@ 2021-03-08  7:32           ` Jan Kiszka
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Kiszka @ 2021-03-08  7:32 UTC (permalink / raw)
  To: Greg Gallagher; +Cc: Philippe Gerum, Xenomai

On 08.03.21 08:02, Greg Gallagher wrote:
> 
> 
> On Sun, Mar 7, 2021 at 8:09 AM Greg Gallagher <greg@embeddedgreg.com
> <mailto:greg@embeddedgreg.com>> wrote:
> 
> 
> 
>     On Sun, Mar 7, 2021 at 7:56 AM Jan Kiszka <jan.kiszka@siemens.com
>     <mailto:jan.kiszka@siemens.com>> wrote:
> 
>         On 06.03.21 15:58, Philippe Gerum wrote:
>         >
>         > Jan Kiszka <jan.kiszka@siemens.com
>         <mailto:jan.kiszka@siemens.com>> writes:
>         >
>         >> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>         >>> From: Jan Kiszka <jan.kiszka@siemens.com
>         <mailto:jan.kiszka@siemens.com>>
>         >>>
>         >>> This is still needed to prevent a real-time parent from seeing
>         >>> minor faults on shared pages after forking, until those pages
>         >>> are finally unshared.
>         >>>
>         >>> This missing support became visible in Xenomai when running
>         the complete
>         >>> smokey test suite on certain architectures/config combinations.
>         >>>
>         >>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from
>         copy_pte_range")
>         >>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com
>         <mailto:jan.kiszka@siemens.com>>
>         >>> ---
>         >>>
>         >>> For discussion. If there is a better pattern in reach, I'm
>         happy to go
>         >>> that path as well.
>         >>>
>         >>> Otherwise, this should go into our 4.19 branches (Greg, your
>         arm64 isn't
>         >>> in sync with noarch, you will need manual merging).
>         Possibly, it already
>         >>> applies to 5.4 as well, didn't check yet.
>         >>>
>         >>>  mm/memory.c | 76
>         ++++++++++++++++++++++++++++++++++++++++++++++++-----
>         >>>  1 file changed, 69 insertions(+), 7 deletions(-)
>         >>>
>         >>> diff --git a/mm/memory.c b/mm/memory.c
>         >>> index 4cc7c72a0b7e..f102f4de2213 100644
>         >>> --- a/mm/memory.c
>         >>> +++ b/mm/memory.c
>         >>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>         >>> 
>         >>>  unsigned long highest_memmap_pfn __read_mostly;
>         >>> 
>         >>> +static bool cow_user_page(struct page *dst, struct page *src,
>         >>> +                     struct vm_fault *vmf);
>         >>> +
>         >>>  /*
>         >>>   * CONFIG_MMU architectures set up ZERO_PAGE in their
>         paging_init()
>         >>>   */
>         >>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct
>         vm_area_struct *vma, unsigned long addr,
>         >>> 
>         >>>  static inline unsigned long
>         >>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct
>         *src_mm,
>         >>> -           pte_t *dst_pte, pte_t *src_pte, struct
>         vm_area_struct *vma,
>         >>> -           unsigned long addr, int *rss)
>         >>> +        pte_t *dst_pte, pte_t *src_pte, struct
>         vm_area_struct *vma,
>         >>> +        unsigned long addr, int *rss, pmd_t *src_pmd,
>         >>> +        struct page *uncow_page)
>         >>>  {
>         >>>     unsigned long vm_flags = vma->vm_flags;
>         >>>     pte_t pte = *src_pte;
>         >>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct
>         *dst_mm, struct mm_struct *src_mm,
>         >>>      * in the parent and the child
>         >>>      */
>         >>>     if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>         >>> +#ifdef CONFIG_IPIPE
>         >>> +           if (uncow_page) {
>         >>> +                   struct page *old_page =
>         vm_normal_page(vma, addr, pte);
>         >>> +                   struct vm_fault vmf;
>         >>> +
>         >>> +                   vmf.vma = vma;
>         >>> +                   vmf.address = addr;
>         >>> +                   vmf.orig_pte = pte;
>         >>> +                   vmf.pmd = src_pmd;
>         >>> +
>         >>> +                   if (cow_user_page(uncow_page, old_page,
>         &vmf)) {
>         >>> +                           pte = mk_pte(uncow_page,
>         vma->vm_page_prot);
>         >>> +
>         >>> +                           if (vm_flags & VM_SHARED)
>         >>> +                                   pte = pte_mkclean(pte);
>         >>> +                           pte = pte_mkold(pte);
>         >>> +
>         >>> +                         
>          page_add_new_anon_rmap(uncow_page, vma, addr,
>         >>> +                                                  false);
>         >>> +                           rss[!!PageAnon(uncow_page)]++;
>         >>> +                           goto out_set_pte;
>         >>> +                   } else {
>         >>> +                           /* unexpected: source page no
>         longer present */
>         >>> +                           WARN_ON_ONCE(1);
>         >>> +                   }
>         >>> +           }
>         >>> +#endif /* CONFIG_IPIPE */
>         >>>             ptep_set_wrprotect(src_mm, addr, src_pte);
>         >>>             pte = pte_wrprotect(pte);
>         >>>     }
>         >>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct
>         mm_struct *dst_mm, struct mm_struct *src_mm,
>         >>>     int progress = 0;
>         >>>     int rss[NR_MM_COUNTERS];
>         >>>     swp_entry_t entry = (swp_entry_t){0};
>         >>> -
>         >>> +   struct page *uncow_page = NULL;
>         >>> +#ifdef CONFIG_IPIPE
>         >>> +   int do_cow_break = 0;
>         >>> +again:
>         >>> +   if (do_cow_break) {
>         >>> +           uncow_page = alloc_page_vma(GFP_HIGHUSER, vma,
>         addr);
>         >>> +           if (uncow_page == NULL)
>         >>> +                   return -ENOMEM;
>         >>> +           do_cow_break = 0;
>         >>> +   }
>         >>> +#else
>         >>>  again:
>         >>> +#endif
>         >>>     init_rss_vec(rss);
>         >>> 
>         >>>     dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr,
>         &dst_ptl);
>         >>> -   if (!dst_pte)
>         >>> +   if (!dst_pte) {
>         >>> +           if (uncow_page)
>         >>> +                   put_page(uncow_page);
>         >>>             return -ENOMEM;
>         >>> +   }
>         >>>     src_pte = pte_offset_map(src_pmd, addr);
>         >>>     src_ptl = pte_lockptr(src_mm, src_pmd);
>         >>>     spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>         >>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct
>         mm_struct *dst_mm, struct mm_struct *src_mm,
>         >>>                     progress++;
>         >>>                     continue;
>         >>>             }
>         >>> +#ifdef CONFIG_IPIPE
>         >>> +           if (likely(uncow_page == NULL) &&
>         likely(pte_present(*src_pte))) {
>         >>> +                   if (is_cow_mapping(vma->vm_flags) &&
>         >>> +                       test_bit(MMF_VM_PINNED,
>         &src_mm->flags) &&
>         >>> +                       ((vma->vm_flags|src_mm->def_flags) &
>         VM_LOCKED)) {
>         >>> +                           arch_leave_lazy_mmu_mode();
>         >>> +                           spin_unlock(src_ptl);
>         >>> +                           pte_unmap(src_pte);
>         >>> +                           add_mm_rss_vec(dst_mm, rss);
>         >>> +                           pte_unmap_unlock(dst_pte, dst_ptl);
>         >>> +                           cond_resched();
>         >>> +                           do_cow_break = 1;
>         >>> +                           goto again;
>         >>> +                   }
>         >>> +           }
>         >>> +#endif
>         >>>             entry.val = copy_one_pte(dst_mm, src_mm,
>         dst_pte, src_pte,
>         >>> -                                                   vma,
>         addr, rss);
>         >>> +                                    vma, addr, rss,
>         src_pmd, uncow_page);
>         >>> +           uncow_page = NULL;
>         >>>             if (entry.val)
>         >>>                     break;
>         >>>             progress += 8;
>         >>> @@ -2348,8 +2410,8 @@ static inline int
>         pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>         >>>     return same;
>         >>>  }
>         >>> 
>         >>> -static inline bool cow_user_page(struct page *dst, struct
>         page *src,
>         >>> -                            struct vm_fault *vmf)
>         >>> +static bool cow_user_page(struct page *dst, struct page *src,
>         >>> +                     struct vm_fault *vmf)
>         >>>  {
>         >>>     bool ret;
>         >>>     void *kaddr;
>         >>>
>         >>
>         >> With this applied to ipipe-4.19 and 5.4 queues, related
>         issues are gone, see
>         >>
>         >>
>         https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>         >>
>         >> I'm inclined to take this for now so that we can move forward
>         with other
>         >> topics. Any comments?
>         >>
>         >
>         > Fine with me. This code can be simplified, working on it. I'll
>         channel
>         > this through Dovetail.
>         >
> 
>         OK, applied.
> 
>         Greg, you can find updated no-arch branches for 4.19 and 5.4.
> 
> 
> Did this commit go into the no-arch tree? I'll have new arm/arm64 5.4
> and 4.19 releases this week (aiming for Wednesday).
> 

Yes, but not in the form sent to the list, which targets forward-merging
trees (like x86). The noarch trees contain the fix folded into the
introducing commits, rebased over the respective stable branches.

Jan

-- 
Siemens AG, T RDA IOT
Corporate Competence Center Embedded Linux



* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-06 14:58   ` Philippe Gerum
  2021-03-07 12:46     ` Jan Kiszka
@ 2021-03-15 10:00     ` Jan Kiszka
  2021-03-15 12:08       ` Philippe Gerum
  1 sibling, 1 reply; 10+ messages in thread
From: Jan Kiszka @ 2021-03-15 10:00 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai, Greg Gallagher

On 06.03.21 15:58, Philippe Gerum wrote:
> 
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> This is still needed to prevent a real-time parent from seeing minor faults
>>> on shared pages after forking, until those pages are finally unshared.
>>>
>>> This missing support became visible in Xenomai when running the complete
>>> smokey test suite on certain architectures/config combinations.
>>>
>>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>
>>> For discussion. If there is a better pattern in reach, I'm happy to go 
>>> that path as well.
>>>
>>> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
>>> in sync with noarch, you will need manual merging). Possibly, it already 
>>> applies to 5.4 as well, didn't check yet.
>>>
>>>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>>>  1 file changed, 69 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index 4cc7c72a0b7e..f102f4de2213 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>>>  
>>>  unsigned long highest_memmap_pfn __read_mostly;
>>>  
>>> +static bool cow_user_page(struct page *dst, struct page *src,
>>> +			  struct vm_fault *vmf);
>>> +
>>>  /*
>>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>>>   */
>>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>>>  
>>>  static inline unsigned long
>>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>> -		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>>> -		unsigned long addr, int *rss)
>>> +	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>>> +	     unsigned long addr, int *rss, pmd_t *src_pmd,
>>> +	     struct page *uncow_page)
>>>  {
>>>  	unsigned long vm_flags = vma->vm_flags;
>>>  	pte_t pte = *src_pte;
>>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>  	 * in the parent and the child
>>>  	 */
>>>  	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>>> +#ifdef CONFIG_IPIPE
>>> +		if (uncow_page) {
>>> +			struct page *old_page = vm_normal_page(vma, addr, pte);
>>> +			struct vm_fault vmf;
>>> +
>>> +			vmf.vma = vma;
>>> +			vmf.address = addr;
>>> +			vmf.orig_pte = pte;
>>> +			vmf.pmd = src_pmd;
>>> +
>>> +			if (cow_user_page(uncow_page, old_page, &vmf)) {
>>> +				pte = mk_pte(uncow_page, vma->vm_page_prot);
>>> +
>>> +				if (vm_flags & VM_SHARED)
>>> +					pte = pte_mkclean(pte);
>>> +				pte = pte_mkold(pte);
>>> +
>>> +				page_add_new_anon_rmap(uncow_page, vma, addr,
>>> +						       false);
>>> +				rss[!!PageAnon(uncow_page)]++;
>>> +				goto out_set_pte;
>>> +			} else {
>>> +				/* unexpected: source page no longer present */
>>> +				WARN_ON_ONCE(1);
>>> +			}
>>> +		}
>>> +#endif /* CONFIG_IPIPE */
>>>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>>>  		pte = pte_wrprotect(pte);
>>>  	}
>>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>  	int progress = 0;
>>>  	int rss[NR_MM_COUNTERS];
>>>  	swp_entry_t entry = (swp_entry_t){0};
>>> -
>>> +	struct page *uncow_page = NULL;
>>> +#ifdef CONFIG_IPIPE
>>> +	int do_cow_break = 0;
>>> +again:
>>> +	if (do_cow_break) {
>>> +		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
>>> +		if (uncow_page == NULL)
>>> +			return -ENOMEM;
>>> +		do_cow_break = 0;
>>> +	}
>>> +#else
>>>  again:
>>> +#endif
>>>  	init_rss_vec(rss);
>>>  
>>>  	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
>>> -	if (!dst_pte)
>>> +	if (!dst_pte) {
>>> +		if (uncow_page)
>>> +			put_page(uncow_page);
>>>  		return -ENOMEM;
>>> +	}
>>>  	src_pte = pte_offset_map(src_pmd, addr);
>>>  	src_ptl = pte_lockptr(src_mm, src_pmd);
>>>  	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>  			progress++;
>>>  			continue;
>>>  		}
>>> +#ifdef CONFIG_IPIPE
>>> +		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
>>> +			if (is_cow_mapping(vma->vm_flags) &&
>>> +			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
>>> +			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
>>> +				arch_leave_lazy_mmu_mode();
>>> +				spin_unlock(src_ptl);
>>> +				pte_unmap(src_pte);
>>> +				add_mm_rss_vec(dst_mm, rss);
>>> +				pte_unmap_unlock(dst_pte, dst_ptl);
>>> +				cond_resched();
>>> +				do_cow_break = 1;
>>> +				goto again;
>>> +			}
>>> +		}
>>> +#endif
>>>  		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>>> -							vma, addr, rss);
>>> +					 vma, addr, rss, src_pmd, uncow_page);
>>> +		uncow_page = NULL;
>>>  		if (entry.val)
>>>  			break;
>>>  		progress += 8;
>>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>>>  	return same;
>>>  }
>>>  
>>> -static inline bool cow_user_page(struct page *dst, struct page *src,
>>> -				 struct vm_fault *vmf)
>>> +static bool cow_user_page(struct page *dst, struct page *src,
>>> +			  struct vm_fault *vmf)
>>>  {
>>>  	bool ret;
>>>  	void *kaddr;
>>>
>>
>> With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see
>>
>> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>>
>> I'm inclined to take this for now so that we can move forward with other
>> topics. Any comments?
>>
> 
> Fine with me. This code can be simplified, working on it. I'll channel
> this through Dovetail.
> 

This didn't make it into dovetail/v5.10 yet, did it? Asking because
smokey triggers tons of primary mode migrations over dovetail.
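
FWIW, here is a minimal userspace sketch of the pattern smokey keeps
tripping over. It is plain POSIX, nothing Xenomai-specific, so take it
as an illustration only: on a vanilla kernel the fault delta is
expected (fork always write-protects the parent's PTEs there), while a
pinned mm under I-pipe (MMF_VM_PINNED plus VM_LOCKED, as set up when a
real-time process locks its memory) should report zero once the un-COW
path is in place:

/*
 * uncow-repro.c: count the parent's minor faults across a write that
 * follows a fork(). Without un-COW support in copy_pte_range(), the
 * first post-fork write to each previously shared page takes a COW
 * break, i.e. a minor fault (and, for a Xenomai thread in primary
 * mode, a migration to secondary).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/wait.h>

static long minor_faults(void)
{
	struct rusage ru;

	getrusage(RUSAGE_SELF, &ru);
	return ru.ru_minflt;
}

int main(void)
{
	const size_t len = 64 * 4096;
	long before, after;
	char *buf;

	if (mlockall(MCL_CURRENT | MCL_FUTURE))
		perror("mlockall");	/* needs memlock privileges */

	buf = malloc(len);
	if (!buf)
		return 1;
	memset(buf, 0xa5, len);		/* fault everything in up front */

	if (fork() == 0)
		_exit(0);		/* child only forces the PTE copy */
	wait(NULL);

	before = minor_faults();
	memset(buf, 0x5a, len);		/* post-fork write: COW break? */
	after = minor_faults();

	printf("minor faults across post-fork write: %ld\n", after - before);
	return 0;
}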

Jan

-- 
Siemens AG, T RDA IOT
Corporate Competence Center Embedded Linux


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range
  2021-03-15 10:00     ` Jan Kiszka
@ 2021-03-15 12:08       ` Philippe Gerum
  0 siblings, 0 replies; 10+ messages in thread
From: Philippe Gerum @ 2021-03-15 12:08 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai, Greg Gallagher


Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 06.03.21 15:58, Philippe Gerum wrote:
>> 
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>> 
>>> On 05.03.21 12:38, Jan Kiszka via Xenomai wrote:
>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>
>>>> This is still needed to avoid that a real-time parent sees minor faults
>>>> after forking for shared pages until they are finally unshared.
>>>>
>>>> This missing support became visible in Xenomai when running the complete
>>>> smokey test suite on certain architectures/config combinations.
>>>>
>>>> Fixes: 0f0b6099c45f ("ipipe: mm: Drop un-COW from copy_pte_range")
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>
>>>> For discussion. If there is a better pattern in reach, I'm happy to go 
>>>> that path as well.
>>>>
>>>> Otherwise, this should go into our 4.19 branches (Greg, your arm64 isn't 
>>>> in sync with noarch, you will need manual merging). Possibly, it already 
>>>> applies to 5.4 as well, didn't check yet.
>>>>
>>>>  mm/memory.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>>>>  1 file changed, 69 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>> index 4cc7c72a0b7e..f102f4de2213 100644
>>>> --- a/mm/memory.c
>>>> +++ b/mm/memory.c
>>>> @@ -141,6 +141,9 @@ EXPORT_SYMBOL(zero_pfn);
>>>>  
>>>>  unsigned long highest_memmap_pfn __read_mostly;
>>>>  
>>>> +static bool cow_user_page(struct page *dst, struct page *src,
>>>> +			  struct vm_fault *vmf);
>>>> +
>>>>  /*
>>>>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
>>>>   */
>>>> @@ -957,8 +960,9 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>>>>  
>>>>  static inline unsigned long
>>>>  copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>> -		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>>>> -		unsigned long addr, int *rss)
>>>> +	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
>>>> +	     unsigned long addr, int *rss, pmd_t *src_pmd,
>>>> +	     struct page *uncow_page)
>>>>  {
>>>>  	unsigned long vm_flags = vma->vm_flags;
>>>>  	pte_t pte = *src_pte;
>>>> @@ -1036,6 +1040,33 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>>  	 * in the parent and the child
>>>>  	 */
>>>>  	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
>>>> +#ifdef CONFIG_IPIPE
>>>> +		if (uncow_page) {
>>>> +			struct page *old_page = vm_normal_page(vma, addr, pte);
>>>> +			struct vm_fault vmf;
>>>> +
>>>> +			vmf.vma = vma;
>>>> +			vmf.address = addr;
>>>> +			vmf.orig_pte = pte;
>>>> +			vmf.pmd = src_pmd;
>>>> +
>>>> +			if (cow_user_page(uncow_page, old_page, &vmf)) {
>>>> +				pte = mk_pte(uncow_page, vma->vm_page_prot);
>>>> +
>>>> +				if (vm_flags & VM_SHARED)
>>>> +					pte = pte_mkclean(pte);
>>>> +				pte = pte_mkold(pte);
>>>> +
>>>> +				page_add_new_anon_rmap(uncow_page, vma, addr,
>>>> +						       false);
>>>> +				rss[!!PageAnon(uncow_page)]++;
>>>> +				goto out_set_pte;
>>>> +			} else {
>>>> +				/* unexpected: source page no longer present */
>>>> +				WARN_ON_ONCE(1);
>>>> +			}
>>>> +		}
>>>> +#endif /* CONFIG_IPIPE */
>>>>  		ptep_set_wrprotect(src_mm, addr, src_pte);
>>>>  		pte = pte_wrprotect(pte);
>>>>  	}
>>>> @@ -1083,13 +1114,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>>  	int progress = 0;
>>>>  	int rss[NR_MM_COUNTERS];
>>>>  	swp_entry_t entry = (swp_entry_t){0};
>>>> -
>>>> +	struct page *uncow_page = NULL;
>>>> +#ifdef CONFIG_IPIPE
>>>> +	int do_cow_break = 0;
>>>> +again:
>>>> +	if (do_cow_break) {
>>>> +		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
>>>> +		if (uncow_page == NULL)
>>>> +			return -ENOMEM;
>>>> +		do_cow_break = 0;
>>>> +	}
>>>> +#else
>>>>  again:
>>>> +#endif
>>>>  	init_rss_vec(rss);
>>>>  
>>>>  	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
>>>> -	if (!dst_pte)
>>>> +	if (!dst_pte) {
>>>> +		if (uncow_page)
>>>> +			put_page(uncow_page);
>>>>  		return -ENOMEM;
>>>> +	}
>>>>  	src_pte = pte_offset_map(src_pmd, addr);
>>>>  	src_ptl = pte_lockptr(src_mm, src_pmd);
>>>>  	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
>>>> @@ -1112,8 +1157,25 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>>>>  			progress++;
>>>>  			continue;
>>>>  		}
>>>> +#ifdef CONFIG_IPIPE
>>>> +		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
>>>> +			if (is_cow_mapping(vma->vm_flags) &&
>>>> +			    test_bit(MMF_VM_PINNED, &src_mm->flags) &&
>>>> +			    ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) {
>>>> +				arch_leave_lazy_mmu_mode();
>>>> +				spin_unlock(src_ptl);
>>>> +				pte_unmap(src_pte);
>>>> +				add_mm_rss_vec(dst_mm, rss);
>>>> +				pte_unmap_unlock(dst_pte, dst_ptl);
>>>> +				cond_resched();
>>>> +				do_cow_break = 1;
>>>> +				goto again;
>>>> +			}
>>>> +		}
>>>> +#endif
>>>>  		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>>>> -							vma, addr, rss);
>>>> +					 vma, addr, rss, src_pmd, uncow_page);
>>>> +		uncow_page = NULL;
>>>>  		if (entry.val)
>>>>  			break;
>>>>  		progress += 8;
>>>> @@ -2348,8 +2410,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
>>>>  	return same;
>>>>  }
>>>>  
>>>> -static inline bool cow_user_page(struct page *dst, struct page *src,
>>>> -				 struct vm_fault *vmf)
>>>> +static bool cow_user_page(struct page *dst, struct page *src,
>>>> +			  struct vm_fault *vmf)
>>>>  {
>>>>  	bool ret;
>>>>  	void *kaddr;
>>>>
>>>
>>> With this applied to ipipe-4.19 and 5.4 queues, related issues are gone, see
>>>
>>> https://source.denx.de/Xenomai/xenomai-images/-/pipelines/6670
>>>
>>> I'm inclined to take this for now so that we can move forward with other
>>> topics. Any comments?
>>>
>> 
>> Fine with me. This code can be simplified, working on it. I'll channel
>> this through Dovetail.
>> 
>
> This didn't make it into dovetail/v5.10 yet, did it? Asking because
> smokey triggers tons of primary mode migrations over dovetail.
>
> Jan

No, it did not.

-- 
Philippe.


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-03-15 12:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-05 11:38 [PATCH 4.19] ipipe: mm: Restore unCOW support for copy_pte_range Jan Kiszka
2021-03-06 13:14 ` Jan Kiszka
2021-03-06 13:33   ` Henning Schild
2021-03-06 14:58   ` Philippe Gerum
2021-03-07 12:46     ` Jan Kiszka
2021-03-07 13:09       ` Greg Gallagher
2021-03-08  7:02         ` Greg Gallagher
2021-03-08  7:32           ` Jan Kiszka
2021-03-15 10:00     ` Jan Kiszka
2021-03-15 12:08       ` Philippe Gerum

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.