linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Jason Gunthorpe <jgg@ziepe.ca>,
	Andrew Morton <akpm@linux-foundation.org>,
	Jan Kara <jack@suse.cz>, Michal Hocko <mhocko@suse.com>,
	Kirill Tkhai <ktkhai@virtuozzo.com>,
	Kirill Shutemov <kirill@shutemov.name>,
	Hugh Dickins <hughd@google.com>, Peter Xu <peterx@redhat.com>,
	Christoph Hellwig <hch@lst.de>,
	Andrea Arcangeli <aarcange@redhat.com>,
	John Hubbard <jhubbard@nvidia.com>,
	Oleg Nesterov <oleg@redhat.com>,
	Leon Romanovsky <leonro@nvidia.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Jann Horn <jannh@google.com>
Subject: [PATCH 3/5] mm: Rework return value for copy_one_pte()
Date: Mon, 21 Sep 2020 17:17:42 -0400	[thread overview]
Message-ID: <20200921211744.24758-4-peterx@redhat.com> (raw)
In-Reply-To: <20200921211744.24758-1-peterx@redhat.com>

There's one special path for copy_one_pte() with swap entries, in which
add_swap_count_continuation(GFP_ATOMIC) might fail.  In that case we'll return
the swp_entry_t so that the caller will release the locks and redo the same
thing with GFP_KERNEL.

It's confusing when copy_one_pte() must return a swp_entry_t (even if all the
ptes are non-swap entries).  More importantly, we face other requirement to
extend this "we need to do something else, but without the locks held" case.

Rework the return value into something easier to understand, as defined in enum
copy_mm_ret.  We'll pass the swp_entry_t back using the newly introduced union
copy_mm_data parameter.

Another trivial change is to move the reset of the "progress" counter into the
retry path, so that we'll reset it for other reasons too.

This should prepare us with adding new return codes, very soon.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 mm/memory.c | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 7525147908c4..1530bb1070f4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -689,16 +689,24 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 }
 #endif
 
+#define  COPY_MM_DONE               0
+#define  COPY_MM_SWAP_CONT          1
+
+struct copy_mm_data {
+	/* COPY_MM_SWAP_CONT */
+	swp_entry_t entry;
+};
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
  */
 
-static inline unsigned long
+static inline int
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+		unsigned long addr, int *rss, struct copy_mm_data *data)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -709,8 +717,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		swp_entry_t entry = pte_to_swp_entry(pte);
 
 		if (likely(!non_swap_entry(entry))) {
-			if (swap_duplicate(entry) < 0)
-				return entry.val;
+			if (swap_duplicate(entry) < 0) {
+				data->entry = entry;
+				return COPY_MM_SWAP_CONT;
+			}
 
 			/* make sure dst_mm is on swapoff's mmlist. */
 			if (unlikely(list_empty(&dst_mm->mmlist))) {
@@ -809,7 +819,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
-	return 0;
+	return COPY_MM_DONE;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -820,9 +830,9 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pte_t *orig_src_pte, *orig_dst_pte;
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
-	int progress = 0;
+	int progress, copy_ret = COPY_MM_DONE;
 	int rss[NR_MM_COUNTERS];
-	swp_entry_t entry = (swp_entry_t){0};
+	struct copy_mm_data data;
 
 again:
 	init_rss_vec(rss);
@@ -837,6 +847,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	orig_dst_pte = dst_pte;
 	arch_enter_lazy_mmu_mode();
 
+	progress = 0;
 	do {
 		/*
 		 * We are holding two locks at this point - either of them
@@ -852,9 +863,9 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			progress++;
 			continue;
 		}
-		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-							vma, addr, rss);
-		if (entry.val)
+		copy_ret = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+					vma, addr, rss, &data);
+		if (copy_ret != COPY_MM_DONE)
 			break;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
@@ -866,13 +877,18 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
 
-	if (entry.val) {
-		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
+	switch (copy_ret) {
+	case COPY_MM_SWAP_CONT:
+		if (add_swap_count_continuation(data.entry, GFP_KERNEL) < 0)
 			return -ENOMEM;
-		progress = 0;
+		break;
+	default:
+		break;
 	}
+
 	if (addr != end)
 		goto again;
+
 	return 0;
 }
 
-- 
2.26.2



  parent reply	other threads:[~2020-09-21 21:18 UTC|newest]

Thread overview: 110+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-21 21:17 [PATCH 0/5] mm: Break COW for pinned pages during fork() Peter Xu
2020-09-21 21:17 ` [PATCH 1/5] mm: Introduce mm_struct.has_pinned Peter Xu
2020-09-21 21:43   ` Jann Horn
2020-09-21 22:30     ` Peter Xu
2020-09-21 22:47       ` Jann Horn
2020-09-22 11:54         ` Jason Gunthorpe
2020-09-22 14:28           ` Peter Xu
2020-09-22 15:56             ` Jason Gunthorpe
2020-09-22 16:25               ` Linus Torvalds
2020-09-21 23:53   ` John Hubbard
2020-09-22  0:01     ` John Hubbard
2020-09-22 15:17     ` Peter Xu
2020-09-22 16:10       ` Jason Gunthorpe
2020-09-22 17:54         ` Peter Xu
2020-09-22 19:11           ` Jason Gunthorpe
2020-09-23  0:27             ` Peter Xu
2020-09-23 13:10               ` Peter Xu
2020-09-23 14:20                 ` Jan Kara
2020-09-23 17:12                   ` Jason Gunthorpe
2020-09-24  7:44                     ` Jan Kara
2020-09-24 14:02                       ` Jason Gunthorpe
2020-09-24 14:45                         ` Jan Kara
2020-09-23 17:07               ` Jason Gunthorpe
2020-09-24 14:35                 ` Peter Xu
2020-09-24 16:51                   ` Jason Gunthorpe
2020-09-24 17:55                     ` Peter Xu
2020-09-24 18:15                       ` Jason Gunthorpe
2020-09-24 18:34                         ` Peter Xu
2020-09-24 18:39                           ` Jason Gunthorpe
2020-09-24 21:30                             ` Peter Xu
2020-09-25 19:56                               ` Linus Torvalds
2020-09-25 21:06                                 ` Linus Torvalds
2020-09-26  0:41                                   ` Jason Gunthorpe
2020-09-26  1:15                                     ` Linus Torvalds
2020-09-26 22:28                                       ` Linus Torvalds
2020-09-27  6:23                                         ` Leon Romanovsky
2020-09-27 18:16                                           ` Linus Torvalds
2020-09-27 18:45                                             ` Linus Torvalds
2020-09-28 12:49                                               ` Jason Gunthorpe
2020-09-28 16:17                                                 ` Linus Torvalds
2020-09-28 17:22                                                   ` Peter Xu
2020-09-28 17:54                                                     ` Linus Torvalds
2020-09-28 18:39                                                       ` Jason Gunthorpe
2020-09-28 19:29                                                         ` Linus Torvalds
2020-09-28 23:57                                                           ` Jason Gunthorpe
2020-09-29  0:18                                                             ` John Hubbard
2020-09-28 19:36                                                         ` Linus Torvalds
2020-09-28 19:50                                                           ` Linus Torvalds
2020-09-28 22:51                                                             ` Jason Gunthorpe
2020-09-29  0:30                                                               ` Peter Xu
2020-10-08  5:49                                                             ` Leon Romanovsky
2020-09-28 17:13                                             ` Peter Xu
2020-09-25 21:13                                 ` Peter Xu
2020-09-25 22:08                                   ` Linus Torvalds
2020-09-22 18:02       ` John Hubbard
2020-09-22 18:15         ` Peter Xu
2020-09-22 19:11       ` John Hubbard
2020-09-27  0:41   ` [mm] 698ac7610f: will-it-scale.per_thread_ops 8.2% improvement kernel test robot
2020-09-21 21:17 ` [PATCH 2/5] mm/fork: Pass new vma pointer into copy_page_range() Peter Xu
2020-09-21 21:17 ` Peter Xu [this message]
2020-09-22  7:11   ` [PATCH 3/5] mm: Rework return value for copy_one_pte() John Hubbard
2020-09-22 15:29     ` Peter Xu
2020-09-22 10:08   ` Oleg Nesterov
2020-09-22 10:18     ` Oleg Nesterov
2020-09-22 15:36       ` Peter Xu
2020-09-22 15:48         ` Oleg Nesterov
2020-09-22 16:03           ` Peter Xu
2020-09-22 16:53             ` Oleg Nesterov
2020-09-22 18:13               ` Peter Xu
2020-09-22 18:23                 ` Oleg Nesterov
2020-09-22 18:49                   ` Peter Xu
2020-09-23  6:52                     ` Oleg Nesterov
2020-09-23 17:16   ` Linus Torvalds
2020-09-23 21:24     ` Linus Torvalds
2020-09-21 21:20 ` [PATCH 4/5] mm: Do early cow for pinned pages during fork() for ptes Peter Xu
2020-09-21 21:55   ` Jann Horn
2020-09-21 22:18     ` John Hubbard
2020-09-21 22:27       ` Jann Horn
2020-09-22  0:08         ` John Hubbard
2020-09-21 22:27     ` Peter Xu
2020-09-22 11:48   ` Oleg Nesterov
2020-09-22 12:40     ` Oleg Nesterov
2020-09-22 15:58       ` Peter Xu
2020-09-22 16:52         ` Oleg Nesterov
2020-09-22 18:34           ` Peter Xu
2020-09-22 18:44             ` Oleg Nesterov
2020-09-23  1:03               ` Peter Xu
2020-09-23 20:25                 ` Linus Torvalds
2020-09-24 15:08                   ` Peter Xu
2020-09-24 11:48   ` Kirill Tkhai
2020-09-24 15:16     ` Peter Xu
2020-09-21 21:20 ` [PATCH 5/5] mm/thp: Split huge pmds/puds if they're pinned when fork() Peter Xu
2020-09-22  6:41   ` John Hubbard
2020-09-22 10:33     ` Jan Kara
2020-09-22 20:01       ` John Hubbard
2020-09-23  9:22         ` Jan Kara
2020-09-23 13:50           ` Peter Xu
2020-09-23 14:01             ` Jan Kara
2020-09-23 15:44               ` Peter Xu
2020-09-23 20:19                 ` John Hubbard
2020-09-24 18:49                   ` Peter Xu
2020-09-23 16:06     ` Peter Xu
2020-09-22 12:05   ` Jason Gunthorpe
2020-09-23 15:24     ` Peter Xu
2020-09-23 16:07       ` Yang Shi
2020-09-24 15:47         ` Peter Xu
2020-09-24 17:29           ` Yang Shi
2020-09-23 17:17       ` Jason Gunthorpe
2020-09-23 10:21 ` [PATCH 0/5] mm: Break COW for pinned pages during fork() Leon Romanovsky
2020-09-23 15:37   ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200921211744.24758-4-peterx@redhat.com \
    --to=peterx@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=hch@lst.de \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=jannh@google.com \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=kirill@shutemov.name \
    --cc=ktkhai@virtuozzo.com \
    --cc=leonro@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=oleg@redhat.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).