All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chih-En Lin <shiyn.lin@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>, linux-mm@kvack.org
Cc: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Christian Brauner <brauner@kernel.org>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	William Kucharski <william.kucharski@oracle.com>,
	John Hubbard <jhubbard@nvidia.com>,
	Yunsheng Lin <linyunsheng@huawei.com>,
	Arnd Bergmann <arnd@arndb.de>,
	Suren Baghdasaryan <surenb@google.com>,
	Chih-En Lin <shiyn.lin@gmail.com>,
	Colin Cross <ccross@google.com>, Feng Tang <feng.tang@intel.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Mike Rapoport <rppt@kernel.org>,
	Geert Uytterhoeven <geert@linux-m68k.org>,
	Anshuman Khandual <anshuman.khandual@arm.com>,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
	Daniel Axtens <dja@axtens.net>,
	Jonathan Marek <jonathan@marek.ca>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	Peter Xu <peterx@redhat.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Andy Lutomirski <luto@kernel.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Fenghua Yu <fenghua.yu@intel.com>,
	David Hildenbrand <david@redhat.com>,
	linux-kernel@vger.kernel.org, Kaiyang Zhao <zhao776@purdue.edu>,
	Huichun Feng <foxhoundsk.tw@gmail.com>,
	Jim Huang <jserv.tw@gmail.com>
Subject: [RFC PATCH 4/6] mm: Add COW PTE fallback function
Date: Fri, 20 May 2022 02:31:25 +0800	[thread overview]
Message-ID: <20220519183127.3909598-5-shiyn.lin@gmail.com> (raw)
In-Reply-To: <20220519183127.3909598-1-shiyn.lin@gmail.com>

The lifetime of COW PTE will handle by ownership and a reference count.
When the process wants to write the COW PTE, which reference count is 1,
it will reuse the COW PTE instead of copying then free.

Only the owner will update its RSS state and the record of page table
bytes allocation. So we need to handle when the non-owner process gets
the fallback COW PTE.

This commit prepares for the following implementation of the reference
count for COW PTE.

Signed-off-by: Chih-En Lin <shiyn.lin@gmail.com>
---
 mm/memory.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/mm/memory.c b/mm/memory.c
index 76e3af9639d9..dcb678cbb051 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1000,6 +1000,34 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma,
 	return new_page;
 }
 
+static inline void cow_pte_rss(struct mm_struct *mm, struct vm_area_struct *vma,
+	pmd_t *pmdp, unsigned long addr, unsigned long end, bool inc_dec)
+{
+	int rss[NR_MM_COUNTERS];
+	pte_t *orig_ptep, *ptep;
+	struct page *page;
+
+	init_rss_vec(rss);
+
+	ptep = pte_offset_map(pmdp, addr);
+	orig_ptep = ptep;
+	arch_enter_lazy_mmu_mode();
+	do {
+		if (pte_none(*ptep) || pte_special(*ptep))
+			continue;
+
+		page = vm_normal_page(vma, addr, *ptep);
+		if (page) {
+			if (inc_dec)
+				rss[mm_counter(page)]++;
+			else
+				rss[mm_counter(page)]--;
+		}
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
+	add_mm_rss_vec(mm, rss);
+}
+
 static int
 copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	       pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
@@ -4554,6 +4582,44 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 	return VM_FAULT_FALLBACK;
 }
 
+/* COW PTE fallback to normal PTE:
+ * - two state here
+ *   - After break child :   [parent, rss=1, ref=1, write=NO , owner=parent]
+ *                        to [parent, rss=1, ref=1, write=YES, owner=NULL  ]
+ *   - After break parent:   [child , rss=0, ref=1, write=NO , owner=NULL  ]
+ *                        to [child , rss=1, ref=1, write=YES, owner=NULL  ]
+ */
+void cow_pte_fallback(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long addr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long start, end;
+	pmd_t new;
+
+	BUG_ON(pmd_write(*pmd));
+
+	start = addr & PMD_MASK;
+	end = (addr + PMD_SIZE) & PMD_MASK;
+
+	/* If pmd is not owner, it needs to increase the rss.
+	 * Since only the owner has the RSS state for the COW PTE.
+	 */
+	if (!cow_pte_owner_is_same(pmd, pmd)) {
+		cow_pte_rss(mm, vma, pmd, start, end, true /* inc */);
+		mm_inc_nr_ptes(mm);
+		smp_wmb();
+		pmd_populate(mm, pmd, pmd_page(*pmd));
+	}
+
+	/* Reuse the pte page */
+	set_cow_pte_owner(pmd, NULL);
+	new = pmd_mkwrite(*pmd);
+	set_pmd_at(mm, addr, pmd, new);
+
+	BUG_ON(!pmd_write(*pmd));
+	BUG_ON(pmd_page(*pmd)->cow_pte_owner);
+}
+
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
-- 
2.36.1


  parent reply	other threads:[~2022-05-19 18:30 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-19 18:31 [RFC PATCH 0/6] Introduce Copy-On-Write to Page Table Chih-En Lin
2022-05-19 18:31 ` [RFC PATCH 1/6] mm: Add a new mm flag for Copy-On-Write PTE table Chih-En Lin
2022-05-19 18:31 ` [RFC PATCH 2/6] mm: clone3: Add CLONE_COW_PGTABLE flag Chih-En Lin
2022-05-20 14:13   ` Christophe Leroy
2022-05-21  3:50     ` Chih-En Lin
2022-05-19 18:31 ` [RFC PATCH 3/6] mm, pgtable: Add ownership for the PTE table Chih-En Lin
2022-05-19 23:07   ` kernel test robot
2022-05-20  0:08   ` kernel test robot
2022-05-20 14:15   ` Christophe Leroy
2022-05-21  4:03     ` Chih-En Lin
2022-05-21  4:02   ` Matthew Wilcox
2022-05-21  5:01     ` Chih-En Lin
2022-05-19 18:31 ` Chih-En Lin [this message]
2022-05-20  0:20   ` [RFC PATCH 4/6] mm: Add COW PTE fallback function kernel test robot
2022-05-20 14:21   ` Christophe Leroy
2022-05-21  4:15     ` Chih-En Lin
2022-05-19 18:31 ` [RFC PATCH 5/6] mm, pgtable: Add the reference counter for COW PTE Chih-En Lin
2022-05-20 14:30   ` Christophe Leroy
2022-05-21  4:22     ` Chih-En Lin
2022-05-21  4:08   ` Matthew Wilcox
2022-05-21  5:10     ` Chih-En Lin
2022-05-19 18:31 ` [RFC PATCH 6/6] mm: Expand Copy-On-Write to PTE table Chih-En Lin
2022-05-20 14:49   ` Christophe Leroy
2022-05-21  4:38     ` Chih-En Lin
2022-05-21  8:59 ` [External] [RFC PATCH 0/6] Introduce Copy-On-Write to Page Table Qi Zheng
2022-05-21 19:08   ` Chih-En Lin
2022-05-21 16:07 ` David Hildenbrand
2022-05-21 18:50   ` Chih-En Lin
2022-05-21 20:28     ` David Hildenbrand
2022-05-21 20:12   ` Matthew Wilcox
2022-05-21 20:22     ` David Hildenbrand
2022-05-21 22:19     ` Andy Lutomirski
2022-05-22  0:31       ` Matthew Wilcox
2022-05-22 15:20         ` Andy Lutomirski
2022-05-22 19:40           ` Matthew Wilcox
2022-05-20 10:16 [RFC PATCH 4/6] mm: Add COW PTE fallback function kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220519183127.3909598-5-shiyn.lin@gmail.com \
    --to=shiyn.lin@gmail.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=anshuman.khandual@arm.com \
    --cc=arnd@arndb.de \
    --cc=bigeasy@linutronix.de \
    --cc=brauner@kernel.org \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=ccross@google.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=david@redhat.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=dja@axtens.net \
    --cc=ebiederm@xmission.com \
    --cc=feng.tang@intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=foxhoundsk.tw@gmail.com \
    --cc=geert@linux-m68k.org \
    --cc=jhubbard@nvidia.com \
    --cc=jonathan@marek.ca \
    --cc=jserv.tw@gmail.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linyunsheng@huawei.com \
    --cc=luto@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=vincent.guittot@linaro.org \
    --cc=william.kucharski@oracle.com \
    --cc=willy@infradead.org \
    --cc=zhao776@purdue.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.