All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Mike Kravetz <mike.kravetz@oracle.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	peterx@redhat.com, Alistair Popple <apopple@nvidia.com>,
	Nadav Amit <nadav.amit@gmail.com>,
	Axel Rasmussen <axelrasmussen@google.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	Hugh Dickins <hughd@google.com>,
	Jerome Glisse <jglisse@redhat.com>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>
Subject: [PATCH v8 01/23] mm: Introduce PTE_MARKER swap entry
Date: Mon,  4 Apr 2022 21:46:24 -0400	[thread overview]
Message-ID: <20220405014646.13522-2-peterx@redhat.com> (raw)
In-Reply-To: <20220405014646.13522-1-peterx@redhat.com>

This patch introduces a new swap entry type called PTE_MARKER.  It can be
installed for any pte that maps a file-backed memory when the pte is
temporarily zapped, so as to maintain per-pte information.

The information that kept in the pte is called a "marker".  Here we define the
marker as "unsigned long" just to match pgoff_t, however it will only work if
it still fits in swp_offset(), which is e.g. currently 58 bits on x86_64.

A new config CONFIG_PTE_MARKER is introduced too; it's by default off.  A bunch
of helpers are defined altogether to service the rest of the pte marker code.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/asm-generic/hugetlb.h |  9 ++++
 include/linux/swap.h          | 15 ++++++-
 include/linux/swapops.h       | 78 +++++++++++++++++++++++++++++++++++
 mm/Kconfig                    |  6 +++
 4 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 8e1e6244a89d..f39cad20ffc6 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -2,6 +2,9 @@
 #ifndef _ASM_GENERIC_HUGETLB_H
 #define _ASM_GENERIC_HUGETLB_H
 
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
 static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
 {
 	return mk_pte(page, pgprot);
@@ -80,6 +83,12 @@ static inline int huge_pte_none(pte_t pte)
 }
 #endif
 
+/* Please refer to comments above pte_none_mostly() for the usage */
+static inline int huge_pte_none_mostly(pte_t pte)
+{
+	return huge_pte_none(pte) || is_pte_marker(pte);
+}
+
 #ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
 static inline pte_t huge_pte_wrprotect(pte_t pte)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7daae5a4b3e1..5553189d0215 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -55,6 +55,19 @@ static inline int current_is_kswapd(void)
  * actions on faults.
  */
 
+/*
+ * PTE markers are used to persist information onto PTEs that are mapped with
+ * file-backed memories.  As its name "PTE" hints, it should only be applied to
+ * the leaves of pgtables.
+ */
+#ifdef CONFIG_PTE_MARKER
+#define SWP_PTE_MARKER_NUM 1
+#define SWP_PTE_MARKER     (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
+			    SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
+#else
+#define SWP_PTE_MARKER_NUM 0
+#endif
+
 /*
  * Unaddressable device memory support. See include/linux/hmm.h and
  * Documentation/vm/hmm.rst. Short description is we need struct pages for
@@ -107,7 +120,7 @@ static inline int current_is_kswapd(void)
 
 #define MAX_SWAPFILES \
 	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
-	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
 
 /*
  * Magic header for a swap area. The first part of the union is
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 32d517a28969..7a00627845f0 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -274,6 +274,84 @@ static inline int is_readable_migration_entry(swp_entry_t entry)
 
 #endif
 
+typedef unsigned long pte_marker;
+
+#define  PTE_MARKER_MASK     (0)
+
+#ifdef CONFIG_PTE_MARKER
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+	return swp_entry(SWP_PTE_MARKER, marker);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+	return swp_type(entry) == SWP_PTE_MARKER;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+	return swp_offset(entry) & PTE_MARKER_MASK;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+	return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
+}
+
+#else /* CONFIG_PTE_MARKER */
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+	/* This should never be called if !CONFIG_PTE_MARKER */
+	WARN_ON_ONCE(1);
+	return swp_entry(0, 0);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+	return false;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+	return 0;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+	return false;
+}
+
+#endif /* CONFIG_PTE_MARKER */
+
+static inline pte_t make_pte_marker(pte_marker marker)
+{
+	return swp_entry_to_pte(make_pte_marker_entry(marker));
+}
+
+/*
+ * This is a special version to check pte_none() just to cover the case when
+ * the pte is a pte marker.  It existed because in many cases the pte marker
+ * should be seen as a none pte; it's just that we have stored some information
+ * onto the none pte so it becomes not-none any more.
+ *
+ * It should be used when the pte is file-backed, ram-based and backing
+ * userspace pages, like shmem.  It is not needed upon pgtables that do not
+ * support pte markers at all.  For example, it's not needed on anonymous
+ * memory, kernel-only memory (including when the system is during-boot),
+ * non-ram based generic file-system.  It's fine to be used even there, but the
+ * extra pte marker check will be pure overhead.
+ *
+ * For systems configured with !CONFIG_PTE_MARKER this will be automatically
+ * optimized to pte_none().
+ */
+static inline int pte_none_mostly(pte_t pte)
+{
+	return pte_none(pte) || is_pte_marker(pte);
+}
+
 static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
 {
 	struct page *p = pfn_to_page(swp_offset(entry));
diff --git a/mm/Kconfig b/mm/Kconfig
index 034d87953600..a1688b9314b2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -909,6 +909,12 @@ config ANON_VMA_NAME
 	  area from being merged with adjacent virtual memory areas due to the
 	  difference in their name.
 
+config PTE_MARKER
+	bool "Marker PTEs support"
+
+	help
+	  Allows to create marker PTEs for file-backed memory.
+
 source "mm/damon/Kconfig"
 
 endmenu
-- 
2.32.0


  reply	other threads:[~2022-04-05  2:40 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-05  1:46 [PATCH v8 00/23] userfaultfd-wp: Support shmem and hugetlbfs Peter Xu
2022-04-05  1:46 ` Peter Xu [this message]
2022-04-12  1:07   ` [PATCH v8 01/23] mm: Introduce PTE_MARKER swap entry Alistair Popple
2022-04-12 19:45     ` Peter Xu
2022-04-13  0:30       ` Alistair Popple
2022-04-13 13:44         ` Peter Xu
2022-04-19  8:25   ` Alistair Popple
2022-04-19 19:44     ` Peter Xu
2022-04-05  1:48 ` [PATCH v8 02/23] mm: Teach core mm about pte markers Peter Xu
2022-04-12  1:22   ` Alistair Popple
2022-04-12 19:53     ` Peter Xu
2022-04-05  1:48 ` [PATCH v8 03/23] mm: Check against orig_pte for finish_fault() Peter Xu
2022-04-12  2:05   ` Alistair Popple
2022-04-12 19:54     ` Peter Xu
     [not found]   ` <CGME20220413140330eucas1p167da41e079712b829ef8237dc27b049c@eucas1p1.samsung.com>
2022-04-13 14:03     ` Marek Szyprowski
2022-04-13 16:43       ` Peter Xu
2022-04-14  7:51         ` Marek Szyprowski
2022-04-14 16:30           ` Peter Xu
2022-04-14 20:57             ` Andrew Morton
2022-04-14 21:08               ` Peter Xu
2022-04-15 14:21   ` Guenter Roeck
2022-04-15 14:41     ` Peter Xu
2022-04-05  1:48 ` [PATCH v8 04/23] mm/uffd: PTE_MARKER_UFFD_WP Peter Xu
2022-04-06  1:41   ` kernel test robot
2022-04-05  1:48 ` [PATCH v8 05/23] mm/shmem: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2022-04-05  1:48 ` [PATCH v8 06/23] mm/shmem: Handle uffd-wp special pte in page fault handler Peter Xu
2022-05-11 16:30   ` David Hildenbrand
2022-05-12 16:34     ` Peter Xu
2022-04-05  1:48 ` [PATCH v8 07/23] mm/shmem: Persist uffd-wp bit across zapping for file-backed Peter Xu
2022-04-05  1:48 ` [PATCH v8 08/23] mm/shmem: Allow uffd wr-protect none pte for file-backed mem Peter Xu
2022-04-05  1:48 ` [PATCH v8 09/23] mm/shmem: Allows file-back mem to be uffd wr-protected on thps Peter Xu
2022-04-05  1:48 ` [PATCH v8 10/23] mm/shmem: Handle uffd-wp during fork() Peter Xu
2022-04-06  6:16   ` kernel test robot
2022-04-06 12:18     ` Peter Xu
2022-04-06 12:18       ` Peter Xu
2022-04-05  1:48 ` [PATCH v8 11/23] mm/hugetlb: Introduce huge pte version of uffd-wp helpers Peter Xu
2022-04-05  1:49 ` [PATCH v8 12/23] mm/hugetlb: Hook page faults for uffd write protection Peter Xu
2022-04-05  1:49 ` [PATCH v8 13/23] mm/hugetlb: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2022-04-05  1:49 ` [PATCH v8 14/23] mm/hugetlb: Handle UFFDIO_WRITEPROTECT Peter Xu
2022-04-05  1:49 ` [PATCH v8 15/23] mm/hugetlb: Handle pte markers in page faults Peter Xu
2022-04-06 13:37   ` kernel test robot
2022-04-06 15:02     ` Peter Xu
2022-04-06 15:02       ` Peter Xu
2022-04-05  1:49 ` [PATCH v8 16/23] mm/hugetlb: Allow uffd wr-protect none ptes Peter Xu
2022-04-05  1:49 ` [PATCH v8 17/23] mm/hugetlb: Only drop uffd-wp special pte if required Peter Xu
2022-04-05  1:49 ` [PATCH v8 18/23] mm/hugetlb: Handle uffd-wp during fork() Peter Xu
2022-04-05  1:49 ` [PATCH v8 19/23] mm/khugepaged: Don't recycle vma pgtable if uffd-wp registered Peter Xu
2022-04-05  1:49 ` [PATCH v8 20/23] mm/pagemap: Recognize uffd-wp bit for shmem/hugetlbfs Peter Xu
2022-04-05  1:49 ` [PATCH v8 21/23] mm/uffd: Enable write protection for shmem & hugetlbfs Peter Xu
2022-04-05  1:49 ` [PATCH v8 22/23] mm: Enable PTE markers by default Peter Xu
2022-04-19 15:13   ` Johannes Weiner
2022-04-19 19:59     ` Peter Xu
2022-04-19 20:14       ` Johannes Weiner
2022-04-19 20:28         ` Peter Xu
2022-04-19 21:24           ` Johannes Weiner
2022-04-19 22:01             ` Peter Xu
2022-04-20 13:46               ` Johannes Weiner
2022-04-20 14:25                 ` Peter Xu
2022-04-05  1:49 ` [PATCH v8 23/23] selftests/uffd: Enable uffd-wp for shmem/hugetlbfs Peter Xu
2022-04-05 22:16 ` [PATCH v8 00/23] userfaultfd-wp: Support shmem and hugetlbfs Andrew Morton
2022-04-05 22:42   ` Peter Xu
2022-04-05 22:49     ` Andrew Morton
2022-04-05 23:02       ` Peter Xu
2022-04-05 23:08         ` Andrew Morton
2022-05-10 19:05 ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220405014646.13522-2-peterx@redhat.com \
    --to=peterx@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=axelrasmussen@google.com \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=jglisse@redhat.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=nadav.amit@gmail.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.