From: Alexandru Elisei <alexandru.elisei@arm.com>
To: catalin.marinas@arm.com, will@kernel.org, oliver.upton@linux.dev,
	maz@kernel.org, james.morse@arm.com, suzuki.poulose@arm.com,
	yuzenghui@huawei.com, arnd@arndb.de, akpm@linux-foundation.org,
	mingo@redhat.com, peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com, vschneid@redhat.com, mhiramat@kernel.org,
	rppt@kernel.org, hughd@google.com
Cc: pcc@google.com, steven.price@arm.com, anshuman.khandual@arm.com,
	vincenzo.frascino@arm.com, david@redhat.com, eugenis@google.com,
	kcc@google.com, hyesoo.yu@samsung.com,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, kvmarm@lists.linux.dev,
	linux-fsdevel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-mm@kvack.org, linux-trace-kernel@vger.kernel.org
Subject: [PATCH RFC v3 13/35] mm: memory: Introduce fault-on-access mechanism for pages
Date: Thu, 25 Jan 2024 16:42:34 +0000
Message-ID: <20240125164256.4147-14-alexandru.elisei@arm.com>
In-Reply-To: <20240125164256.4147-1-alexandru.elisei@arm.com>

Introduce a mechanism that allows an architecture to trigger a page fault,
and add the infrastructure to handle that fault accordingly. To make use of
this, an arch is expected to mark the page table entry as PAGE_NONE (which
will cause a fault the next time it is accessed) and to implement an
arch-specific method (like a software bit) for recognizing that the fault
needs to be handled by the arch code.

arm64 will use this approach to reserve tag storage for pages which are
mapped in an MTE enabled VMA, but for which the corresponding tag storage
has not yet been reserved (for example, because of an mprotect(PROT_MTE)
call on a VMA with existing pages).
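
As an illustration only, the arch hooks that create and recognize such an
entry might look roughly like the sketch below. The PTE_SW_FAULT_ON_ACCESS
software bit and the arch_make_fault_on_access_pte() helper are
hypothetical, not part of this patch:

  /* Mark the entry inaccessible and flag it for arch handling. */
  static pte_t arch_make_fault_on_access_pte(pte_t pte)
  {
  	/* PAGE_NONE makes the next access fault. */
  	pte = pte_modify(pte, PAGE_NONE);
  	/* Hypothetical software bit that tags the fault for the arch. */
  	return set_pte_bit(pte, __pgprot(PTE_SW_FAULT_ON_ACCESS));
  }

  static inline bool arch_fault_on_access_pte(pte_t pte)
  {
  	return pte_protnone(pte) && (pte_val(pte) & PTE_SW_FAULT_ON_ACCESS);
  }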

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---

Changes since rfc v2:

* New patch. Split from patch #19 ("mm: mprotect: Introduce PAGE_FAULT_ON_ACCESS
for mprotect(PROT_MTE)") (David Hildenbrand).

 include/linux/huge_mm.h |  4 ++--
 include/linux/pgtable.h | 47 +++++++++++++++++++++++++++++++++++--
 mm/Kconfig              |  3 +++
 mm/huge_memory.c        | 36 +++++++++++++++++++++--------
 mm/memory.c             | 51 ++++++++++++++++++++++++++---------------
 5 files changed, 109 insertions(+), 32 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 5adb86af35fc..4678a0a5e6a8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -346,7 +346,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 		pud_t *pud, int flags, struct dev_pagemap **pgmap);
 
-vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
+vm_fault_t handle_huge_pmd_protnone(struct vm_fault *vmf);
 
 extern struct page *huge_zero_page;
 extern unsigned long huge_zero_pfn;
@@ -476,7 +476,7 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 	return NULL;
 }
 
-static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
+static inline vm_fault_t handle_huge_pmd_protnone(struct vm_fault *vmf)
 {
 	return 0;
 }
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 2d0f04042f62..81a21be855a2 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1455,7 +1455,7 @@ static inline int pud_trans_unstable(pud_t *pud)
 	return 0;
 }
 
-#ifndef CONFIG_NUMA_BALANCING
+#if !defined(CONFIG_NUMA_BALANCING) && !defined(CONFIG_ARCH_HAS_FAULT_ON_ACCESS)
 /*
  * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
  * perfectly valid to indicate "no" in that case, which is why our default
@@ -1477,7 +1477,50 @@ static inline int pmd_protnone(pmd_t pmd)
 {
 	return 0;
 }
-#endif /* CONFIG_NUMA_BALANCING */
+#endif /* !CONFIG_NUMA_BALANCING && !CONFIG_ARCH_HAS_FAULT_ON_ACCESS */
+
+#ifndef CONFIG_ARCH_HAS_FAULT_ON_ACCESS
+static inline bool arch_fault_on_access_pte(pte_t pte)
+{
+	return false;
+}
+
+static inline bool arch_fault_on_access_pmd(pmd_t pmd)
+{
+	return false;
+}
+
+/*
+ * The function is called with the fault lock held and an elevated reference on
+ * the folio.
+ *
+ * Rules that an arch implementation of the function must follow:
+ *
+ * 1. The function must return with the elevated reference dropped.
+ *
+ * 2. If the return value contains VM_FAULT_RETRY or VM_FAULT_COMPLETED then:
+ *
+ * - if FAULT_FLAG_RETRY_NOWAIT is not set, the function must return with the
+ *   correct fault lock released, which can be accomplished with
+ *   release_fault_lock(vmf). Note that release_fault_lock() doesn't check if
+ *   FAULT_FLAG_RETRY_NOWAIT is set before releasing the mmap_lock.
+ *
+ * - if FAULT_FLAG_RETRY_NOWAIT is set, then the function must not release the
+ *   mmap_lock. The flag should be set only if the mmap_lock is held.
+ *
+ * 3. If the return value contains neither of the above, the function must not
+ * release the fault lock; the generic fault handler will take care of releasing
+ * the correct lock.
+ */
+static inline vm_fault_t arch_handle_folio_fault_on_access(struct folio *folio,
+							   struct vm_fault *vmf,
+							   bool *map_pte)
+{
+	*map_pte = false;
+
+	return VM_FAULT_SIGBUS;
+}
+#endif
 
 #endif /* CONFIG_MMU */
 
diff --git a/mm/Kconfig b/mm/Kconfig
index 341cf53898db..153df67221f1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1006,6 +1006,9 @@ config IDLE_PAGE_TRACKING
 config ARCH_HAS_CACHE_LINE_SIZE
 	bool
 
+config ARCH_HAS_FAULT_ON_ACCESS
+	bool
+
 config ARCH_HAS_CURRENT_STACK_POINTER
 	bool
 	help
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 94ef5c02b459..2bad63a7ec16 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1698,7 +1698,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
+vm_fault_t handle_huge_pmd_protnone(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	pmd_t oldpmd = vmf->orig_pmd;
@@ -1708,6 +1708,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	int nid = NUMA_NO_NODE;
 	int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
 	bool migrated = false, writable = false;
+	vm_fault_t ret;
 	int flags = 0;
 
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -1731,6 +1732,20 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	if (!folio)
 		goto out_map;
 
+	folio_get(folio);
+	vma_set_access_pid_bit(vma);
+
+	if (arch_fault_on_access_pmd(oldpmd)) {
+		bool map_pte = false;
+
+		spin_unlock(vmf->ptl);
+		ret = arch_handle_folio_fault_on_access(folio, vmf, &map_pte);
+		if (ret || !map_pte)
+			return ret;
+		writable = false;
+		goto out_lock_and_map;
+	}
+
 	/* See similar comment in do_numa_page for explanation */
 	if (!writable)
 		flags |= TNF_NO_GROUP;
@@ -1755,15 +1770,18 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	if (migrated) {
 		flags |= TNF_MIGRATED;
 		nid = target_nid;
-	} else {
-		flags |= TNF_MIGRATE_FAIL;
-		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-		if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
-			spin_unlock(vmf->ptl);
-			goto out;
-		}
-		goto out_map;
+		goto out;
+	}
+
+	flags |= TNF_MIGRATE_FAIL;
+
+out_lock_and_map:
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+		spin_unlock(vmf->ptl);
+		goto out;
 	}
+	goto out_map;
 
 out:
 	if (nid != NUMA_NO_NODE)
diff --git a/mm/memory.c b/mm/memory.c
index 8a421e168b57..110fe2224277 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4886,11 +4886,6 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
 		      unsigned long addr, int page_nid, int *flags)
 {
-	folio_get(folio);
-
-	/* Record the current PID acceesing VMA */
-	vma_set_access_pid_bit(vma);
-
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (page_nid == numa_node_id()) {
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
@@ -4900,13 +4895,14 @@ int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
 	return mpol_misplaced(folio, vma, addr);
 }
 
-static vm_fault_t do_numa_page(struct vm_fault *vmf)
+static vm_fault_t handle_pte_protnone(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct folio *folio = NULL;
 	int nid = NUMA_NO_NODE;
 	bool writable = false;
 	int last_cpupid;
+	vm_fault_t ret;
 	int target_nid;
 	pte_t pte, old_pte;
 	int flags = 0;
@@ -4939,6 +4935,20 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	if (!folio || folio_is_zone_device(folio))
 		goto out_map;
 
+	folio_get(folio);
+	/* Record the current PID accessing VMA */
+	vma_set_access_pid_bit(vma);
+
+	if (arch_fault_on_access_pte(old_pte)) {
+		bool map_pte = false;
+
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		ret = arch_handle_folio_fault_on_access(folio, vmf, &map_pte);
+		if (ret || !map_pte)
+			return ret;
+		goto out_lock_and_map;
+	}
+
 	/* TODO: handle PTE-mapped THP */
 	if (folio_test_large(folio))
 		goto out_map;
@@ -4983,18 +4993,21 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	if (migrate_misplaced_folio(folio, vma, target_nid)) {
 		nid = target_nid;
 		flags |= TNF_MIGRATED;
-	} else {
-		flags |= TNF_MIGRATE_FAIL;
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-					       vmf->address, &vmf->ptl);
-		if (unlikely(!vmf->pte))
-			goto out;
-		if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
-			pte_unmap_unlock(vmf->pte, vmf->ptl);
-			goto out;
-		}
-		goto out_map;
+		goto out;
+	}
+
+	flags |= TNF_MIGRATE_FAIL;
+
+out_lock_and_map:
+	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+				       vmf->address, &vmf->ptl);
+	if (unlikely(!vmf->pte))
+		goto out;
+	if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		goto out;
 	}
+	goto out_map;
 
 out:
 	if (nid != NUMA_NO_NODE)
@@ -5151,7 +5164,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 		return do_swap_page(vmf);
 
 	if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
-		return do_numa_page(vmf);
+		return handle_pte_protnone(vmf);
 
 	spin_lock(vmf->ptl);
 	entry = vmf->orig_pte;
@@ -5272,7 +5285,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		}
 		if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) {
 			if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
-				return do_huge_pmd_numa_page(&vmf);
+				return handle_huge_pmd_protnone(&vmf);
 
 			if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
 			    !pmd_write(vmf.orig_pmd)) {
-- 
2.43.0

