From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linuxppc-dev@lists.ozlabs.org, mpe@ellerman.id.au,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	kvm-ppc@vger.kernel.org
Cc: npiggin@gmail.com, paulus@ozlabs.org, leonardo@linux.ibm.com,
	kirill@shutemov.name,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
	Ram Pai <linuxram@linux.ibm.com>
Subject: [PATCH v2 01/22] powerpc/pkeys: Avoid using lockless page table walk
Date: Thu, 19 Mar 2020 09:25:48 +0530	[thread overview]
Message-ID: <20200319035609.158654-2-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20200319035609.158654-1-aneesh.kumar@linux.ibm.com>

Fetch the pkey from the vma instead of the Linux page table. Also document the
fact that, in some cases, the pkey returned in siginfo won't be the same as the
one we took the key fault on. Even with a Linux page table walk, we can end up
in a similar scenario.
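For reference, a minimal userspace sketch (illustrative only, not part of this
patch) of how the pkey reported in siginfo is observed. It assumes the glibc
pkey_alloc()/pkey_mprotect()/pkey_set() wrappers and a kernel built with
CONFIG_PPC_MEM_KEYS. A second thread calling pkey_mprotect() on the same range
between the fault and the signal delivery can make si_pkey differ from the
pkey that actually caused the fault, as described above.

	#define _GNU_SOURCE
	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/mman.h>

	static void handler(int sig, siginfo_t *info, void *ctx)
	{
		/*
		 * With this patch, si_pkey is vma_pkey() of the faulting
		 * VMA. (fprintf is not async-signal-safe; demo only.)
		 */
		if (info->si_code == SEGV_PKUERR)
			fprintf(stderr, "key fault, si_pkey=%d\n",
				info->si_pkey);
		_exit(1);
	}

	int main(void)
	{
		struct sigaction sa = { .sa_sigaction = handler,
					.sa_flags = SA_SIGINFO };
		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		int pkey = pkey_alloc(0, 0);

		sigaction(SIGSEGV, &sa, NULL);
		pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, pkey);
		pkey_set(pkey, PKEY_DISABLE_ACCESS); /* deny access via AMR */
		*p = 1;	/* takes a key fault, delivered as SEGV_PKUERR */
		return 0;
	}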

Cc: Ram Pai <linuxram@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/mmu.h        |  9 ---
 arch/powerpc/mm/book3s64/hash_utils.c | 24 --------
 arch/powerpc/mm/fault.c               | 83 +++++++++++++++++++--------
 3 files changed, 60 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 0699cfeeb8c9..cf2a08bfd5cd 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -291,15 +291,6 @@ static inline bool early_radix_enabled(void)
 }
 #endif
 
-#ifdef CONFIG_PPC_MEM_KEYS
-extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address);
-#else
-static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
-{
-	return 0;
-}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
 #ifdef CONFIG_STRICT_KERNEL_RWX
 static inline bool strict_kernel_rwx_enabled(void)
 {
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 523d4d39d11e..8530ddbba56f 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1670,30 +1670,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 	hash_preload(vma->vm_mm, address, is_exec, trap);
 }
 
-#ifdef CONFIG_PPC_MEM_KEYS
-/*
- * Return the protection key associated with the given address and the
- * mm_struct.
- */
-u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
-{
-	pte_t *ptep;
-	u16 pkey = 0;
-	unsigned long flags;
-
-	if (!mm || !mm->pgd)
-		return 0;
-
-	local_irq_save(flags);
-	ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
-	if (ptep)
-		pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
-	local_irq_restore(flags);
-
-	return pkey;
-}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 static inline void tm_flush_hash_page(int local)
 {
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8db0507619e2..ab99ffa7d946 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -118,9 +118,34 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 	return __bad_area(regs, address, SEGV_MAPERR);
 }
 
-static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
-				    int pkey)
+#ifdef CONFIG_PPC_MEM_KEYS
+static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
+				    struct vm_area_struct *vma)
 {
+	struct mm_struct *mm = current->mm;
+	int pkey;
+
+	/*
+	 * We don't try to fetch the pkey from the page table because reading
+	 * the page table without locking doesn't guarantee a stable pte
+	 * value. Hence the pkey value that we return to userspace can
+	 * differ from the pkey that actually caused the access error.
+	 *
+	 * It does *not* guarantee that the VMA we find here
+	 * was the one that we faulted on.
+	 *
+	 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
+	 * 2. T1   : set AMR to deny access to pkey=4, touches page
+	 * 3. T1   : faults...
+	 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+	 * 5. T1   : enters fault handler, takes mmap_sem, etc...
+	 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+	 *	     faulted on a pte with its pkey=4.
+	 */
+	pkey = vma_pkey(vma);
+
+	up_read(&mm->mmap_sem);
+
 	/*
 	 * If we are in kernel mode, bail out with a SEGV, this will
 	 * be caught by the assembly which will restore the non-volatile
@@ -133,6 +158,7 @@ static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
 
 	return 0;
 }
+#endif
 
 static noinline int bad_access(struct pt_regs *regs, unsigned long address)
 {
@@ -289,8 +315,31 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
 	return false;
 }
 
-static bool access_error(bool is_write, bool is_exec,
-			 struct vm_area_struct *vma)
+#ifdef CONFIG_PPC_MEM_KEYS
+static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
+			      struct vm_area_struct *vma)
+{
+	/*
+	 * Read or write was blocked by protection keys.  This is
+	 * always an unconditional error and can never result in
+	 * a follow-up action to resolve the fault, like a COW.
+	 */
+	if (is_pkey)
+		return true;
+
+	/*
+	 * Make sure to check the VMA so that we do not perform
+	 * faults just to hit a pkey fault as soon as we fill in a
+	 * page. Only called for current mm, hence foreign == 0.
+	 */
+	if (!arch_vma_access_permitted(vma, is_write, is_exec, 0))
+		return true;
+
+	return false;
+}
+#endif
+
+static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
 {
 	/*
 	 * Allow execution from readable areas if the MMU does not
@@ -483,10 +532,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
-	if (error_code & DSISR_KEYFAULT)
-		return bad_key_fault_exception(regs, address,
-					       get_mm_addr_key(mm, address));
-
 	/*
 	 * We want to do this outside mmap_sem, because reading code around nip
 	 * can result in fault, which will cause a deadlock when called with
@@ -555,6 +600,13 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 		return bad_area(regs, address);
 
 good_area:
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	if (unlikely(access_pkey_error(is_write, is_exec,
+				       (error_code & DSISR_KEYFAULT), vma)))
+		return bad_access_pkey(regs, address, vma);
+#endif /* CONFIG_PPC_MEM_KEYS */
+
 	if (unlikely(access_error(is_write, is_exec, vma)))
 		return bad_access(regs, address);
 
@@ -565,21 +617,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 	 */
 	fault = handle_mm_fault(vma, address, flags);
 
-#ifdef CONFIG_PPC_MEM_KEYS
-	/*
-	 * we skipped checking for access error due to key earlier.
-	 * Check that using handle_mm_fault error return.
-	 */
-	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
-		!arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
-
-		int pkey = vma_pkey(vma);
-
-		up_read(&mm->mmap_sem);
-		return bad_key_fault_exception(regs, address, pkey);
-	}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
 	major |= fault & VM_FAULT_MAJOR;
 
 	/*
-- 
2.24.1



Thread overview: 23+ messages
2020-03-19  3:55 [PATCH v2 00/22] Avoid IPI while updating page table entries Aneesh Kumar K.V
2020-03-19  3:55 ` Aneesh Kumar K.V [this message]
2020-04-03  0:28   ` [PATCH v2 01/22] powerpc/pkeys: Avoid using lockless page table walk Ram Pai
2020-04-05 13:37     ` Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 02/22] powerpc/pkeys: Check vma before returning key fault error to the user Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 03/22] powerpc/mm/hash64: use _PAGE_PTE when checking for pte_present Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 04/22] powerpc/hash64: Restrict page table lookup using init_mm with __flush_hash_table_range Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 05/22] powerpc/book3s64/hash: Use the pte_t address from the caller Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 06/22] powerpc/mce: Don't reload pte val in addr_to_pfn Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 07/22] powerpc/perf/callchain: Use __get_user_pages_fast in read_user_stack_slow Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 08/22] powerpc/kvm/book3s: switch from raw_spin_*lock to arch_spin_lock Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 09/22] powerpc/kvm/book3s: Add helper to walk partition scoped linux page table Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 10/22] powerpc/kvm/nested: Add helper to walk nested shadow " Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 11/22] powerpc/kvm/book3s: Use kvm helpers to walk shadow or secondary table Aneesh Kumar K.V
2020-03-19  3:55 ` [PATCH v2 12/22] powerpc/kvm/book3s: Add helper for host page table walk Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 13/22] powerpc/kvm/book3s: Use find_kvm_host_pte in page fault handler Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 14/22] powerpc/kvm/book3s: Use find_kvm_host_pte in h_enter Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 17/22] powerpc/kvm/book3s: use find_kvm_host_pte in kvmppc_book3s_instantiate_page Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 18/22] powerpc/kvm/book3s: Use find_kvm_host_pte in kvmppc_get_hpa Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 19/22] powerpc/kvm/book3s: Use pte_present instead of opencoding _PAGE_PRESENT check Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 20/22] powerpc/mm/book3s64: Avoid sending IPI on clearing PMD Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 21/22] mm: change pmdp_huge_get_and_clear_full take vm_area_struct as arg Aneesh Kumar K.V
2020-03-19  3:56 ` [PATCH v2 22/22] powerpc/mm/book3s64: Fix MADV_DONTNEED and parallel page fault race Aneesh Kumar K.V
