kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Brijesh Singh <brijesh.singh@amd.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: tglx@linutronix.de, bp@alien8.de, jroedel@suse.de,
	thomas.lendacky@amd.com, pbonzini@redhat.com, mingo@redhat.com,
	dave.hansen@intel.com, rientjes@google.com, seanjc@google.com,
	peterz@infradead.org, hpa@zytor.com, tony.luck@intel.com,
	Brijesh Singh <brijesh.singh@amd.com>
Subject: [PATCH Part2 RFC v2 10/37] x86/fault: Add support to handle the RMP fault for kernel address
Date: Fri, 30 Apr 2021 07:37:55 -0500	[thread overview]
Message-ID: <20210430123822.13825-11-brijesh.singh@amd.com> (raw)
In-Reply-To: <20210430123822.13825-1-brijesh.singh@amd.com>

When SEV-SNP is enabled globally, a write from the host goes through the
RMP check. When the host writes to pages, hardware checks the following
conditions at the end of page walk:

1. Assigned bit in the RMP table is zero (i.e page is shared).
2. If the page table entry that gives the sPA indicates that the target
   page size is a large page, then all RMP entries for the 4KB
   constituting pages of the target must have the assigned bit 0.
3. Immutable bit in the RMP table is not zero.

The hardware will raise page fault if one of the above conditions is not
met. A host should not encounter the RMP fault in normal execution, but
a malicious guest could trick the hypervisor into it. e.g., a guest does
not make the GHCB page shared, on #VMGEXIT, the hypervisor will attempt
to write to GHCB page.

Try resolving the fault instead of crashing the host. To resolve it,
forcefully clear the assigned bit from the RMP entry to make the page
shared so that the write succeeds.  If the fault handler cannot resolve
the RMP violation, then dump the RMP entry for debugging purposes.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/mm/fault.c | 146 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 39d22f6870e1..d833fe84010f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -19,6 +19,7 @@
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 #include <linux/efi.h>			/* efi_crash_gracefully_on_page_fault()*/
 #include <linux/mm_types.h>
+#include <linux/sev.h>			/* snp_lookup_page_in_rmptable() */
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -1132,6 +1133,145 @@ bool fault_in_kernel_space(unsigned long address)
 	return address >= TASK_SIZE_MAX;
 }
 
+#define RMP_FAULT_RETRY		0
+#define RMP_FAULT_KILL		1
+#define RMP_FAULT_PAGE_SPLIT	2
+
+static inline size_t pages_per_hpage(int level)
+{
+	return page_level_size(level) / PAGE_SIZE;
+}
+
+static void dump_rmpentry(unsigned long pfn)
+{
+	struct rmpentry *e;
+	int level;
+
+	e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &level);
+
+	/*
+	 * If the RMP entry at the faulting address was not assigned, then dump may not
+	 * provide any useful debug information. Iterate through the entire 2MB region,
+	 * and dump the RMP entries if one of the bit in the RMP entry is set.
+	 */
+	if (rmpentry_assigned(e)) {
+		pr_alert("RMPEntry paddr 0x%lx [assigned=%d immutable=%d pagesize=%d gpa=0x%lx"
+			" asid=%d vmsa=%d validated=%d]\n", pfn << PAGE_SHIFT,
+			rmpentry_assigned(e), rmpentry_immutable(e), rmpentry_pagesize(e),
+			rmpentry_gpa(e), rmpentry_asid(e), rmpentry_vmsa(e),
+			rmpentry_validated(e));
+
+		pr_alert("RMPEntry paddr 0x%lx %016llx %016llx\n", pfn << PAGE_SHIFT,
+			e->high, e->low);
+	} else {
+		unsigned long pfn_end;
+
+		pfn = pfn & ~0x1ff;
+		pfn_end = pfn + PTRS_PER_PMD;
+
+		while (pfn < pfn_end) {
+			e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &level);
+
+			if (unlikely(!e))
+				return;
+
+			if (e->low || e->high)
+				pr_alert("RMPEntry paddr 0x%lx: %016llx %016llx\n",
+					pfn << PAGE_SHIFT, e->high, e->low);
+			pfn++;
+		}
+	}
+}
+
+/*
+ * Called for all faults where 'address' is part of the kernel address space.
+ * The function returns RMP_FAULT_RETRY when its able to resolve the fault and
+ * its safe to retry.
+ */
+static int handle_kern_rmp_fault(unsigned long hw_error_code, unsigned long address)
+{
+	int ret, level, rmp_level, mask;
+	struct rmpupdate val = {};
+	struct rmpentry *e;
+	unsigned long pfn;
+	pgd_t *pgd;
+	pte_t *pte;
+
+	if (unlikely(!cpu_feature_enabled(X86_FEATURE_SEV_SNP)))
+		return RMP_FAULT_KILL;
+
+	pgd = __va(read_cr3_pa());
+	pgd += pgd_index(address);
+
+	pte = lookup_address_in_pgd(pgd, address, &level);
+
+	if (unlikely(!pte))
+		return RMP_FAULT_KILL;
+
+	switch(level) {
+	case PG_LEVEL_4K: pfn = pte_pfn(*pte); break;
+	case PG_LEVEL_2M: pfn = pmd_pfn(*(pmd_t *)pte); break;
+	case PG_LEVEL_1G: pfn = pud_pfn(*(pud_t *)pte); break;
+	case PG_LEVEL_512G: pfn = p4d_pfn(*(p4d_t *)pte); break;
+	default: return RMP_FAULT_KILL;
+	}
+
+	/* Calculate the PFN within large page. */
+	if (level > PG_LEVEL_4K) {
+		mask = pages_per_hpage(level) - pages_per_hpage(level - 1);
+		pfn |= (address >> PAGE_SHIFT) & mask;
+	}
+
+	e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
+	if (unlikely(!e))
+		return RMP_FAULT_KILL;
+
+	/*
+	 * If the immutable bit is set, we cannot convert the page to shared
+	 * to resolve the fault.
+	 */
+	if (rmpentry_immutable(e))
+		goto e_dump_rmpentry;
+
+	/*
+	 * If the host page level is greather than RMP page level then only way to
+	 * resolve the fault is to split the address. We don't support splitting
+	 * kernel address in the fault path yet.
+	 */
+	if (level > rmp_level)
+		goto e_dump_rmpentry;
+
+	/*
+	 * If the RMP page level is higher than host page level then use the PSMASH
+	 * to split the RMP large entry into 512 4K entries.
+	 */
+	if (rmp_level > level) {
+		ret = psmash(pfn_to_page(pfn & ~0x1FF));
+		if (ret) {
+			pr_alert("Failed to psmash pfn 0x%lx (rc %d)\n", pfn, ret);
+			goto e_dump_rmpentry;
+		}
+	}
+
+	/* Log that the RMP fault handler is clearing the assigned bit. */
+	if (rmpentry_assigned(e))
+		pr_alert("Force address %lx from assigned -> unassigned in RMP table\n", address);
+
+	/* Clear the assigned bit from the RMP table. */
+	ret = rmpupdate(pfn_to_page(pfn), &val);
+	if (ret) {
+		pr_alert("Failed to unassign address 0x%lx in RMP table\n", address);
+		goto e_dump_rmpentry;
+	}
+
+	return RMP_FAULT_RETRY;
+
+e_dump_rmpentry:
+
+	dump_rmpentry(pfn);
+	return RMP_FAULT_KILL;
+}
+
 /*
  * Called for all faults where 'address' is part of the kernel address
  * space.  Might get called for faults that originate from *code* that
@@ -1179,6 +1319,12 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 	}
 #endif
 
+	/* Try resolving the RMP fault. */
+	if (hw_error_code & X86_PF_RMP) {
+		if (handle_kern_rmp_fault(hw_error_code, address) == RMP_FAULT_RETRY)
+			return;
+	}
+
 	if (is_f00f_bug(regs, hw_error_code, address))
 		return;
 
-- 
2.17.1


  parent reply	other threads:[~2021-04-30 12:39 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-30 12:37 [PATCH Part2 RFC v2 00/37] Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 01/37] KVM: SVM: Add support to handle AP reset MSR protocol Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 02/37] KVM: SVM: Provide the Hypervisor Feature support VMGEXIT Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 03/37] KVM: SVM: Increase the GHCB protocol version Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 04/37] x86/cpufeatures: Add SEV-SNP CPU feature Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 05/37] x86/sev: Add the host SEV-SNP initialization support Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 06/37] x86/sev: Add RMP entry lookup helpers Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 07/37] x86/sev: Add helper functions for RMPUPDATE and PSMASH instruction Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 08/37] x86/sev: Split the physmap when adding the page in RMP table Brijesh Singh
2021-05-03 15:07   ` Peter Zijlstra
2021-05-03 15:15   ` Andy Lutomirski
2021-05-03 15:41     ` Dave Hansen
2021-05-07 11:28       ` Vlastimil Babka
2021-04-30 12:37 ` [PATCH Part2 RFC v2 09/37] x86/traps: Define RMP violation #PF error code Brijesh Singh
2021-04-30 12:37 ` Brijesh Singh [this message]
2021-05-03 14:44   ` [PATCH Part2 RFC v2 10/37] x86/fault: Add support to handle the RMP fault for kernel address Dave Hansen
2021-05-03 15:03     ` Andy Lutomirski
2021-05-03 15:49       ` Brijesh Singh
2021-05-03 15:37     ` Brijesh Singh
2021-05-03 16:15       ` Dave Hansen
2021-05-03 17:19         ` Brijesh Singh
2021-05-03 17:31           ` Brijesh Singh
2021-05-03 17:40           ` Andy Lutomirski
2021-05-03 19:41             ` Brijesh Singh
2021-05-03 19:43               ` Dave Hansen
2021-05-04 12:31                 ` Brijesh Singh
2021-05-04 14:33                   ` Dave Hansen
2021-05-04 15:16                     ` Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 11/37] x86/fault: Add support to handle the RMP fault for user address Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 12/37] crypto:ccp: Define the SEV-SNP commands Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 13/37] crypto: ccp: Add support to initialize the AMD-SP for SEV-SNP Brijesh Singh
2021-04-30 12:37 ` [PATCH Part2 RFC v2 14/37] crypto: ccp: Shutdown SNP firmware on kexec Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 15/37] crypto:ccp: Provide APIs to issue SEV-SNP commands Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 16/37] crypto: ccp: Handle the legacy TMR allocation when SNP is enabled Brijesh Singh
2021-05-10 18:23   ` Peter Gonda
2021-05-10 20:07     ` Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 17/37] crypto: ccp: Handle the legacy SEV command " Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 18/37] KVM: SVM: make AVIC backing, VMSA and VMCB memory allocation SNP safe Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 19/37] KVM: SVM: Add initial SEV-SNP support Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 20/37] KVM: SVM: define new SEV_FEATURES field in the VMCB Save State Area Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 21/37] KVM: SVM: Add KVM_SNP_INIT command Brijesh Singh
2021-05-06 20:25   ` Peter Gonda
2021-05-06 22:29     ` Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 22/37] KVM: SVM: Add KVM_SEV_SNP_LAUNCH_START command Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 23/37] KVM: SVM: Add KVM_SEV_SNP_LAUNCH_UPDATE command Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 24/37] KVM: SVM: Reclaim the guest pages when SEV-SNP VM terminates Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 25/37] KVM: SVM: Add KVM_SEV_SNP_LAUNCH_FINISH command Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 26/37] KVM: X86: Add kvm_x86_ops to get the max page level for the TDP Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 27/37] KVM: X86: Introduce kvm_mmu_map_tdp_page() for use by SEV Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 28/37] KVM: X86: Introduce kvm_mmu_get_tdp_walk() for SEV-SNP use Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 29/37] KVM: X86: Define new RMP check related #NPF error bits Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 30/37] KVM: X86: update page-fault trace to log the 64-bit error code Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 31/37] KVM: SVM: Add support to handle GHCB GPA register VMGEXIT Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 32/37] KVM: SVM: Add support to handle MSR based Page State Change VMGEXIT Brijesh Singh
2021-05-10 17:30   ` Peter Gonda
2021-05-10 17:51     ` Brijesh Singh
2021-05-10 19:59       ` Peter Gonda
2021-05-10 20:50         ` Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 33/37] KVM: SVM: Add support to handle " Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 34/37] KVM: X86: Export the kvm_zap_gfn_range() for the SNP use Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 35/37] KVM: SVM: Add support to handle the RMP nested page fault Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 36/37] KVM: SVM: Provide support for SNP_GUEST_REQUEST NAE event Brijesh Singh
2021-05-10 18:57   ` Peter Gonda
2021-05-10 20:14     ` Brijesh Singh
2021-05-10 21:17       ` Sean Christopherson
2021-05-11 18:34         ` Brijesh Singh
2021-04-30 12:38 ` [PATCH Part2 RFC v2 37/37] KVM: SVM: Advertise the SEV-SNP feature support Brijesh Singh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210430123822.13825-11-brijesh.singh@amd.com \
    --to=brijesh.singh@amd.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@intel.com \
    --cc=hpa@zytor.com \
    --cc=jroedel@suse.de \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rientjes@google.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).