[v3,51/75] x86/sev-es: Handle MMIO events
diff mbox series

Message ID 20200428151725.31091-52-joro@8bytes.org
State New
Headers show
Series
  • x86: SEV-ES Guest Support
Related show

Commit Message

Joerg Roedel April 28, 2020, 3:17 p.m. UTC
From: Tom Lendacky <thomas.lendacky@amd.com>

Add handler for VC exceptions caused by MMIO intercepts. These
intercepts come along as nested page faults on pages with reserved
bits set.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
[ jroedel@suse.de: Adapt to VC handling framework ]
Co-developed-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/include/uapi/asm/svm.h |   5 +
 arch/x86/kernel/sev-es.c        | 188 ++++++++++++++++++++++++++++++++
 2 files changed, 193 insertions(+)

Comments

Sean Christopherson May 20, 2020, 6:32 a.m. UTC | #1
On Tue, Apr 28, 2020 at 05:17:01PM +0200, Joerg Roedel wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
> 
> Add handler for VC exceptions caused by MMIO intercepts. These
> intercepts come along as nested page faults on pages with reserved
> bits set.
> 
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> [ jroedel@suse.de: Adapt to VC handling framework ]
> Co-developed-by: Joerg Roedel <jroedel@suse.de>
> Signed-off-by: Joerg Roedel <jroedel@suse.de>
> ---

...

> diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
> index f4ce3b475464..e3662723ed76 100644
> --- a/arch/x86/kernel/sev-es.c
> +++ b/arch/x86/kernel/sev-es.c
> @@ -294,6 +294,25 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
>  	return ES_EXCEPTION;
>  }
>  
> +static phys_addr_t vc_slow_virt_to_phys(struct ghcb *ghcb, unsigned long vaddr)
> +{
> +	unsigned long va = (unsigned long)vaddr;
> +	unsigned int level;
> +	phys_addr_t pa;
> +	pgd_t *pgd;
> +	pte_t *pte;
> +
> +	pgd = pgd_offset(current->active_mm, va);
> +	pte = lookup_address_in_pgd(pgd, va, &level);
> +	if (!pte)
> +		return 0;

'0' is a valid physical address.  It happens to be reserved in the kernel
thanks to L1TF, but using '0' as an error code is ugly.  Not to mention
none of the callers actually check the result.

> +
> +	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
> +	pa |= va & ~page_level_mask(level);
> +
> +	return pa;
> +}
Borislav Petkov May 25, 2020, 8:02 a.m. UTC | #2
On Tue, Apr 28, 2020 at 05:17:01PM +0200, Joerg Roedel wrote:
> +static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
> +				 unsigned int bytes, bool read)
> +{
> +	u64 exit_code, exit_info_1, exit_info_2;
> +	unsigned long ghcb_pa = __pa(ghcb);
> +	void __user *ref;
> +
> +	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
> +	if (ref == (void __user *)-1L)
> +		return ES_UNSUPPORTED;
> +
> +	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
> +
> +	exit_info_1 = vc_slow_virt_to_phys(ghcb, (unsigned long)ref);
> +	exit_info_2 = bytes;    /* Can never be greater than 8 */

No trailing comments pls - put them over the line.

> +	ghcb->save.sw_scratch = ghcb_pa + offsetof(struct ghcb, shared_buffer);
> +
> +	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
> +}
> +
> +static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
> +						 struct es_em_ctxt *ctxt)
> +{
> +	struct insn *insn = &ctxt->insn;
> +	unsigned int bytes = 0;
> +	enum es_result ret;
> +	int sign_byte;
> +	long *reg_data;
> +
> +	switch (insn->opcode.bytes[1]) {
> +		/* MMIO Read w/ zero-extension */
> +	case 0xb6:
> +		bytes = 1;
> +		/* Fallthrough */

I'm guessing we're supposed to annotate it this way now:

WARNING: Prefer 'fallthrough;' over fallthrough comment
#139: FILE: arch/x86/kernel/sev-es.c:504:
+               /* Fallthrough */


> +	case 0xb7:
> +		if (!bytes)
> +			bytes = 2;
> +
> +		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
> +		if (ret)
> +			break;
> +
> +		/* Zero extend based on operand size */
> +		reg_data = vc_insn_get_reg(ctxt);

That function can return NULL - you need to test reg_data. Ditto for all
its invocations.
Joerg Roedel June 11, 2020, 12:40 p.m. UTC | #3
On Tue, May 19, 2020 at 11:32:02PM -0700, Sean Christopherson wrote:
> '0' is a valid physical address.  It happens to be reserved in the kernel
> thanks to L1TF, but using '0' as an error code is ugly.  Not to mention
> none of the callers actually check the result.

Right, I changed the function to better handle error cases and added
checks to the call-sites. It looks like below now:

static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
                                 unsigned long vaddr, phys_addr_t *paddr)
{
        unsigned long va = (unsigned long)vaddr;
        unsigned int level;
        phys_addr_t pa;
        pgd_t *pgd;
        pte_t *pte;

        pgd = pgd_offset(current->active_mm, va);
        pte = lookup_address_in_pgd(pgd, va, &level);
        if (!pte) {
                ctxt->fi.vector     = X86_TRAP_PF;
                ctxt->fi.cr2        = vaddr;
                ctxt->fi.error_code = 0;

                if (user_mode(ctxt->regs))
                        ctxt->fi.error_code |= X86_PF_USER;

                return false;
        }

        pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
        pa |= va & ~page_level_mask(level);

        *paddr = pa;

        return true;
}

Patch
diff mbox series

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index c68d1618c9b0..8f36ae021a7f 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -81,6 +81,11 @@ 
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI		0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS	0x402
 
+/* SEV-ES software-defined VMGEXIT events */
+#define SVM_VMGEXIT_MMIO_READ			0x80000001
+#define SVM_VMGEXIT_MMIO_WRITE			0x80000002
+#define SVM_VMGEXIT_UNSUPPORTED_EVENT		0x8000ffff
+
 #define SVM_EXIT_ERR           -1
 
 #define SVM_EXIT_REASONS \
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index f4ce3b475464..e3662723ed76 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -294,6 +294,25 @@  static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
 	return ES_EXCEPTION;
 }
 
+static phys_addr_t vc_slow_virt_to_phys(struct ghcb *ghcb, unsigned long vaddr)
+{
+	unsigned long va = (unsigned long)vaddr;
+	unsigned int level;
+	phys_addr_t pa;
+	pgd_t *pgd;
+	pte_t *pte;
+
+	pgd = pgd_offset(current->active_mm, va);
+	pte = lookup_address_in_pgd(pgd, va, &level);
+	if (!pte)
+		return 0;
+
+	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
+	pa |= va & ~page_level_mask(level);
+
+	return pa;
+}
+
 /* Include code shared with pre-decompression boot stage */
 #include "sev-es-shared.c"
 
@@ -432,6 +451,172 @@  static void __init vc_early_vc_forward_exception(struct es_em_ctxt *ctxt)
 	do_early_exception(ctxt->regs, trapnr);
 }
 
+static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
+{
+	long *reg_array;
+	int offset;
+
+	reg_array = (long *)ctxt->regs;
+	offset    = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
+
+	if (offset < 0)
+		return NULL;
+
+	offset /= sizeof(long);
+
+	return reg_array + offset;
+}
+
+static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+				 unsigned int bytes, bool read)
+{
+	u64 exit_code, exit_info_1, exit_info_2;
+	unsigned long ghcb_pa = __pa(ghcb);
+	void __user *ref;
+
+	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
+	if (ref == (void __user *)-1L)
+		return ES_UNSUPPORTED;
+
+	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
+
+	exit_info_1 = vc_slow_virt_to_phys(ghcb, (unsigned long)ref);
+	exit_info_2 = bytes;    /* Can never be greater than 8 */
+
+	ghcb->save.sw_scratch = ghcb_pa + offsetof(struct ghcb, shared_buffer);
+
+	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
+}
+
+static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
+						 struct es_em_ctxt *ctxt)
+{
+	struct insn *insn = &ctxt->insn;
+	unsigned int bytes = 0;
+	enum es_result ret;
+	int sign_byte;
+	long *reg_data;
+
+	switch (insn->opcode.bytes[1]) {
+		/* MMIO Read w/ zero-extension */
+	case 0xb6:
+		bytes = 1;
+		/* Fallthrough */
+	case 0xb7:
+		if (!bytes)
+			bytes = 2;
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+		if (ret)
+			break;
+
+		/* Zero extend based on operand size */
+		reg_data = vc_insn_get_reg(ctxt);
+		memset(reg_data, 0, insn->opnd_bytes);
+
+		memcpy(reg_data, ghcb->shared_buffer, bytes);
+		break;
+
+		/* MMIO Read w/ sign-extension */
+	case 0xbe:
+		bytes = 1;
+		/* Fallthrough */
+	case 0xbf:
+		if (!bytes)
+			bytes = 2;
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+		if (ret)
+			break;
+
+		/* Sign extend based on operand size */
+		reg_data = vc_insn_get_reg(ctxt);
+		if (bytes == 1) {
+			u8 *val = (u8 *)ghcb->shared_buffer;
+
+			sign_byte = (*val & 0x80) ? 0xff : 0x00;
+		} else {
+			u16 *val = (u16 *)ghcb->shared_buffer;
+
+			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
+		}
+		memset(reg_data, sign_byte, insn->opnd_bytes);
+
+		memcpy(reg_data, ghcb->shared_buffer, bytes);
+		break;
+
+	default:
+		ret = ES_UNSUPPORTED;
+	}
+
+	return ret;
+}
+
+static enum es_result vc_handle_mmio(struct ghcb *ghcb,
+				     struct es_em_ctxt *ctxt)
+{
+	struct insn *insn = &ctxt->insn;
+	unsigned int bytes = 0;
+	enum es_result ret;
+	long *reg_data;
+
+	switch (insn->opcode.bytes[0]) {
+	/* MMIO Write */
+	case 0x88:
+		bytes = 1;
+		/* Fallthrough */
+	case 0x89:
+		if (!bytes)
+			bytes = insn->opnd_bytes;
+
+		reg_data = vc_insn_get_reg(ctxt);
+		memcpy(ghcb->shared_buffer, reg_data, bytes);
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+		break;
+
+	case 0xc6:
+		bytes = 1;
+		/* Fallthrough */
+	case 0xc7:
+		if (!bytes)
+			bytes = insn->opnd_bytes;
+
+		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+		break;
+
+		/* MMIO Read */
+	case 0x8a:
+		bytes = 1;
+		/* Fallthrough */
+	case 0x8b:
+		if (!bytes)
+			bytes = insn->opnd_bytes;
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+		if (ret)
+			break;
+
+		reg_data = vc_insn_get_reg(ctxt);
+		if (bytes == 4)
+			*reg_data = 0;  /* Zero-extend for 32-bit operation */
+
+		memcpy(reg_data, ghcb->shared_buffer, bytes);
+		break;
+
+		/* Two-Byte Opcodes */
+	case 0x0f:
+		ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
+		break;
+	default:
+		ret = ES_UNSUPPORTED;
+	}
+
+	return ret;
+}
+
 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
 					 struct ghcb *ghcb,
 					 unsigned long exit_code)
@@ -445,6 +630,9 @@  static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
 	case SVM_EXIT_IOIO:
 		result = vc_handle_ioio(ghcb, ctxt);
 		break;
+	case SVM_EXIT_NPF:
+		result = vc_handle_mmio(ghcb, ctxt);
+		break;
 	default:
 		/*
 		 * Unexpected #VC exception