From: Atish Patra <atishp@rivosinc.com>
To: linux-kernel@vger.kernel.org
Cc: "Atish Patra" <atishp@rivosinc.com>,
	"Alexandre Ghiti" <alex@ghiti.fr>,
	"Andrew Jones" <ajones@ventanamicro.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Anup Patel" <anup@brainfault.org>,
	"Atish Patra" <atishp@atishpatra.org>,
	"Björn Töpel" <bjorn@rivosinc.com>,
	"Suzuki K Poulose" <suzuki.poulose@arm.com>,
	"Will Deacon" <will@kernel.org>, "Marc Zyngier" <maz@kernel.org>,
	"Sean Christopherson" <seanjc@google.com>,
	linux-coco@lists.linux.dev, "Dylan Reid" <dylan@rivosinc.com>,
	abrestic@rivosinc.com, "Samuel Ortiz" <sameo@rivosinc.com>,
	"Christoph Hellwig" <hch@infradead.org>,
	"Conor Dooley" <conor.dooley@microchip.com>,
	"Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	"Guo Ren" <guoren@kernel.org>, "Heiko Stuebner" <heiko@sntech.de>,
	"Jiri Slaby" <jirislaby@kernel.org>,
	kvm-riscv@lists.infradead.org, kvm@vger.kernel.org,
	linux-mm@kvack.org, linux-riscv@lists.infradead.org,
	"Mayuresh Chitale" <mchitale@ventanamicro.com>,
	"Palmer Dabbelt" <palmer@dabbelt.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Paul Walmsley" <paul.walmsley@sifive.com>,
	"Rajnesh Kanwal" <rkanwal@rivosinc.com>,
	"Uladzislau Rezki" <urezki@gmail.com>
Subject: [RFC 07/48] RISC-V: KVM: Add a barebone CoVE implementation
Date: Wed, 19 Apr 2023 15:16:35 -0700	[thread overview]
Message-ID: <20230419221716.3603068-8-atishp@rivosinc.com> (raw)
In-Reply-To: <20230419221716.3603068-1-atishp@rivosinc.com>

Add a barebone implementation of the CoVE functionality that exercises
the COVH functions to create and manage pages for various boot-time
operations such as page directory and page table management, and
vCPU/VM state management.

Signed-off-by: Atish Patra <atishp@rivosinc.com>
---
 arch/riscv/include/asm/kvm_cove.h | 154 ++++++++++++
 arch/riscv/include/asm/kvm_host.h |   7 +
 arch/riscv/kvm/Makefile           |   2 +-
 arch/riscv/kvm/cove.c             | 401 ++++++++++++++++++++++++++++++
 arch/riscv/kvm/cove_sbi.c         |   2 -
 include/uapi/linux/kvm.h          |   6 +
 6 files changed, 569 insertions(+), 3 deletions(-)
 create mode 100644 arch/riscv/include/asm/kvm_cove.h
 create mode 100644 arch/riscv/kvm/cove.c

diff --git a/arch/riscv/include/asm/kvm_cove.h b/arch/riscv/include/asm/kvm_cove.h
new file mode 100644
index 0000000..3bf1bcd
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_cove.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * COVE related header file.
+ *
+ * Copyright (c) 2023 RivosInc
+ *
+ * Authors:
+ *     Atish Patra <atishp@rivosinc.com>
+ */
+
+#ifndef __KVM_RISCV_COVE_H
+#define __KVM_RISCV_COVE_H
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/list.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+
+#define KVM_COVE_PAGE_SIZE_4K	(1UL << 12)
+#define KVM_COVE_PAGE_SIZE_2MB	(1UL << 21)
+#define KVM_COVE_PAGE_SIZE_1GB	(1UL << 30)
+#define KVM_COVE_PAGE_SIZE_512GB (1UL << 39)
+
+#define bytes_to_pages(n) (((n) + PAGE_SIZE - 1) >> PAGE_SHIFT)
+
+/* Allocate 4MB (i.e. 1024 pages) for the page table pool */
+#define KVM_COVE_PGTABLE_SIZE_MAX ((1UL << 10) * PAGE_SIZE)
+
+#define get_order_num_pages(n) (get_order((n) << PAGE_SHIFT))
+
+/* Describe a confidential or shared memory region */
+struct kvm_riscv_cove_mem_region {
+	unsigned long hva;
+	unsigned long gpa;
+	unsigned long npages;
+};
+
+/* Page management structure for the host */
+struct kvm_riscv_cove_page {
+	struct list_head link;
+
+	/* Pointer to page allocated */
+	struct page *page;
+
+	/* Number of pages allocated */
+	unsigned long npages;
+
+	/* Describes the page type */
+	unsigned long ptype;
+
+	/* set if the page is mapped in guest physical address */
+	bool is_mapped;
+
+	/* The below two fields are only valid if is_mapped is true */
+	/* host virtual address for the mapping */
+	unsigned long hva;
+	/* guest physical address for the mapping */
+	unsigned long gpa;
+};
+
+struct kvm_cove_tvm_vcpu_context {
+	struct kvm_vcpu *vcpu;
+	/* Pages storing each vcpu state of the TVM in TSM */
+	struct kvm_riscv_cove_page vcpu_state;
+};
+
+struct kvm_cove_tvm_context {
+	struct kvm *kvm;
+
+	/* TODO: This is not really a VMID as TSM returns the page owner ID instead of VMID */
+	unsigned long tvm_guest_id;
+
+	/* Pages where TVM page table is stored */
+	struct kvm_riscv_cove_page pgtable;
+
+	/* Pages storing the TVM state in TSM */
+	struct kvm_riscv_cove_page tvm_state;
+
+	/* Keep track of zero pages */
+	struct list_head zero_pages;
+
+	/* Pages where TVM image is measured & loaded */
+	struct list_head measured_pages;
+
+	/* keep track of shared pages */
+	struct list_head shared_pages;
+
+	/* Keep track of confidential pages pending reclaim */
+	struct list_head reclaim_pending_pages;
+
+	struct kvm_riscv_cove_mem_region shared_region;
+	struct kvm_riscv_cove_mem_region confidential_region;
+
+	/* spinlock to protect the tvm fence sequence */
+	spinlock_t tvm_fence_lock;
+
+	/* Set when the TVM is finalized */
+	bool finalized_done;
+};
+
+static inline bool is_cove_vm(struct kvm *kvm)
+{
+	return kvm->arch.vm_type == KVM_VM_TYPE_RISCV_COVE;
+}
+
+static inline bool is_cove_vcpu(struct kvm_vcpu *vcpu)
+{
+	return is_cove_vm(vcpu->kvm);
+}
+
+#ifdef CONFIG_RISCV_COVE_HOST
+
+bool kvm_riscv_cove_enabled(void);
+int kvm_riscv_cove_init(void);
+
+/* TVM related functions */
+void kvm_riscv_cove_vm_destroy(struct kvm *kvm);
+int kvm_riscv_cove_vm_init(struct kvm *kvm);
+
+/* TVM VCPU related functions */
+void kvm_riscv_cove_vcpu_destroy(struct kvm_vcpu *vcpu);
+int kvm_riscv_cove_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_riscv_cove_vcpu_load(struct kvm_vcpu *vcpu);
+void kvm_riscv_cove_vcpu_put(struct kvm_vcpu *vcpu);
+void kvm_riscv_cove_vcpu_switchto(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap);
+
+int kvm_riscv_cove_vm_add_memreg(struct kvm *kvm, unsigned long gpa, unsigned long size);
+int kvm_riscv_cove_gstage_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva);
+#else
+static inline bool kvm_riscv_cove_enabled(void) { return false; }
+static inline int kvm_riscv_cove_init(void) { return -1; }
+static inline void kvm_riscv_cove_hardware_disable(void) {}
+static inline int kvm_riscv_cove_hardware_enable(void) {return 0; }
+
+/* TVM related functions */
+static inline void kvm_riscv_cove_vm_destroy(struct kvm *kvm) {}
+static inline int kvm_riscv_cove_vm_init(struct kvm *kvm) {return -1; }
+
+/* TVM VCPU related functions */
+static inline void kvm_riscv_cove_vcpu_destroy(struct kvm_vcpu *vcpu) {}
+static inline int kvm_riscv_cove_vcpu_init(struct kvm_vcpu *vcpu) {return -1; }
+static inline void kvm_riscv_cove_vcpu_load(struct kvm_vcpu *vcpu) {}
+static inline void kvm_riscv_cove_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline void kvm_riscv_cove_vcpu_switchto(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap) {}
+static inline int kvm_riscv_cove_vm_add_memreg(struct kvm *kvm, unsigned long gpa,
+					       unsigned long size) {return -1; }
+static inline int kvm_riscv_cove_gstage_map(struct kvm_vcpu *vcpu,
+					    gpa_t gpa, unsigned long hva) {return -1; }
+#endif /* CONFIG_RISCV_COVE_HOST */
+
+#endif /* __KVM_RISCV_COVE_H */
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 63c46af..ca2ebe3 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -88,6 +88,8 @@ struct kvm_vmid {
 };
 
 struct kvm_arch {
+	unsigned long vm_type;
+
 	/* G-stage vmid */
 	struct kvm_vmid vmid;
 
@@ -100,6 +102,9 @@ struct kvm_arch {
 
 	/* AIA Guest/VM context */
 	struct kvm_aia aia;
+
+	/* COVE guest/VM context */
+	struct kvm_cove_tvm_context *tvmc;
 };
 
 struct kvm_cpu_trap {
@@ -242,6 +247,8 @@ struct kvm_vcpu_arch {
 
 	/* Performance monitoring context */
 	struct kvm_pmu pmu_context;
+
+	struct kvm_cove_tvm_vcpu_context *tc;
 };
 
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 40dee04..8c91551 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -31,4 +31,4 @@ kvm-y += aia.o
 kvm-y += aia_device.o
 kvm-y += aia_aplic.o
 kvm-y += aia_imsic.o
-kvm-$(CONFIG_RISCV_COVE_HOST) += cove_sbi.o
+kvm-$(CONFIG_RISCV_COVE_HOST) += cove_sbi.o cove.o
diff --git a/arch/riscv/kvm/cove.c b/arch/riscv/kvm/cove.c
new file mode 100644
index 0000000..d001e36
--- /dev/null
+++ b/arch/riscv/kvm/cove.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * COVE related helper functions.
+ *
+ * Copyright (c) 2023 RivosInc
+ *
+ * Authors:
+ *     Atish Patra <atishp@rivosinc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/smp.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_nacl.h>
+#include <asm/kvm_cove.h>
+#include <asm/kvm_cove_sbi.h>
+#include <asm/asm-offsets.h>
+
+static struct sbi_cove_tsm_info tinfo;
+struct sbi_cove_tvm_create_params params;
+
+/* We need a global lock as only one fence can be initiated at a time per host */
+static DEFINE_SPINLOCK(cove_fence_lock);
+
+static bool riscv_cove_enabled;
+
+static void kvm_cove_local_fence(void *info)
+{
+	int rc;
+
+	rc = sbi_covh_tsm_local_fence();
+
+	if (rc)
+		kvm_err("local fence for TSM failed %d on cpu %d\n", rc, smp_processor_id());
+}
+
+static void cove_delete_page_list(struct kvm *kvm, struct list_head *tpages, bool unpin)
+{
+	struct kvm_riscv_cove_page *tpage, *temp;
+	int rc;
+
+	list_for_each_entry_safe(tpage, temp, tpages, link) {
+		rc = sbi_covh_tsm_reclaim_pages(page_to_phys(tpage->page), tpage->npages);
+		if (rc)
+			kvm_err("Reclaiming page %llx failed\n", page_to_phys(tpage->page));
+		if (unpin)
+			unpin_user_pages_dirty_lock(&tpage->page, 1, true);
+		list_del(&tpage->link);
+		kfree(tpage);
+	}
+}
+
+static int kvm_riscv_cove_fence(void)
+{
+	int rc;
+
+	spin_lock(&cove_fence_lock);
+
+	rc = sbi_covh_tsm_initiate_fence();
+	if (rc) {
+		kvm_err("initiate fence for tsm failed %d\n", rc);
+		goto done;
+	}
+
+	/* initiate local fence on each online hart */
+	on_each_cpu(kvm_cove_local_fence, NULL, 1);
+done:
+	spin_unlock(&cove_fence_lock);
+	return rc;
+}
+
+static int cove_convert_pages(unsigned long phys_addr, unsigned long npages, bool fence)
+{
+	int rc;
+
+	if (!IS_ALIGNED(phys_addr, PAGE_SIZE))
+		return -EINVAL;
+
+	rc = sbi_covh_tsm_convert_pages(phys_addr, npages);
+	if (rc)
+		return rc;
+
+	/* Conversion was successful. Flush the TLB if caller requested */
+	if (fence)
+		rc = kvm_riscv_cove_fence();
+
+	return rc;
+}
+
+__always_inline bool kvm_riscv_cove_enabled(void)
+{
+	return riscv_cove_enabled;
+}
+
+void kvm_riscv_cove_vcpu_load(struct kvm_vcpu *vcpu)
+{
+	/* TODO */
+}
+
+void kvm_riscv_cove_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	/* TODO */
+}
+
+int kvm_riscv_cove_gstage_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva)
+{
+	/* TODO */
+	return 0;
+}
+
+void kvm_riscv_cove_vcpu_switchto(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap)
+{
+	/* TODO */
+}
+
+void kvm_riscv_cove_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cove_tvm_vcpu_context *tvcpuc = vcpu->arch.tc;
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * Just add the vcpu state pages to a list at this point as these cannot
+	 * be reclaimed until the TVM is destroyed.
+	 */
+	list_add(&tvcpuc->vcpu_state.link, &kvm->arch.tvmc->reclaim_pending_pages);
+}
+
+int kvm_riscv_cove_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	int rc;
+	struct kvm *kvm;
+	struct kvm_cove_tvm_vcpu_context *tvcpuc;
+	struct kvm_cove_tvm_context *tvmc;
+	struct page *vcpus_page;
+	unsigned long vcpus_phys_addr;
+
+	if (!vcpu)
+		return -EINVAL;
+
+	kvm = vcpu->kvm;
+
+	if (!kvm->arch.tvmc)
+		return -EINVAL;
+
+	tvmc = kvm->arch.tvmc;
+
+	if (tvmc->finalized_done) {
+		kvm_err("vcpu init must not happen after finalize\n");
+		return -EINVAL;
+	}
+
+	tvcpuc = kzalloc(sizeof(*tvcpuc), GFP_KERNEL);
+	if (!tvcpuc)
+		return -ENOMEM;
+
+	vcpus_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+				 get_order_num_pages(tinfo.tvcpu_pages_needed));
+	if (!vcpus_page) {
+		rc = -ENOMEM;
+		goto alloc_page_failed;
+	}
+
+	tvcpuc->vcpu = vcpu;
+	tvcpuc->vcpu_state.npages = tinfo.tvcpu_pages_needed;
+	tvcpuc->vcpu_state.page = vcpus_page;
+	vcpus_phys_addr = page_to_phys(vcpus_page);
+
+	rc = cove_convert_pages(vcpus_phys_addr, tvcpuc->vcpu_state.npages, true);
+	if (rc)
+		goto convert_failed;
+
+	rc = sbi_covh_create_tvm_vcpu(tvmc->tvm_guest_id, vcpu->vcpu_idx, vcpus_phys_addr);
+	if (rc)
+		goto vcpu_create_failed;
+
+	vcpu->arch.tc = tvcpuc;
+
+	return 0;
+
+vcpu_create_failed:
+	/* Reclaim all the pages or return to the confidential page pool */
+	sbi_covh_tsm_reclaim_pages(vcpus_phys_addr, tvcpuc->vcpu_state.npages);
+
+convert_failed:
+	__free_pages(vcpus_page, get_order_num_pages(tinfo.tvcpu_pages_needed));
+
+alloc_page_failed:
+	kfree(tvcpuc);
+	return rc;
+}
+
+int kvm_riscv_cove_vm_add_memreg(struct kvm *kvm, unsigned long gpa, unsigned long size)
+{
+	int rc;
+	struct kvm_cove_tvm_context *tvmc = kvm->arch.tvmc;
+
+	if (!tvmc)
+		return -EFAULT;
+
+	if (tvmc->finalized_done) {
+		kvm_err("Memory region can not be added after finalize\n");
+		return -EINVAL;
+	}
+
+	tvmc->confidential_region.gpa = gpa;
+	tvmc->confidential_region.npages = bytes_to_pages(size);
+
+	rc = sbi_covh_add_memory_region(tvmc->tvm_guest_id, gpa, size);
+	if (rc) {
+		kvm_err("Registering confidential memory region failed with rc %d\n", rc);
+		return rc;
+	}
+
+	kvm_info("%s: Success with gpa %lx size %lx\n", __func__, gpa, size);
+
+	return 0;
+}
+
+/*
+ * Destroying a TVM is expensive because we need to reclaim all the pages by iterating over them.
+ * A few ideas to improve this:
+ * 1. At least do the reclaim part in a background worker thread.
+ * 2. Define a page pool which can hold pre-allocated/converted pages.
+ *    Reclaim would then just return pages to the confidential page pool so that some other
+ *    TVM can use them.
+ */
+void kvm_riscv_cove_vm_destroy(struct kvm *kvm)
+{
+	int rc;
+	struct kvm_cove_tvm_context *tvmc = kvm->arch.tvmc;
+	unsigned long pgd_npages;
+
+	if (!tvmc)
+		return;
+
+	/* Release all the confidential pages using COVH SBI call */
+	rc = sbi_covh_tsm_destroy_tvm(tvmc->tvm_guest_id);
+	if (rc) {
+		kvm_err("TVM %lu destruction failed with rc = %d\n", tvmc->tvm_guest_id, rc);
+		return;
+	}
+
+	cove_delete_page_list(kvm, &tvmc->reclaim_pending_pages, false);
+
+	/* Reclaim and Free the pages for tvm state management */
+	rc = sbi_covh_tsm_reclaim_pages(page_to_phys(tvmc->tvm_state.page), tvmc->tvm_state.npages);
+	if (rc)
+		goto reclaim_failed;
+
+	__free_pages(tvmc->tvm_state.page, get_order_num_pages(tvmc->tvm_state.npages));
+
+	/* Reclaim and Free the pages for gstage page table management */
+	rc = sbi_covh_tsm_reclaim_pages(page_to_phys(tvmc->pgtable.page), tvmc->pgtable.npages);
+	if (rc)
+		goto reclaim_failed;
+
+	__free_pages(tvmc->pgtable.page, get_order_num_pages(tvmc->pgtable.npages));
+
+	/* Reclaim the confidential page for pgd */
+	pgd_npages = kvm_riscv_gstage_pgd_size() >> PAGE_SHIFT;
+	rc = sbi_covh_tsm_reclaim_pages(kvm->arch.pgd_phys, pgd_npages);
+	if (rc)
+		goto reclaim_failed;
+
+	kfree(tvmc);
+
+	return;
+
+reclaim_failed:
+	kvm_err("Memory reclaim failed with rc %d\n", rc);
+}
+
+int kvm_riscv_cove_vm_init(struct kvm *kvm)
+{
+	struct kvm_cove_tvm_context *tvmc;
+	struct page *tvms_page, *pgt_page;
+	unsigned long tvm_gid, pgt_phys_addr, tvms_phys_addr;
+	unsigned long gstage_pgd_size = kvm_riscv_gstage_pgd_size();
+	int rc = -ENOMEM;
+
+	tvmc = kzalloc(sizeof(*tvmc), GFP_KERNEL);
+	if (!tvmc)
+		return -ENOMEM;
+
+	/* Allocate the pages required for gstage page table management */
+	/* TODO: Just give enough pages for the page table pool for now */
+	pgt_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(KVM_COVE_PGTABLE_SIZE_MAX));
+	if (!pgt_page)
+		goto done;
+
+	/* pgd is always 16KB aligned */
+	rc = cove_convert_pages(kvm->arch.pgd_phys, gstage_pgd_size >> PAGE_SHIFT, false);
+	if (rc)
+		goto done;
+
+	/* Convert the gstage page table pages */
+	tvmc->pgtable.page = pgt_page;
+	tvmc->pgtable.npages = KVM_COVE_PGTABLE_SIZE_MAX >> PAGE_SHIFT;
+	pgt_phys_addr = page_to_phys(pgt_page);
+
+	rc = cove_convert_pages(pgt_phys_addr, tvmc->pgtable.npages, false);
+	if (rc) {
+		kvm_err("%s: page table pool conversion failed rc %d\n", __func__, rc);
+		goto pgt_convert_failed;
+	}
+
+	/* Allocate and convert the pages required for TVM state management */
+	tvms_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+				get_order_num_pages(tinfo.tvm_pages_needed));
+	if (!tvms_page) {
+		rc = -ENOMEM;
+		goto tvms_alloc_failed;
+	}
+
+	tvmc->tvm_state.page = tvms_page;
+	tvmc->tvm_state.npages = tinfo.tvm_pages_needed;
+	tvms_phys_addr = page_to_phys(tvms_page);
+
+	rc = cove_convert_pages(tvms_phys_addr, tinfo.tvm_pages_needed, false);
+	if (rc) {
+		kvm_err("%s: tvm state page conversion failed rc %d\n", __func__, rc);
+		goto tvms_convert_failed;
+	}
+
+	rc = kvm_riscv_cove_fence();
+	if (rc)
+		goto tvm_init_failed;
+
+	INIT_LIST_HEAD(&tvmc->measured_pages);
+	INIT_LIST_HEAD(&tvmc->zero_pages);
+	INIT_LIST_HEAD(&tvmc->shared_pages);
+	INIT_LIST_HEAD(&tvmc->reclaim_pending_pages);
+
+	/* The required pages have been converted to confidential memory. Create the TVM now */
+	params.tvm_page_directory_addr = kvm->arch.pgd_phys;
+	params.tvm_state_addr = tvms_phys_addr;
+
+	rc = sbi_covh_tsm_create_tvm(&params, &tvm_gid);
+	if (rc)
+		goto tvm_init_failed;
+
+	tvmc->tvm_guest_id = tvm_gid;
+	spin_lock_init(&tvmc->tvm_fence_lock);
+	kvm->arch.tvmc = tvmc;
+
+	rc = sbi_covh_add_pgt_pages(tvm_gid, pgt_phys_addr, tvmc->pgtable.npages);
+	if (rc)
+		goto tvm_init_failed;
+
+	tvmc->kvm = kvm;
+	kvm_info("Guest VM creation successful with guest id %lx\n", tvm_gid);
+
+	return 0;
+
+tvm_init_failed:
+	/* Reclaim tvm state pages */
+	sbi_covh_tsm_reclaim_pages(tvms_phys_addr, tvmc->tvm_state.npages);
+
+tvms_convert_failed:
+	__free_pages(tvms_page, get_order_num_pages(tinfo.tvm_pages_needed));
+
+tvms_alloc_failed:
+	/* Reclaim pgtable pages */
+	sbi_covh_tsm_reclaim_pages(pgt_phys_addr, tvmc->pgtable.npages);
+
+pgt_convert_failed:
+	__free_pages(pgt_page, get_order(KVM_COVE_PGTABLE_SIZE_MAX));
+	/* Reclaim pgd pages */
+	sbi_covh_tsm_reclaim_pages(kvm->arch.pgd_phys, gstage_pgd_size >> PAGE_SHIFT);
+
+done:
+	kfree(tvmc);
+	return rc;
+}
+
+int kvm_riscv_cove_init(void)
+{
+	int rc;
+
+	/* We currently support host in VS mode. Thus, NACL is mandatory */
+	if (sbi_probe_extension(SBI_EXT_COVH) <= 0 || !kvm_riscv_nacl_available())
+		return -EOPNOTSUPP;
+
+	rc = sbi_covh_tsm_get_info(&tinfo);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (tinfo.tstate != TSM_READY) {
+		kvm_err("TSM is not ready yet. Can't run TVMs\n");
+		return -EAGAIN;
+	}
+
+	riscv_cove_enabled = true;
+	kvm_info("The platform has the confidential computing feature enabled\n");
+	kvm_info("TSM version %d is loaded and ready to run\n", tinfo.version);
+
+	return 0;
+}
diff --git a/arch/riscv/kvm/cove_sbi.c b/arch/riscv/kvm/cove_sbi.c
index c8c63fe..bf037f6 100644
--- a/arch/riscv/kvm/cove_sbi.c
+++ b/arch/riscv/kvm/cove_sbi.c
@@ -82,9 +82,7 @@ int sbi_covh_tsm_create_tvm(struct sbi_cove_tvm_create_params *tparam, unsigned
 		goto done;
 	}
 
-	kvm_info("%s: create_tvm tvmid %lx\n", __func__, ret.value);
 	*tvmid = ret.value;
-
 done:
 	return rc;
 }
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8923319..a55a6a5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -914,6 +914,12 @@ struct kvm_ppc_resize_hpt {
 #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK	0xffULL
 #define KVM_VM_TYPE_ARM_IPA_SIZE(x)		\
 	((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+
+/*
+ * RISC-V Confidential VM type. The large bit shift is chosen on purpose
+ * to allow other architectures to have their specific VM types if required.
+ */
+#define KVM_VM_TYPE_RISCV_COVE	(1UL << 9)
 /*
  * ioctls for /dev/kvm fds:
  */
-- 
2.25.1


2023-04-19 22:16 [RFC 00/48] RISC-V CoVE support Atish Patra
2023-04-19 22:16 ` Atish Patra
2023-04-19 22:16 ` [RFC 01/48] mm/vmalloc: Introduce arch hooks to notify ioremap/unmap changes Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-20 19:42   ` Lorenzo Stoakes
2023-04-20 19:42     ` Lorenzo Stoakes
2023-04-20 22:01     ` Atish Kumar Patra
2023-04-20 22:01       ` Atish Kumar Patra
2023-04-19 22:16 ` [RFC 02/48] RISC-V: KVM: Improve KVM error reporting to the user space Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 03/48] RISC-V: KVM: Invoke aia_update with preempt disabled/irq enabled Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 04/48] RISC-V: KVM: Add a helper function to get pgd size Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 05/48] RISC-V: Add COVH SBI extensions definitions Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 06/48] RISC-V: KVM: Implement COVH SBI extension Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` Atish Patra [this message]
2023-04-19 22:16   ` [RFC 07/48] RISC-V: KVM: Add a barebone CoVE implementation Atish Patra
2023-04-19 22:16 ` [RFC 08/48] RISC-V: KVM: Add UABI to support static memory region attestation Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 09/48] RISC-V: KVM: Add CoVE related nacl helpers Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 10/48] RISC-V: KVM: Implement static memory region measurement Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-20 15:17   ` Sean Christopherson
2023-04-20 15:17     ` Sean Christopherson
2023-04-21 18:50     ` Atish Kumar Patra
2023-04-21 18:50       ` Atish Kumar Patra
2023-04-19 22:16 ` [RFC 11/48] RISC-V: KVM: Use the new VM IOCTL for measuring pages Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 12/48] RISC-V: KVM: Exit to the user space for trap redirection Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 13/48] RISC-V: KVM: Return early for gstage modifications Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 14/48] RISC-V: KVM: Skip dirty logging updates for TVM Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 15/48] RISC-V: KVM: Add a helper function to trigger fence ops Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 16/48] RISC-V: KVM: Skip most VCPU requests for TVMs Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 17/48] RISC-V : KVM: Skip vmid/hgatp management " Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 18/48] RISC-V: KVM: Skip TLB " Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 19/48] RISC-V: KVM: Register memory regions as confidential " Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 20/48] RISC-V: KVM: Add gstage mapping " Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 21/48] RISC-V: KVM: Handle SBI call forward from the TSM Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 22/48] RISC-V: KVM: Implement vcpu load/put functions for CoVE guests Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 23/48] RISC-V: KVM: Wireup TVM world switch Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 24/48] RISC-V: KVM: Update timer functionality for TVMs Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 25/48] RISC-V: KVM: Skip HVIP update " Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 26/48] RISC-V: Add COVI extension definitions Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 27/48] RISC-V: KVM: Implement COVI SBI extension Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 28/48] RISC-V: KVM: Add interrupt management functions for TVM Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 29/48] RISC-V: KVM: Skip AIA CSR updates for TVMs Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 30/48] RISC-V: KVM: Perform limited operations in hardware enable/disable Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:16 ` [RFC 31/48] RISC-V: KVM: Indicate no support user space emulated IRQCHIP Atish Patra
2023-04-19 22:16   ` Atish Patra
2023-04-19 22:17 ` [RFC 32/48] RISC-V: KVM: Add AIA support for TVMs Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 33/48] RISC-V: KVM: Hookup TVM VCPU init/destroy Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 34/48] RISC-V: KVM: Initialize CoVE Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 35/48] RISC-V: KVM: Add TVM init/destroy calls Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 36/48] RISC-V: KVM: Read/write gprs from/to shmem in case of TVM VCPU Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 37/48] RISC-V: Add COVG SBI extension definitions Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 38/48] RISC-V: Add CoVE guest config and helper functions Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 39/48] RISC-V: Implement COVG SBI extension Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 40/48] RISC-V: COVE: Add COVH invalidate, validate, promote, demote and remove APIs Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 41/48] RISC-V: KVM: Add host side support to handle COVG SBI calls Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 42/48] RISC-V: Allow host to inject any ext interrupt id to a CoVE guest Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 43/48] RISC-V: Add base memory encryption functions Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 44/48] RISC-V: Add cc_platform_has() for RISC-V for CoVE Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 45/48] RISC-V: ioremap: Implement for arch specific ioremap hooks Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-20 22:15   ` Dave Hansen
2023-04-20 22:15     ` Dave Hansen
2023-04-21 19:24     ` Atish Kumar Patra
2023-04-21 19:24       ` Atish Kumar Patra
2023-04-24 13:48       ` Dave Hansen
2023-04-24 13:48         ` Dave Hansen
2023-04-25  8:00         ` Atish Kumar Patra
2023-04-25  8:00           ` Atish Kumar Patra
2023-04-25 13:10           ` Dave Hansen
2023-04-25 13:10             ` Dave Hansen
2023-04-26  8:02             ` Atish Kumar Patra
2023-04-26  8:02               ` Atish Kumar Patra
2023-04-26 10:30               ` Anup Patel
2023-04-26 10:30                 ` Anup Patel
2023-04-26 13:55                 ` Andrew Bresticker
2023-04-26 13:55                   ` Andrew Bresticker
2023-04-19 22:17 ` [RFC 46/48] riscv/virtio: Have CoVE guests enforce restricted virtio memory access Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 47/48] RISC-V: Add shared bounce buffer to support DBCN for CoVE Guest Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:17 ` [RFC 48/48] drivers/hvc: sbi: Disable HVC console for TVMs Atish Patra
2023-04-19 22:17   ` Atish Patra
2023-04-19 22:58 ` [RFC 00/48] RISC-V CoVE support Atish Patra
2023-04-19 22:58   ` Atish Patra
2023-04-20 16:30 ` Sean Christopherson
2023-04-20 16:30   ` Sean Christopherson
2023-04-20 19:13   ` Atish Kumar Patra
2023-04-20 19:13     ` Atish Kumar Patra
2023-04-20 20:21     ` Sean Christopherson
2023-04-20 20:21       ` Sean Christopherson
2023-04-21 15:35   ` Michael Roth
2023-04-21 15:35     ` Michael Roth
2023-04-24 12:23 ` Christophe de Dinechin
2023-04-24 12:23   ` Christophe de Dinechin
