From: Anup Patel <anup.patel@wdc.com>
To: Palmer Dabbelt <palmer@dabbelt.com>,
Palmer Dabbelt <palmerdabbelt@google.com>,
Paul Walmsley <paul.walmsley@sifive.com>,
Albert Ou <aou@eecs.berkeley.edu>,
Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <graf@amazon.com>,
Atish Patra <atish.patra@wdc.com>,
Alistair Francis <Alistair.Francis@wdc.com>,
Damien Le Moal <damien.lemoal@wdc.com>,
Anup Patel <anup@brainfault.org>,
kvm@vger.kernel.org, kvm-riscv@lists.infradead.org,
linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org,
Anup Patel <anup.patel@wdc.com>
Subject: [PATCH v19 11/17] RISC-V: KVM: Implement MMU notifiers
Date: Tue, 27 Jul 2021 11:24:44 +0530 [thread overview]
Message-ID: <20210727055450.2742868-12-anup.patel@wdc.com> (raw)
In-Reply-To: <20210727055450.2742868-1-anup.patel@wdc.com>
This patch implements MMU notifiers for KVM RISC-V so that Guest
physical address space is in-sync with Host physical address space.
This will allow swapping, page migration, etc to work transparently
with KVM RISC-V.
Signed-off-by: Anup Patel <anup.patel@wdc.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
---
arch/riscv/include/asm/kvm_host.h | 2 +
arch/riscv/kvm/Kconfig | 1 +
arch/riscv/kvm/mmu.c | 90 +++++++++++++++++++++++++++++--
arch/riscv/kvm/vm.c | 1 +
4 files changed, 89 insertions(+), 5 deletions(-)
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 33255c5dd555..a54a58a4026d 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -196,6 +196,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa, unsigned long vmid);
void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 633063edaee8..a712bb910cda 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on RISCV_SBI && MMU
+ select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_MMIO
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index fa9a4f9b9542..4b294113c63b 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -300,7 +300,8 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
}
}
-static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
+static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
+ gpa_t size, bool may_block)
{
int ret;
pte_t *ptep;
@@ -325,6 +326,13 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
next:
addr += page_size;
+
+ /*
+ * If the range is too large, release the kvm->mmu_lock
+ * to prevent starvation and lockup detector warnings.
+ */
+ if (may_block && addr < end)
+ cond_resched_lock(&kvm->mmu_lock);
}
}
@@ -405,7 +413,6 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
out:
stage2_cache_flush(&pcache);
return ret;
-
}
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
@@ -547,7 +554,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
stage2_unmap_range(kvm, mem->guest_phys_addr,
- mem->memory_size);
+ mem->memory_size, false);
spin_unlock(&kvm->mmu_lock);
out:
@@ -555,6 +562,73 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return ret;
}
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ if (!kvm->arch.pgd)
+ return 0;
+
+ stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
+ (range->end - range->start) << PAGE_SHIFT,
+ range->may_block);
+ return 0;
+}
+
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ int ret;
+ kvm_pfn_t pfn = pte_pfn(range->pte);
+
+ if (!kvm->arch.pgd)
+ return 0;
+
+ WARN_ON(range->end - range->start != 1);
+
+ ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
+ __pfn_to_phys(pfn), PAGE_SIZE, true, true);
+ if (ret) {
+ kvm_err("Failed to map stage2 page (error %d)\n", ret);
+ return 1;
+ }
+
+ return 0;
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ pte_t *ptep;
+ u32 ptep_level = 0;
+ u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+ if (!kvm->arch.pgd)
+ return 0;
+
+ WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+ if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
+ return 0;
+
+ return ptep_test_and_clear_young(NULL, 0, ptep);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ pte_t *ptep;
+ u32 ptep_level = 0;
+ u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+ if (!kvm->arch.pgd)
+ return 0;
+
+ WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+ if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ &ptep, &ptep_level))
+ return 0;
+
+ return pte_young(*ptep);
+}
+
int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write)
@@ -569,7 +643,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
bool logging = (memslot->dirty_bitmap &&
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
- unsigned long vma_pagesize;
+ unsigned long vma_pagesize, mmu_seq;
mmap_read_lock(current->mm);
@@ -608,6 +682,8 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
return ret;
}
+ mmu_seq = kvm->mmu_notifier_seq;
+
hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
if (hfn == KVM_PFN_ERR_HWPOISON) {
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
@@ -626,6 +702,9 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
spin_lock(&kvm->mmu_lock);
+ if (mmu_notifier_retry(kvm, mmu_seq))
+ goto out_unlock;
+
if (writeable) {
kvm_set_pfn_dirty(hfn);
mark_page_dirty(kvm, gfn);
@@ -639,6 +718,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
if (ret)
kvm_err("Failed to map in stage2\n");
+out_unlock:
spin_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(hfn);
kvm_release_pfn_clean(hfn);
@@ -675,7 +755,7 @@ void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
spin_lock(&kvm->mmu_lock);
if (kvm->arch.pgd) {
- stage2_unmap_range(kvm, 0UL, stage2_gpa_size);
+ stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
kvm->arch.pgd_phys = 0;
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 38a644417627..0110267eb7e3 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -64,6 +64,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_READONLY_MEM:
--
2.25.1
next prev parent reply other threads:[~2021-07-27 5:56 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-27 5:54 [PATCH v19 00/17] KVM RISC-V Support Anup Patel
2021-07-27 5:54 ` [PATCH v19 01/17] RISC-V: Add hypervisor extension related CSR defines Anup Patel
2021-07-27 5:54 ` [PATCH v19 02/17] RISC-V: Add initial skeletal KVM support Anup Patel
2021-07-27 5:54 ` [PATCH v19 03/17] RISC-V: KVM: Implement VCPU create, init and destroy functions Anup Patel
2021-07-27 5:54 ` [PATCH v19 04/17] RISC-V: KVM: Implement VCPU interrupts and requests handling Anup Patel
2021-07-27 5:54 ` [PATCH v19 05/17] RISC-V: KVM: Implement KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls Anup Patel
2021-07-27 5:54 ` [PATCH v19 06/17] RISC-V: KVM: Implement VCPU world-switch Anup Patel
2021-07-27 5:54 ` [PATCH v19 07/17] RISC-V: KVM: Handle MMIO exits for VCPU Anup Patel
2021-07-27 5:54 ` [PATCH v19 08/17] RISC-V: KVM: Handle WFI " Anup Patel
2021-07-27 5:54 ` [PATCH v19 09/17] RISC-V: KVM: Implement VMID allocator Anup Patel
2021-07-27 5:54 ` [PATCH v19 10/17] RISC-V: KVM: Implement stage2 page table programming Anup Patel
2021-07-27 5:54 ` Anup Patel [this message]
2021-08-03 13:19 ` [PATCH v19 11/17] RISC-V: KVM: Implement MMU notifiers limingwang (A)
2021-08-04 7:16 ` Anup Patel
2021-07-27 5:54 ` [PATCH v19 12/17] RISC-V: KVM: Add timer functionality Anup Patel
2021-07-27 5:54 ` [PATCH v19 13/17] RISC-V: KVM: FP lazy save/restore Anup Patel
2021-09-13 4:30 ` Vincent Chen
2021-09-13 5:04 ` Anup Patel
2021-09-13 7:11 ` Vincent Chen
2021-07-27 5:54 ` [PATCH v19 14/17] RISC-V: KVM: Implement ONE REG interface for FP registers Anup Patel
2021-07-27 5:54 ` [PATCH v19 15/17] RISC-V: KVM: Add SBI v0.1 support Anup Patel
2021-07-27 5:54 ` [PATCH v19 16/17] RISC-V: KVM: Document RISC-V specific parts of KVM API Anup Patel
2021-07-27 5:54 ` [PATCH v19 17/17] RISC-V: KVM: Add MAINTAINERS entry Anup Patel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210727055450.2742868-12-anup.patel@wdc.com \
--to=anup.patel@wdc.com \
--cc=Alistair.Francis@wdc.com \
--cc=anup@brainfault.org \
--cc=aou@eecs.berkeley.edu \
--cc=atish.patra@wdc.com \
--cc=damien.lemoal@wdc.com \
--cc=graf@amazon.com \
--cc=kvm-riscv@lists.infradead.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=palmer@dabbelt.com \
--cc=palmerdabbelt@google.com \
--cc=paul.walmsley@sifive.com \
--cc=pbonzini@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).