* [PATCH 00/10] RISC-V KVM in-kernel AIA irqchip
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

This series adds an in-kernel AIA irqchip which only traps-and-emulates
the IMSIC and APLIC MSI-mode for the Guest. The APLIC MSI-mode
trap-and-emulate is optional, so KVM user space can emulate the APLIC
entirely in user space.

The use of IMSIC HW guest files to accelerate IMSIC virtualization will
be done as a separate series since it depends on the AIA irqchip drivers
being upstreamed. This series has no dependency on the AIA irqchip
drivers.

There is also a KVM AIA irq-bypass (or device MSI virtualization) series
under development which depends on this series and the upcoming IOMMU
driver series.

This series can also be found in the riscv_kvm_aia_irqchip_v1 branch
at: https://github.com/avpatel/linux.git

Anup Patel (10):
  RISC-V: KVM: Implement guest external interrupt line management
  RISC-V: KVM: Add IMSIC related defines
  RISC-V: KVM: Add APLIC related defines
  RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
  RISC-V: KVM: Skeletal in-kernel AIA irqchip support
  RISC-V: KVM: Implement device interface for AIA irqchip
  RISC-V: KVM: Add in-kernel emulation of AIA APLIC
  RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
  RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
  RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip

 arch/riscv/include/asm/kvm_aia.h       |  107 ++-
 arch/riscv/include/asm/kvm_aia_aplic.h |   58 ++
 arch/riscv/include/asm/kvm_aia_imsic.h |   38 +
 arch/riscv/include/asm/kvm_host.h      |    4 +
 arch/riscv/include/uapi/asm/kvm.h      |   54 ++
 arch/riscv/kvm/Kconfig                 |    4 +
 arch/riscv/kvm/Makefile                |    3 +
 arch/riscv/kvm/aia.c                   |  274 +++++-
 arch/riscv/kvm/aia_aplic.c             |  617 ++++++++++++++
 arch/riscv/kvm/aia_device.c            |  672 +++++++++++++++
 arch/riscv/kvm/aia_imsic.c             | 1083 ++++++++++++++++++++++++
 arch/riscv/kvm/main.c                  |    3 +-
 arch/riscv/kvm/vcpu.c                  |    2 +
 arch/riscv/kvm/vm.c                    |  115 +++
 include/uapi/linux/kvm.h               |    2 +
 15 files changed, 3003 insertions(+), 33 deletions(-)
 create mode 100644 arch/riscv/include/asm/kvm_aia_aplic.h
 create mode 100644 arch/riscv/include/asm/kvm_aia_imsic.h
 create mode 100644 arch/riscv/kvm/aia_aplic.c
 create mode 100644 arch/riscv/kvm/aia_device.c
 create mode 100644 arch/riscv/kvm/aia_imsic.c

-- 
2.34.1

* [PATCH 01/10] RISC-V: KVM: Implement guest external interrupt line management
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

The RISC-V host will have one guest external interrupt line for each
VS-level IMSIC associated with a HART. The guest external interrupt
lines are per-HART resources, and the hypervisor can use the HGEIE,
HGEIP, and HIE CSRs to manage them.
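
As a quick illustration (a sketch, not part of the patch), enabling or
disabling one guest external interrupt line comes down to setting or
clearing the corresponding bit in the HGEIE CSR, while a pending line
shows up as a set bit in the HGEIP CSR:

	/* Sketch only: manage guest external interrupt line 'hgei' */
	if (enable)
		csr_set(CSR_HGEIE, BIT(hgei));		/* unmask the line */
	else
		csr_clear(CSR_HGEIE, BIT(hgei));	/* mask the line */

	/* Check whether the line is currently pending */
	bool pending = !!(csr_read(CSR_HGEIP) & BIT(hgei));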

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia.h |  10 ++
 arch/riscv/kvm/aia.c             | 244 +++++++++++++++++++++++++++++++
 arch/riscv/kvm/main.c            |   3 +-
 arch/riscv/kvm/vcpu.c            |   2 +
 4 files changed, 258 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index 1de0717112e5..0938e0cadf80 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -44,10 +44,15 @@ struct kvm_vcpu_aia {
 
 #define irqchip_in_kernel(k)		((k)->arch.aia.in_kernel)
 
+extern unsigned int kvm_riscv_aia_nr_hgei;
 DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
 #define kvm_riscv_aia_available() \
 	static_branch_unlikely(&kvm_riscv_aia_available)
 
+static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
+{
+}
+
 #define KVM_RISCV_AIA_IMSIC_TOPEI	(ISELECT_MASK + 1)
 static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
 					       unsigned long isel,
@@ -119,6 +124,11 @@ static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
 {
 }
 
+int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
+			     void __iomem **hgei_va, phys_addr_t *hgei_pa);
+void kvm_riscv_aia_free_hgei(int cpu, int hgei);
+void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable);
+
 void kvm_riscv_aia_enable(void);
 void kvm_riscv_aia_disable(void);
 int kvm_riscv_aia_init(void);
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 4f1286fc7f17..1cee75a8c883 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -8,11 +8,47 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
 #include <asm/hwcap.h>
 
+struct aia_hgei_control {
+	raw_spinlock_t lock;
+	unsigned long free_bitmap;
+	struct kvm_vcpu *owners[BITS_PER_LONG];
+};
+static DEFINE_PER_CPU(struct aia_hgei_control, aia_hgei);
+static int hgei_parent_irq;
+
+unsigned int kvm_riscv_aia_nr_hgei;
 DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
 
+static int aia_find_hgei(struct kvm_vcpu *owner)
+{
+	int i, hgei;
+	unsigned long flags;
+	struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+	raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+	hgei = -1;
+	for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
+		if (hgctrl->owners[i] == owner) {
+			hgei = i;
+			break;
+		}
+	}
+
+	raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+	put_cpu_ptr(&aia_hgei);
+	return hgei;
+}
+
 static void aia_set_hvictl(bool ext_irq_pending)
 {
 	unsigned long hvictl;
@@ -56,6 +92,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
 
 bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
 {
+	int hgei;
 	unsigned long seip;
 
 	if (!kvm_riscv_aia_available())
@@ -74,6 +111,10 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
 	if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
 		return false;
 
+	hgei = aia_find_hgei(vcpu);
+	if (hgei > 0)
+		return !!(csr_read(CSR_HGEIP) & BIT(hgei));
+
 	return false;
 }
 
@@ -348,6 +389,143 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
 	return KVM_INSN_EXIT_TO_USER_SPACE;
 }
 
+int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
+			     void __iomem **hgei_va, phys_addr_t *hgei_pa)
+{
+	int ret = -ENOENT;
+	unsigned long flags;
+	struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+	if (!kvm_riscv_aia_available() || !hgctrl)
+		return -ENODEV;
+
+	raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+	if (hgctrl->free_bitmap) {
+		ret = __ffs(hgctrl->free_bitmap);
+		hgctrl->free_bitmap &= ~BIT(ret);
+		hgctrl->owners[ret] = owner;
+	}
+
+	raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+	/* TODO: To be updated later by AIA in-kernel irqchip support */
+	if (hgei_va)
+		*hgei_va = NULL;
+	if (hgei_pa)
+		*hgei_pa = 0;
+
+	return ret;
+}
+
+void kvm_riscv_aia_free_hgei(int cpu, int hgei)
+{
+	unsigned long flags;
+	struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+	if (!kvm_riscv_aia_available() || !hgctrl)
+		return;
+
+	raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+	if (hgei > 0 && hgei <= kvm_riscv_aia_nr_hgei) {
+		if (!(hgctrl->free_bitmap & BIT(hgei))) {
+			hgctrl->free_bitmap |= BIT(hgei);
+			hgctrl->owners[hgei] = NULL;
+		}
+	}
+
+	raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+}
+
+void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
+{
+	int hgei;
+
+	if (!kvm_riscv_aia_available())
+		return;
+
+	hgei = aia_find_hgei(owner);
+	if (hgei > 0) {
+		if (enable)
+			csr_set(CSR_HGEIE, BIT(hgei));
+		else
+			csr_clear(CSR_HGEIE, BIT(hgei));
+	}
+}
+
+static irqreturn_t hgei_interrupt(int irq, void *dev_id)
+{
+	int i;
+	unsigned long hgei_mask, flags;
+	struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+	hgei_mask = csr_read(CSR_HGEIP) & csr_read(CSR_HGEIE);
+	csr_clear(CSR_HGEIE, hgei_mask);
+
+	raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+	for_each_set_bit(i, &hgei_mask, BITS_PER_LONG) {
+		if (hgctrl->owners[i])
+			kvm_vcpu_kick(hgctrl->owners[i]);
+	}
+
+	raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+	put_cpu_ptr(&aia_hgei);
+	return IRQ_HANDLED;
+}
+
+static int aia_hgei_init(void)
+{
+	int cpu, rc;
+	struct irq_domain *domain;
+	struct aia_hgei_control *hgctrl;
+
+	/* Initialize per-CPU guest external interrupt line management */
+	for_each_possible_cpu(cpu) {
+		hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+		raw_spin_lock_init(&hgctrl->lock);
+		if (kvm_riscv_aia_nr_hgei) {
+			hgctrl->free_bitmap =
+				BIT(kvm_riscv_aia_nr_hgei + 1) - 1;
+			hgctrl->free_bitmap &= ~BIT(0);
+		} else
+			hgctrl->free_bitmap = 0;
+	}
+
+	/* Find INTC irq domain */
+	domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
+					  DOMAIN_BUS_ANY);
+	if (!domain) {
+		kvm_err("unable to find INTC domain\n");
+		return -ENOENT;
+	}
+
+	/* Map per-CPU SGEI interrupt from INTC domain */
+	hgei_parent_irq = irq_create_mapping(domain, IRQ_S_GEXT);
+	if (!hgei_parent_irq) {
+		kvm_err("unable to map SGEI IRQ\n");
+		return -ENOMEM;
+	}
+
+	/* Request per-CPU SGEI interrupt */
+	rc = request_percpu_irq(hgei_parent_irq, hgei_interrupt,
+				"riscv-kvm", &aia_hgei);
+	if (rc) {
+		kvm_err("failed to request SGEI IRQ\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static void aia_hgei_exit(void)
+{
+	/* Free per-CPU SGEI interrupt */
+	free_percpu_irq(hgei_parent_irq, &aia_hgei);
+}
+
 void kvm_riscv_aia_enable(void)
 {
 	if (!kvm_riscv_aia_available())
@@ -362,21 +540,82 @@ void kvm_riscv_aia_enable(void)
 	csr_write(CSR_HVIPRIO1H, 0x0);
 	csr_write(CSR_HVIPRIO2H, 0x0);
 #endif
+
+	/* Enable per-CPU SGEI interrupt */
+	enable_percpu_irq(hgei_parent_irq,
+			  irq_get_trigger_type(hgei_parent_irq));
+	csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
 }
 
 void kvm_riscv_aia_disable(void)
 {
+	int i;
+	unsigned long flags;
+	struct kvm_vcpu *vcpu;
+	struct aia_hgei_control *hgctrl;
+
 	if (!kvm_riscv_aia_available())
 		return;
+	hgctrl = get_cpu_ptr(&aia_hgei);
+
+	/* Disable per-CPU SGEI interrupt */
+	csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
+	disable_percpu_irq(hgei_parent_irq);
 
 	aia_set_hvictl(false);
+
+	raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+	for (i = 0; i <= kvm_riscv_aia_nr_hgei; i++) {
+		vcpu = hgctrl->owners[i];
+		if (!vcpu)
+			continue;
+
+		/*
+		 * We release hgctrl->lock before notifying IMSIC
+		 * so that we don't have lock ordering issues.
+		 */
+		raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+		/* Notify IMSIC */
+		kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+		/*
+		 * Wakeup VCPU if it was blocked so that it can
+		 * run on other HARTs
+		 */
+		if (csr_read(CSR_HGEIE) & BIT(i)) {
+			csr_clear(CSR_HGEIE, BIT(i));
+			kvm_vcpu_kick(vcpu);
+		}
+
+		raw_spin_lock_irqsave(&hgctrl->lock, flags);
+	}
+
+	raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+	put_cpu_ptr(&aia_hgei);
 }
 
 int kvm_riscv_aia_init(void)
 {
+	int rc;
+
 	if (!riscv_isa_extension_available(NULL, SxAIA))
 		return -ENODEV;
 
+	/* Figure-out number of bits in HGEIE */
+	csr_write(CSR_HGEIE, -1UL);
+	kvm_riscv_aia_nr_hgei = fls_long(csr_read(CSR_HGEIE));
+	csr_write(CSR_HGEIE, 0);
+	if (kvm_riscv_aia_nr_hgei)
+		kvm_riscv_aia_nr_hgei--;
+
+	/* Initialize guest external interrupt line management */
+	rc = aia_hgei_init();
+	if (rc)
+		return rc;
+
 	/* Enable KVM AIA support */
 	static_branch_enable(&kvm_riscv_aia_available);
 
@@ -385,4 +624,9 @@ int kvm_riscv_aia_init(void)
 
 void kvm_riscv_aia_exit(void)
 {
+	if (!kvm_riscv_aia_available())
+		return;
+
+	/* Cleanup the HGEI state */
+	aia_hgei_exit();
 }
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index a7112d583637..48ae0d4b3932 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -116,7 +116,8 @@ static int __init riscv_kvm_init(void)
 	kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
 
 	if (kvm_riscv_aia_available())
-		kvm_info("AIA available\n");
+		kvm_info("AIA available with %d guest external interrupts\n",
+			 kvm_riscv_aia_nr_hgei);
 
 	rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 	if (rc) {
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 8bd9f2a8a0b9..2db62c6c0d3e 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -250,10 +250,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
+	kvm_riscv_aia_wakeon_hgei(vcpu, true);
 }
 
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
+	kvm_riscv_aia_wakeon_hgei(vcpu, false);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-- 
2.34.1


* [PATCH 02/10] RISC-V: KVM: Add IMSIC related defines
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

We add the IMSIC related defines in a separate header so that different
parts of the KVM code can share them. Once the AIA drivers are merged,
we will have a common IMSIC header shared by both the KVM and IRQCHIP
drivers.
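
To illustrate how these defines are typically used (a sketch assuming
the 32-bit register layout implied by IMSIC_EIPx_BITS), the indirect
register number and bit position for a given MSI identity 'iid' can be
computed as:

	/* Sketch only: locate the pending bit for MSI identity 'iid' */
	u32 isel = IMSIC_EIP0 + iid / IMSIC_EIPx_BITS;	/* eip register */
	u32 bit  = iid % IMSIC_EIPx_BITS;		/* bit within it */

On RV64, each pair of these 32-bit registers is accessed through the
even-numbered register of the pair, so the odd register numbers are
reserved.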

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia_imsic.h | 38 ++++++++++++++++++++++++++
 arch/riscv/kvm/aia.c                   |  3 +-
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/include/asm/kvm_aia_imsic.h

diff --git a/arch/riscv/include/asm/kvm_aia_imsic.h b/arch/riscv/include/asm/kvm_aia_imsic.h
new file mode 100644
index 000000000000..da5881d2bde0
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_aia_imsic.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+#ifndef __KVM_RISCV_AIA_IMSIC_H
+#define __KVM_RISCV_AIA_IMSIC_H
+
+#include <linux/types.h>
+#include <asm/csr.h>
+
+#define IMSIC_MMIO_PAGE_SHIFT		12
+#define IMSIC_MMIO_PAGE_SZ		(1UL << IMSIC_MMIO_PAGE_SHIFT)
+#define IMSIC_MMIO_PAGE_LE		0x00
+#define IMSIC_MMIO_PAGE_BE		0x04
+
+#define IMSIC_MIN_ID			63
+#define IMSIC_MAX_ID			2048
+
+#define IMSIC_EIDELIVERY		0x70
+
+#define IMSIC_EITHRESHOLD		0x72
+
+#define IMSIC_EIP0			0x80
+#define IMSIC_EIP63			0xbf
+#define IMSIC_EIPx_BITS			32
+
+#define IMSIC_EIE0			0xc0
+#define IMSIC_EIE63			0xff
+#define IMSIC_EIEx_BITS			32
+
+#define IMSIC_FIRST			IMSIC_EIDELIVERY
+#define IMSIC_LAST			IMSIC_EIE63
+
+#define IMSIC_MMIO_SETIPNUM_LE		0x00
+#define IMSIC_MMIO_SETIPNUM_BE		0x04
+
+#endif
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 1cee75a8c883..c78c06d99e39 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -15,6 +15,7 @@
 #include <linux/percpu.h>
 #include <linux/spinlock.h>
 #include <asm/hwcap.h>
+#include <asm/kvm_aia_imsic.h>
 
 struct aia_hgei_control {
 	raw_spinlock_t lock;
@@ -364,8 +365,6 @@ static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel,
 	return KVM_INSN_CONTINUE_NEXT_SEPC;
 }
 
-#define IMSIC_FIRST	0x70
-#define IMSIC_LAST	0xff
 int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
 				unsigned long *val, unsigned long new_val,
 				unsigned long wr_mask)
-- 
2.34.1


* [PATCH 03/10] RISC-V: KVM: Add APLIC related defines
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

We add the APLIC related defines in a separate header so that different
parts of the KVM code can share them. Once the AIA drivers are merged,
we will have a common APLIC header shared by both the KVM and IRQCHIP
drivers.
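
For example (a sketch, not part of the patch), the fields of an APLIC
target register in MSI delivery mode can be decoded with these defines
as follows:

	/* Sketch only: decode an MSI-mode target register 'target' */
	u32 hart  = (target >> APLIC_TARGET_HART_IDX_SHIFT) &
		    APLIC_TARGET_HART_IDX_MASK;
	u32 guest = (target >> APLIC_TARGET_GUEST_IDX_SHIFT) &
		    APLIC_TARGET_GUEST_IDX_MASK;
	u32 eiid  = target & APLIC_TARGET_EIID_MASK;	/* MSI identity */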

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia_aplic.h | 58 ++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 arch/riscv/include/asm/kvm_aia_aplic.h

diff --git a/arch/riscv/include/asm/kvm_aia_aplic.h b/arch/riscv/include/asm/kvm_aia_aplic.h
new file mode 100644
index 000000000000..6dd1a4809ec1
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_aia_aplic.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+#ifndef __KVM_RISCV_AIA_APLIC_H
+#define __KVM_RISCV_AIA_APLIC_H
+
+#include <linux/bitops.h>
+
+#define APLIC_MAX_IDC			BIT(14)
+#define APLIC_MAX_SOURCE		1024
+
+#define APLIC_DOMAINCFG			0x0000
+#define APLIC_DOMAINCFG_RDONLY		0x80000000
+#define APLIC_DOMAINCFG_IE		BIT(8)
+#define APLIC_DOMAINCFG_DM		BIT(2)
+#define APLIC_DOMAINCFG_BE		BIT(0)
+
+#define APLIC_SOURCECFG_BASE		0x0004
+#define APLIC_SOURCECFG_D		BIT(10)
+#define APLIC_SOURCECFG_CHILDIDX_MASK	0x000003ff
+#define APLIC_SOURCECFG_SM_MASK	0x00000007
+#define APLIC_SOURCECFG_SM_INACTIVE	0x0
+#define APLIC_SOURCECFG_SM_DETACH	0x1
+#define APLIC_SOURCECFG_SM_EDGE_RISE	0x4
+#define APLIC_SOURCECFG_SM_EDGE_FALL	0x5
+#define APLIC_SOURCECFG_SM_LEVEL_HIGH	0x6
+#define APLIC_SOURCECFG_SM_LEVEL_LOW	0x7
+
+#define APLIC_IRQBITS_PER_REG		32
+
+#define APLIC_SETIP_BASE		0x1c00
+#define APLIC_SETIPNUM			0x1cdc
+
+#define APLIC_CLRIP_BASE		0x1d00
+#define APLIC_CLRIPNUM			0x1ddc
+
+#define APLIC_SETIE_BASE		0x1e00
+#define APLIC_SETIENUM			0x1edc
+
+#define APLIC_CLRIE_BASE		0x1f00
+#define APLIC_CLRIENUM			0x1fdc
+
+#define APLIC_SETIPNUM_LE		0x2000
+#define APLIC_SETIPNUM_BE		0x2004
+
+#define APLIC_GENMSI			0x3000
+
+#define APLIC_TARGET_BASE		0x3004
+#define APLIC_TARGET_HART_IDX_SHIFT	18
+#define APLIC_TARGET_HART_IDX_MASK	0x3fff
+#define APLIC_TARGET_GUEST_IDX_SHIFT	12
+#define APLIC_TARGET_GUEST_IDX_MASK	0x3f
+#define APLIC_TARGET_IPRIO_MASK	0xff
+#define APLIC_TARGET_EIID_MASK	0x7ff
+
+#endif
-- 
2.34.1


* [PATCH 04/10] RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

We hard-code kvm_riscv_aia_nr_hgei to zero until IMSIC HW guest file
support is added to KVM RISC-V.
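
Once guest file support lands, the intent (per the TODO comment in the
diff below) is roughly the following, where imsic_nr_guest_files() is a
made-up placeholder for however the IMSIC driver ends up reporting the
per-HART guest file count:

	/* Sketch only: usable lines = min(guest files, HGEIE bits) */
	kvm_riscv_aia_nr_hgei = min(imsic_nr_guest_files(),
				    kvm_riscv_aia_nr_hgei);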

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/kvm/aia.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index c78c06d99e39..3f97575707eb 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -408,7 +408,7 @@ int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
 
 	raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
 
-	/* TODO: To be updated later by AIA in-kernel irqchip support */
+	/* TODO: To be updated later by AIA IMSIC HW guest file support */
 	if (hgei_va)
 		*hgei_va = NULL;
 	if (hgei_pa)
@@ -610,6 +610,14 @@ int kvm_riscv_aia_init(void)
 	if (kvm_riscv_aia_nr_hgei)
 		kvm_riscv_aia_nr_hgei--;
 
+	/*
+	 * Number of usable HGEI lines should be minimum of per-HART
+	 * IMSIC guest files and number of bits in HGEIE
+	 *
+	 * TODO: To be updated later by AIA IMSIC HW guest file support
+	 */
+	kvm_riscv_aia_nr_hgei = 0;
+
 	/* Initialize guest external interrupt line management */
 	rc = aia_hgei_init();
 	if (rc)
-- 
2.34.1


* [PATCH 05/10] RISC-V: KVM: Skeletal in-kernel AIA irqchip support
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

To incrementally implement in-kernel AIA irqchip support, we first add
minimal skeletal support which compiles but does not provide any
functionality.
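
Even as a skeleton, this patch wires up the generic irqchip plumbing
(KVM_IRQ_LINE, IRQ routing, and MSI routing entries). Once the later
patches add the actual emulation, user space can assert a wired
interrupt line with the standard ioctl, e.g. (vm_fd is an assumed open
VM file descriptor):

	struct kvm_irq_level irq_level = { .irq = 7, .level = 1 };
	ioctl(vm_fd, KVM_IRQ_LINE, &irq_level);	/* raise wired IRQ 7 */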

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia.h  |  20 ++++++
 arch/riscv/include/asm/kvm_host.h |   4 ++
 arch/riscv/include/uapi/asm/kvm.h |   4 ++
 arch/riscv/kvm/Kconfig            |   4 ++
 arch/riscv/kvm/aia.c              |   8 +++
 arch/riscv/kvm/vm.c               | 115 ++++++++++++++++++++++++++++++
 6 files changed, 155 insertions(+)

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index 0938e0cadf80..3bc0a0e47a15 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -45,6 +45,7 @@ struct kvm_vcpu_aia {
 #define irqchip_in_kernel(k)		((k)->arch.aia.in_kernel)
 
 extern unsigned int kvm_riscv_aia_nr_hgei;
+extern unsigned int kvm_riscv_aia_max_ids;
 DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
 #define kvm_riscv_aia_available() \
 	static_branch_unlikely(&kvm_riscv_aia_available)
@@ -116,6 +117,25 @@ static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm,
+						 u32 hart_index,
+						 u32 guest_index, u32 iid)
+{
+	return 0;
+}
+
+static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm,
+					   struct kvm_msi *msi)
+{
+	return 0;
+}
+
+static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm,
+					   unsigned int irq, bool level)
+{
+	return 0;
+}
+
 static inline void kvm_riscv_aia_init_vm(struct kvm *kvm)
 {
 }
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index ee0acccb1d3b..871432586a63 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -27,6 +27,8 @@
 
 #define KVM_VCPU_MAX_FEATURES		0
 
+#define KVM_IRQCHIP_NUM_PINS		1024
+
 #define KVM_REQ_SLEEP \
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_VCPU_RESET		KVM_ARCH_REQ(1)
@@ -318,6 +320,8 @@ int kvm_riscv_gstage_vmid_init(struct kvm *kvm);
 bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid);
 void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
 
+int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines);
+
 void __kvm_riscv_unpriv_trap(void);
 
 unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index f92790c9481a..332d4a274891 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -15,6 +15,7 @@
 #include <asm/bitsperlong.h>
 #include <asm/ptrace.h>
 
+#define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -203,6 +204,9 @@ enum KVM_RISCV_SBI_EXT_ID {
 #define KVM_REG_RISCV_SBI_MULTI_REG_LAST	\
 		KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1)
 
+/* One single KVM irqchip, i.e. the AIA */
+#define KVM_NR_IRQCHIPS			1
+
 #endif
 
 #endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 28891e583259..dfc237d7875b 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -21,6 +21,10 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
 	depends on RISCV_SBI && MMU
 	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_MSI
 	select HAVE_KVM_VCPU_ASYNC_IOCTL
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select KVM_GENERIC_HARDWARE_ENABLING
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 3f97575707eb..18c442c15ff2 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -26,6 +26,7 @@ static DEFINE_PER_CPU(struct aia_hgei_control, aia_hgei);
 static int hgei_parent_irq;
 
 unsigned int kvm_riscv_aia_nr_hgei;
+unsigned int kvm_riscv_aia_max_ids;
 DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
 
 static int aia_find_hgei(struct kvm_vcpu *owner)
@@ -618,6 +619,13 @@ int kvm_riscv_aia_init(void)
 	 */
 	kvm_riscv_aia_nr_hgei = 0;
 
+	/*
+	 * Find number of guest MSI IDs
+	 *
+	 * TODO: To be updated later by AIA IMSIC HW guest file support
+	 */
+	kvm_riscv_aia_max_ids = IMSIC_MAX_ID;
+
 	/* Initialize guest external interrupt line management */
 	rc = aia_hgei_init();
 	if (rc)
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 6ef15f78e80f..d2349326b2ce 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -55,6 +55,121 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_riscv_aia_destroy_vm(kvm);
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irql,
+			  bool line_status)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	return kvm_riscv_aia_inject_irq(kvm, irql->irq, irql->level);
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id,
+		int level, bool line_status)
+{
+	struct kvm_msi msi;
+
+	if (!level)
+		return -1;
+
+	msi.address_lo = e->msi.address_lo;
+	msi.address_hi = e->msi.address_hi;
+	msi.data = e->msi.data;
+	msi.flags = e->msi.flags;
+	msi.devid = e->msi.devid;
+
+	return kvm_riscv_aia_inject_msi(kvm, &msi);
+}
+
+static int kvm_riscv_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			     struct kvm *kvm, int irq_source_id,
+			     int level, bool line_status)
+{
+	return kvm_riscv_aia_inject_irq(kvm, e->irqchip.pin, level);
+}
+
+int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines)
+{
+	struct kvm_irq_routing_entry *ents;
+	int i, rc;
+
+	ents = kcalloc(lines, sizeof(*ents), GFP_KERNEL);
+	if (!ents)
+		return -ENOMEM;
+
+	for (i = 0; i < lines; i++) {
+		ents[i].gsi = i;
+		ents[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+		ents[i].u.irqchip.irqchip = 0;
+		ents[i].u.irqchip.pin = i;
+	}
+	rc = kvm_set_irq_routing(kvm, ents, lines, 0);
+	kfree(ents);
+
+	return rc;
+}
+
+bool kvm_arch_can_set_irq_routing(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm);
+}
+
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		e->set = kvm_riscv_set_irq;
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin;
+		if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) ||
+		    (e->irqchip.irqchip >= KVM_NR_IRQCHIPS))
+			goto out;
+		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		e->msi.flags = ue->flags;
+		e->msi.devid = ue->u.msi.devid;
+		break;
+	default:
+		goto out;
+	}
+	r = 0;
+out:
+	return r;
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+			      struct kvm *kvm, int irq_source_id, int level,
+			      bool line_status)
+{
+	if (!level)
+		return -EWOULDBLOCK;
+
+	switch (e->type) {
+	case KVM_IRQ_ROUTING_MSI:
+		return kvm_set_msi(e, kvm, irq_source_id, level, line_status);
+
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		return kvm_riscv_set_irq(e, kvm, irq_source_id,
+					 level, line_status);
+	}
+
+	return -EWOULDBLOCK;
+}
+
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm);
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
-- 
2.34.1


* [PATCH 06/10] RISC-V: KVM: Implement device interface for AIA irqchip
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

We implement the KVM device interface for the in-kernel AIA irqchip so
that user space can use KVM device ioctls to create, configure, and
destroy the in-kernel AIA irqchip.
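
From the user-space side the flow looks roughly like this (a sketch
with error handling omitted; vm_fd and the APLIC base address are
assumed values for the example):

	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_RISCV_AIA };
	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);	/* cd.fd is the AIA fd */

	/* Configure the number of wired sources before initialization */
	u32 nr_srcs = 32;
	struct kvm_device_attr ad = {
		.group = KVM_DEV_RISCV_AIA_GRP_CONFIG,
		.attr  = KVM_DEV_RISCV_AIA_CONFIG_SRCS,
		.addr  = (u64)(unsigned long)&nr_srcs,
	};
	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &ad);

	/* Set the APLIC base (IMSIC bases are set per-VCPU similarly) */
	u64 aplic_addr = 0x0d000000;
	ad.group = KVM_DEV_RISCV_AIA_GRP_ADDR;
	ad.attr  = KVM_DEV_RISCV_AIA_ADDR_APLIC;
	ad.addr  = (u64)(unsigned long)&aplic_addr;
	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &ad);

	/* Finalize the in-kernel irqchip */
	ad.group = KVM_DEV_RISCV_AIA_GRP_CTRL;
	ad.attr  = KVM_DEV_RISCV_AIA_CTRL_INIT;
	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &ad);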

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia.h  | 132 +++++--
 arch/riscv/include/uapi/asm/kvm.h |  36 ++
 arch/riscv/kvm/Makefile           |   1 +
 arch/riscv/kvm/aia.c              |  11 +
 arch/riscv/kvm/aia_device.c       | 622 ++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |   2 +
 6 files changed, 762 insertions(+), 42 deletions(-)
 create mode 100644 arch/riscv/kvm/aia_device.c

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index 3bc0a0e47a15..a1281ebc9b92 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -20,6 +20,33 @@ struct kvm_aia {
 
 	/* In-kernel irqchip initialized */
 	bool		initialized;
+
+	/* Virtualization mode (Emulation, HW Accelerated, or Auto) */
+	u32		mode;
+
+	/* Number of MSIs */
+	u32		nr_ids;
+
+	/* Number of wired IRQs */
+	u32		nr_sources;
+
+	/* Number of group bits in IMSIC address */
+	u32		nr_group_bits;
+
+	/* Position of group bits in IMSIC address */
+	u32		nr_group_shift;
+
+	/* Number of hart bits in IMSIC address */
+	u32		nr_hart_bits;
+
+	/* Number of guest bits in IMSIC address */
+	u32		nr_guest_bits;
+
+	/* Guest physical address of APLIC */
+	gpa_t		aplic_addr;
+
+	/* Internal state of APLIC */
+	void		*aplic_state;
 };
 
 struct kvm_vcpu_aia_csr {
@@ -38,8 +65,19 @@ struct kvm_vcpu_aia {
 
 	/* CPU AIA CSR context upon Guest VCPU reset */
 	struct kvm_vcpu_aia_csr guest_reset_csr;
+
+	/* Guest physical address of IMSIC for this VCPU */
+	gpa_t		imsic_addr;
+
+	/* HART index of IMSIC extracted from guest physical address */
+	u32		hart_index;
+
+	/* Internal state of IMSIC for this VCPU */
+	void		*imsic_state;
 };
 
+#define KVM_RISCV_AIA_UNDEF_ADDR	(-1)
+
 #define kvm_riscv_aia_initialized(k)	((k)->arch.aia.initialized)
 
 #define irqchip_in_kernel(k)		((k)->arch.aia.in_kernel)
@@ -50,10 +88,17 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
 #define kvm_riscv_aia_available() \
 	static_branch_unlikely(&kvm_riscv_aia_available)
 
+extern struct kvm_device_ops kvm_riscv_aia_device_ops;
+
 static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 #define KVM_RISCV_AIA_IMSIC_TOPEI	(ISELECT_MASK + 1)
 static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
 					       unsigned long isel,
@@ -64,6 +109,41 @@ static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
+						  u32 guest_index, u32 offset,
+						  u32 iid)
+{
+	return 0;
+}
+
+static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm,
+					     u32 source, bool level)
+{
+	return 0;
+}
+
+static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm)
+{
+	return 0;
+}
+
+static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
+{
+}
+
 #ifdef CONFIG_32BIT
 void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu);
@@ -99,50 +179,18 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
 { .base = CSR_SIREG,      .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \
 { .base = CSR_STOPEI,     .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei },
 
-static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
-{
-	return 1;
-}
-
-static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm,
-						 u32 hart_index,
-						 u32 guest_index, u32 iid)
-{
-	return 0;
-}
-
-static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm,
-					   struct kvm_msi *msi)
-{
-	return 0;
-}
+int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu);
 
-static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm,
-					   unsigned int irq, bool level)
-{
-	return 0;
-}
+int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
+				   u32 guest_index, u32 iid);
+int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
+int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level);
 
-static inline void kvm_riscv_aia_init_vm(struct kvm *kvm)
-{
-}
-
-static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
-{
-}
+void kvm_riscv_aia_init_vm(struct kvm *kvm);
+void kvm_riscv_aia_destroy_vm(struct kvm *kvm);
 
 int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
 			     void __iomem **hgei_va, phys_addr_t *hgei_pa);
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 332d4a274891..57f8d8bb498e 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -204,6 +204,42 @@ enum KVM_RISCV_SBI_EXT_ID {
 #define KVM_REG_RISCV_SBI_MULTI_REG_LAST	\
 		KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1)
 
+/* Device Control API: RISC-V AIA */
+#define KVM_DEV_RISCV_APLIC_ALIGN		0x1000
+#define KVM_DEV_RISCV_APLIC_SIZE		0x4000
+#define KVM_DEV_RISCV_APLIC_MAX_HARTS		0x4000
+#define KVM_DEV_RISCV_IMSIC_ALIGN		0x1000
+#define KVM_DEV_RISCV_IMSIC_SIZE		0x1000
+
+#define KVM_DEV_RISCV_AIA_GRP_CONFIG		0
+#define KVM_DEV_RISCV_AIA_CONFIG_MODE		0
+#define KVM_DEV_RISCV_AIA_CONFIG_IDS		1
+#define KVM_DEV_RISCV_AIA_CONFIG_SRCS		2
+#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS	3
+#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT	4
+#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS	5
+#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS	6
+#define KVM_DEV_RISCV_AIA_MODE_EMUL		0
+#define KVM_DEV_RISCV_AIA_MODE_HWACCEL		1
+#define KVM_DEV_RISCV_AIA_MODE_AUTO		2
+#define KVM_DEV_RISCV_AIA_IDS_MIN		63
+#define KVM_DEV_RISCV_AIA_IDS_MAX		2048
+#define KVM_DEV_RISCV_AIA_SRCS_MAX		1024
+#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX	8
+#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN	24
+#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX	56
+#define KVM_DEV_RISCV_AIA_HART_BITS_MAX	16
+#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX	8
+
+#define KVM_DEV_RISCV_AIA_GRP_ADDR		1
+#define KVM_DEV_RISCV_AIA_ADDR_APLIC		0
+#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu)	(1 + (__vcpu))
+#define KVM_DEV_RISCV_AIA_ADDR_MAX		\
+		(1 + KVM_DEV_RISCV_APLIC_MAX_HARTS)
+
+#define KVM_DEV_RISCV_AIA_GRP_CTRL		2
+#define KVM_DEV_RISCV_AIA_CTRL_INIT		0
+
 /* One single KVM irqchip, i.e. the AIA */
 #define KVM_NR_IRQCHIPS			1
 
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 8031b8912a0d..dd69ebe098bd 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -27,3 +27,4 @@ kvm-y += vcpu_sbi_hsm.o
 kvm-y += vcpu_timer.o
 kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
 kvm-y += aia.o
+kvm-y += aia_device.o
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 18c442c15ff2..585a3b42c52c 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -631,6 +631,14 @@ int kvm_riscv_aia_init(void)
 	if (rc)
 		return rc;
 
+	/* Register device operations */
+	rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops,
+				     KVM_DEV_TYPE_RISCV_AIA);
+	if (rc) {
+		aia_hgei_exit();
+		return rc;
+	}
+
 	/* Enable KVM AIA support */
 	static_branch_enable(&kvm_riscv_aia_available);
 
@@ -642,6 +650,9 @@ void kvm_riscv_aia_exit(void)
 	if (!kvm_riscv_aia_available())
 		return;
 
+	/* Unregister device operations */
+	kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA);
+
 	/* Cleanup the HGEI state */
 	aia_hgei_exit();
 }
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
new file mode 100644
index 000000000000..a151fb357887
--- /dev/null
+++ b/arch/riscv/kvm/aia_device.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ *	Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_aia_imsic.h>
+
+static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
+{
+	struct kvm_vcpu *tmp_vcpu;
+
+	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+		tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+		mutex_unlock(&tmp_vcpu->mutex);
+	}
+}
+
+static void unlock_all_vcpus(struct kvm *kvm)
+{
+	unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
+}
+
+static bool lock_all_vcpus(struct kvm *kvm)
+{
+	struct kvm_vcpu *tmp_vcpu;
+	unsigned long c;
+
+	kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
+		if (!mutex_trylock(&tmp_vcpu->mutex)) {
+			unlock_vcpus(kvm, c - 1);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int aia_create(struct kvm_device *dev, u32 type)
+{
+	int ret;
+	unsigned long i;
+	struct kvm *kvm = dev->kvm;
+	struct kvm_vcpu *vcpu;
+
+	if (irqchip_in_kernel(kvm))
+		return -EEXIST;
+
+	ret = -EBUSY;
+	if (!lock_all_vcpus(kvm))
+		return ret;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.ran_atleast_once)
+			goto out_unlock;
+	}
+	ret = 0;
+
+	kvm->arch.aia.in_kernel = true;
+
+out_unlock:
+	unlock_all_vcpus(kvm);
+	return ret;
+}
+
+static void aia_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+static int aia_config(struct kvm *kvm, unsigned long type,
+		      u32 *nr, bool write)
+{
+	struct kvm_aia *aia = &kvm->arch.aia;
+
+	/* Writes can only be done before irqchip is initialized */
+	if (write && kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	switch (type) {
+	case KVM_DEV_RISCV_AIA_CONFIG_MODE:
+		if (write) {
+			switch (*nr) {
+			case KVM_DEV_RISCV_AIA_MODE_EMUL:
+				break;
+			case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
+			case KVM_DEV_RISCV_AIA_MODE_AUTO:
+				/*
+				 * HW Acceleration and Auto modes are
+				 * only supported on hosts with non-zero
+				 * guest external interrupt lines (i.e.
+				 * non-zero VS-level IMSIC pages).
+				 */
+				if (!kvm_riscv_aia_nr_hgei)
+					return -EINVAL;
+				break;
+			default:
+				return -EINVAL;
+			}
+			aia->mode = *nr;
+		} else
+			*nr = aia->mode;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_IDS:
+		if (write) {
+			if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) ||
+			    (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) ||
+			    (*nr & KVM_DEV_RISCV_AIA_IDS_MIN) ||
+			    (kvm_riscv_aia_max_ids <= *nr))
+				return -EINVAL;
+			aia->nr_ids = *nr;
+		} else
+			*nr = aia->nr_ids;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
+		if (write) {
+			if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) ||
+			    (*nr >= kvm_riscv_aia_max_ids))
+				return -EINVAL;
+			aia->nr_sources = *nr;
+		} else
+			*nr = aia->nr_sources;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
+		if (write) {
+			if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX)
+				return -EINVAL;
+			aia->nr_group_bits = *nr;
+		} else
+			*nr = aia->nr_group_bits;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
+		if (write) {
+			if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) ||
+			    (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX))
+				return -EINVAL;
+			aia->nr_group_shift = *nr;
+		} else
+			*nr = aia->nr_group_shift;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
+		if (write) {
+			if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX)
+				return -EINVAL;
+			aia->nr_hart_bits = *nr;
+		} else
+			*nr = aia->nr_hart_bits;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
+		if (write) {
+			if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX)
+				return -EINVAL;
+			aia->nr_guest_bits = *nr;
+		} else
+			*nr = aia->nr_guest_bits;
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int aia_aplic_addr(struct kvm *kvm, u64 *addr, bool write)
+{
+	struct kvm_aia *aia = &kvm->arch.aia;
+
+	if (write) {
+		/* Writes can only be done before irqchip is initialized */
+		if (kvm_riscv_aia_initialized(kvm))
+			return -EBUSY;
+
+		if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1))
+			return -EINVAL;
+
+		aia->aplic_addr = *addr;
+	} else
+		*addr = aia->aplic_addr;
+
+	return 0;
+}
+
+static int aia_imsic_addr(struct kvm *kvm, u64 *addr,
+			  unsigned long vcpu_idx, bool write)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_aia *vcpu_aia;
+
+	vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+	if (!vcpu)
+		return -EINVAL;
+	vcpu_aia = &vcpu->arch.aia_context;
+
+	if (write) {
+		/* Writes can only be done before irqchip is initialized */
+		if (kvm_riscv_aia_initialized(kvm))
+			return -EBUSY;
+
+		if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1))
+			return -EINVAL;
+	}
+
+	mutex_lock(&vcpu->mutex);
+	if (write)
+		vcpu_aia->imsic_addr = *addr;
+	else
+		*addr = vcpu_aia->imsic_addr;
+	mutex_unlock(&vcpu->mutex);
+
+	return 0;
+}
+
+static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
+{
+	u32 h, l;
+	gpa_t mask = 0;
+
+	h = aia->nr_hart_bits + aia->nr_guest_bits +
+	    IMSIC_MMIO_PAGE_SHIFT - 1;
+	mask = GENMASK_ULL(h, 0);
+
+	if (aia->nr_group_bits) {
+		h = aia->nr_group_bits + aia->nr_group_shift - 1;
+		l = aia->nr_group_shift;
+		mask |= GENMASK_ULL(h, l);
+	}
+
+	return (addr & ~mask) >> IMSIC_MMIO_PAGE_SHIFT;
+}
+
+static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
+{
+	u32 hart, group = 0;
+
+	hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+		GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+	if (aia->nr_group_bits)
+		group = (addr >> aia->nr_group_shift) &
+			GENMASK_ULL(aia->nr_group_bits - 1, 0);
+
+	return (group << aia->nr_hart_bits) | hart;
+}
+
+static int aia_init(struct kvm *kvm)
+{
+	int ret, i;
+	unsigned long idx;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_aia *vaia;
+	struct kvm_aia *aia = &kvm->arch.aia;
+	gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR;
+
+	/* Irqchip can be initialized only once */
+	if (kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* We might be in the middle of creating a VCPU? */
+	if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+		return -EBUSY;
+
+	/* Number of sources should be less than or equal to number of IDs */
+	if (aia->nr_ids < aia->nr_sources)
+		return -EINVAL;
+
+	/* APLIC base is required for non-zero number of sources */
+	if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR)
+		return -EINVAL;
+
+	/* Initialize APLIC */
+	ret = kvm_riscv_aia_aplic_init(kvm);
+	if (ret)
+		return ret;
+
+	/* Iterate over each VCPU */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		vaia = &vcpu->arch.aia_context;
+
+		/* IMSIC base is required */
+		if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) {
+			ret = -EINVAL;
+			goto fail_cleanup_imsics;
+		}
+
+		/* All IMSICs should have matching base PPN */
+		if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR)
+			base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr);
+		if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) {
+			ret = -EINVAL;
+			goto fail_cleanup_imsics;
+		}
+
+		/* Update HART index of the IMSIC based on IMSIC base */
+		vaia->hart_index = aia_imsic_hart_index(aia,
+							vaia->imsic_addr);
+
+		/* Initialize IMSIC for this VCPU */
+		ret = kvm_riscv_vcpu_aia_imsic_init(vcpu);
+		if (ret)
+			goto fail_cleanup_imsics;
+	}
+
+	/* Set the initialized flag */
+	kvm->arch.aia.initialized = true;
+
+	return 0;
+
+fail_cleanup_imsics:
+	for (i = idx - 1; i >= 0; i--) {
+		vcpu = kvm_get_vcpu(kvm, i);
+		if (!vcpu)
+			continue;
+		kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
+	}
+	kvm_riscv_aia_aplic_cleanup(kvm);
+	return ret;
+}
+
+static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	u32 nr;
+	u64 addr;
+	int nr_vcpus, r = -ENXIO;
+	unsigned long type = (unsigned long)attr->attr;
+	void __user *uaddr = (void __user *)(long)attr->addr;
+
+	switch (attr->group) {
+	case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+		if (copy_from_user(&nr, uaddr, sizeof(nr)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = aia_config(dev->kvm, type, &nr, true);
+		mutex_unlock(&dev->kvm->lock);
+
+		break;
+
+	case KVM_DEV_RISCV_AIA_GRP_ADDR:
+		if (copy_from_user(&addr, uaddr, sizeof(addr)))
+			return -EFAULT;
+
+		nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+		mutex_lock(&dev->kvm->lock);
+		if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+			r = aia_aplic_addr(dev->kvm, &addr, true);
+		else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+			r = aia_imsic_addr(dev->kvm, &addr,
+			    type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true);
+		mutex_unlock(&dev->kvm->lock);
+
+		break;
+
+	case KVM_DEV_RISCV_AIA_GRP_CTRL:
+		switch (type) {
+		case KVM_DEV_RISCV_AIA_CTRL_INIT:
+			mutex_lock(&dev->kvm->lock);
+			r = aia_init(dev->kvm);
+			mutex_unlock(&dev->kvm->lock);
+			break;
+		}
+
+		break;
+	}
+
+	return r;
+}
+
+static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	u32 nr;
+	u64 addr;
+	int nr_vcpus, r = -ENXIO;
+	void __user *uaddr = (void __user *)(long)attr->addr;
+	unsigned long type = (unsigned long)attr->attr;
+
+	switch (attr->group) {
+	case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+		if (copy_from_user(&nr, uaddr, sizeof(nr)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = aia_config(dev->kvm, type, &nr, false);
+		mutex_unlock(&dev->kvm->lock);
+		if (r)
+			return r;
+
+		if (copy_to_user(uaddr, &nr, sizeof(nr)))
+			return -EFAULT;
+
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_ADDR:
+		if (copy_from_user(&addr, uaddr, sizeof(addr)))
+			return -EFAULT;
+
+		nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+		mutex_lock(&dev->kvm->lock);
+		if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+			r = aia_aplic_addr(dev->kvm, &addr, false);
+		else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+			r = aia_imsic_addr(dev->kvm, &addr,
+			    type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false);
+		mutex_unlock(&dev->kvm->lock);
+		if (r)
+			return r;
+
+		if (copy_to_user(uaddr, &addr, sizeof(addr)))
+			return -EFAULT;
+
+		break;
+	}
+
+	return r;
+}
+
+static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int nr_vcpus;
+
+	switch (attr->group) {
+	case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+		switch (attr->attr) {
+		case KVM_DEV_RISCV_AIA_CONFIG_MODE:
+		case KVM_DEV_RISCV_AIA_CONFIG_IDS:
+		case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
+		case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
+		case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
+		case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
+		case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
+			return 0;
+		}
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_ADDR:
+		nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+		if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+			return 0;
+		else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+			return 0;
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_RISCV_AIA_CTRL_INIT:
+			return 0;
+		}
+		break;
+	}
+
+	return -ENXIO;
+}
+
+struct kvm_device_ops kvm_riscv_aia_device_ops = {
+	.name = "kvm-riscv-aia",
+	.create = aia_create,
+	.destroy = aia_destroy,
+	.set_attr = aia_set_attr,
+	.get_attr = aia_get_attr,
+	.has_attr = aia_has_attr,
+};
+
+int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(vcpu->kvm))
+		return 1;
+
+	/* Update the IMSIC HW state before entering guest mode */
+	return kvm_riscv_vcpu_aia_imsic_update(vcpu);
+}
+
+void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
+	struct kvm_vcpu_aia_csr *reset_csr =
+				&vcpu->arch.aia_context.guest_reset_csr;
+
+	if (!kvm_riscv_aia_available())
+		return;
+	memcpy(csr, reset_csr, sizeof(*csr));
+
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(vcpu->kvm))
+		return;
+
+	/* Reset the IMSIC context */
+	kvm_riscv_vcpu_aia_imsic_reset(vcpu);
+}
+
+int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
+
+	if (!kvm_riscv_aia_available())
+		return 0;
+
+	/*
+	 * We don't do any memory allocations here because these will
+	 * be done after the AIA device is initialized by user space.
+	 *
+	 * Refer to the aia_init() implementation for more details.
+	 */
+
+	/* Initialize default values in AIA vcpu context */
+	vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
+	vaia->hart_index = vcpu->vcpu_idx;
+
+	return 0;
+}
+
+void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(vcpu->kvm))
+		return;
+
+	/* Cleanup IMSIC context */
+	kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
+}
+
+int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
+				   u32 guest_index, u32 iid)
+{
+	unsigned long idx;
+	struct kvm_vcpu *vcpu;
+
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* Inject MSI to matching VCPU */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		if (vcpu->arch.aia_context.hart_index == hart_index)
+			return kvm_riscv_vcpu_aia_imsic_inject(vcpu,
+							       guest_index,
+							       0, iid);
+	}
+
+	return 0;
+}
+
+int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	gpa_t tppn, ippn;
+	unsigned long idx;
+	struct kvm_vcpu *vcpu;
+	u32 g, toff, iid = msi->data;
+	struct kvm_aia *aia = &kvm->arch.aia;
+	gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo;
+
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* Convert target address to target PPN */
+	tppn = target >> IMSIC_MMIO_PAGE_SHIFT;
+
+	/* Extract and clear Guest ID from target PPN */
+	g = tppn & (BIT(aia->nr_guest_bits) - 1);
+	tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1));
+
+	/* Inject MSI to matching VCPU */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		ippn = vcpu->arch.aia_context.imsic_addr >>
+					IMSIC_MMIO_PAGE_SHIFT;
+		if (ippn == tppn) {
+			toff = target & (IMSIC_MMIO_PAGE_SZ - 1);
+			return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g,
+							       toff, iid);
+		}
+	}
+
+	return 0;
+}
+
+int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* Inject interrupt level change in APLIC */
+	return kvm_riscv_aia_aplic_inject(kvm, irq, level);
+}
+
+void kvm_riscv_aia_init_vm(struct kvm *kvm)
+{
+	struct kvm_aia *aia = &kvm->arch.aia;
+
+	if (!kvm_riscv_aia_available())
+		return;
+
+	/*
+	 * We don't do any memory allocations here because these will
+	 * be done after the AIA device is initialized by user space.
+	 *
+	 * Refer to the aia_init() implementation for more details.
+	 */
+
+	/* Initialize default values in AIA global context */
+	aia->mode = (kvm_riscv_aia_nr_hgei) ?
+		KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL;
+	aia->nr_ids = kvm_riscv_aia_max_ids - 1;
+	aia->nr_sources = 0;
+	aia->nr_group_bits = 0;
+	aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN;
+	aia->nr_hart_bits = 0;
+	aia->nr_guest_bits = 0;
+	aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
+}
+
+void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return;
+
+	/* Cleanup APLIC context */
+	kvm_riscv_aia_aplic_cleanup(kvm);
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 737318b1c1d9..27ccd07898e1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1442,6 +1442,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_XIVE		KVM_DEV_TYPE_XIVE
 	KVM_DEV_TYPE_ARM_PV_TIME,
 #define KVM_DEV_TYPE_ARM_PV_TIME	KVM_DEV_TYPE_ARM_PV_TIME
+	KVM_DEV_TYPE_RISCV_AIA,
+#define KVM_DEV_TYPE_RISCV_AIA		KVM_DEV_TYPE_RISCV_AIA
 	KVM_DEV_TYPE_MAX,
 };
 
-- 
2.34.1


* [PATCH 07/10] RISC-V: KVM: Add in-kernel emulation of AIA APLIC
From: Anup Patel @ 2023-05-17 10:51 UTC
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

There is no virtualization support in the AIA APLIC, so we add
in-kernel emulation of the AIA APLIC which only supports MSI-mode
(i.e. wired interrupts are forwarded to the AIA IMSIC as MSIs).
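
Conceptually (a sketch of the forwarding step, not the literal patch
code), when a wired source becomes both pending and enabled, the
emulation decodes the source's target register and injects the
programmed MSI identity into the addressed IMSIC:

	/* Sketch only: forward a pending+enabled source as an MSI */
	if ((irqd->state & APLIC_IRQ_STATE_ENPEND) == APLIC_IRQ_STATE_ENPEND) {
		u32 hart  = (irqd->target >> APLIC_TARGET_HART_IDX_SHIFT) &
			    APLIC_TARGET_HART_IDX_MASK;
		u32 guest = (irqd->target >> APLIC_TARGET_GUEST_IDX_SHIFT) &
			    APLIC_TARGET_GUEST_IDX_MASK;
		u32 eiid  = irqd->target & APLIC_TARGET_EIID_MASK;

		kvm_riscv_aia_inject_msi_by_id(kvm, hart, guest, eiid);
	}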

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia.h |  17 +-
 arch/riscv/kvm/Makefile          |   1 +
 arch/riscv/kvm/aia_aplic.c       | 574 +++++++++++++++++++++++++++++++
 3 files changed, 578 insertions(+), 14 deletions(-)
 create mode 100644 arch/riscv/kvm/aia_aplic.c

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index a1281ebc9b92..f6bd8523395f 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -129,20 +129,9 @@ static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
 {
 }
 
-static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm,
-					     u32 source, bool level)
-{
-	return 0;
-}
-
-static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
-{
-}
+int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level);
+int kvm_riscv_aia_aplic_init(struct kvm *kvm);
+void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm);
 
 #ifdef CONFIG_32BIT
 void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index dd69ebe098bd..94c43702c765 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -28,3 +28,4 @@ kvm-y += vcpu_timer.o
 kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
 kvm-y += aia.o
 kvm-y += aia_device.o
+kvm-y += aia_aplic.o
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
new file mode 100644
index 000000000000..1b0a4df64815
--- /dev/null
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -0,0 +1,574 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ *	Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/math.h>
+#include <linux/spinlock.h>
+#include <linux/swab.h>
+#include <kvm/iodev.h>
+#include <asm/kvm_aia_aplic.h>
+
+struct aplic_irq {
+	raw_spinlock_t lock;
+	u32 sourcecfg;
+	u32 state;
+#define APLIC_IRQ_STATE_PENDING		BIT(0)
+#define APLIC_IRQ_STATE_ENABLED		BIT(1)
+#define APLIC_IRQ_STATE_ENPEND		(APLIC_IRQ_STATE_PENDING | \
+					 APLIC_IRQ_STATE_ENABLED)
+#define APLIC_IRQ_STATE_INPUT		BIT(8)
+	u32 target;
+};
+
+struct aplic {
+	struct kvm_io_device iodev;
+
+	u32 domaincfg;
+	u32 genmsi;
+
+	u32 nr_irqs;
+	u32 nr_words;
+	struct aplic_irq *irqs;
+};
+
+static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq)
+{
+	u32 ret;
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return 0;
+	irqd = &aplic->irqs[irq];
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	ret = irqd->sourcecfg;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+	return ret;
+}
+
+static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val)
+{
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return;
+	irqd = &aplic->irqs[irq];
+
+	if (val & APLIC_SOURCECFG_D)
+		val = 0;
+	else
+		val &= APLIC_SOURCECFG_SM_MASK;
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	irqd->sourcecfg = val;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static u32 aplic_read_target(struct aplic *aplic, u32 irq)
+{
+	u32 ret;
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return 0;
+	irqd = &aplic->irqs[irq];
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	ret = irqd->target;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+	return ret;
+}
+
+static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val)
+{
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return;
+	irqd = &aplic->irqs[irq];
+
+	val &= APLIC_TARGET_EIID_MASK |
+	       (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) |
+	       (APLIC_TARGET_GUEST_IDX_MASK << APLIC_TARGET_GUEST_IDX_SHIFT);
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	irqd->target = val;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_pending(struct aplic *aplic, u32 irq)
+{
+	bool ret;
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return false;
+	irqd = &aplic->irqs[irq];
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+	return ret;
+}
+
+static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
+{
+	unsigned long flags, sm;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return;
+	irqd = &aplic->irqs[irq];
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+
+	sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
+	if (!pending &&
+	    ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
+	     (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+		goto skip_write_pending;
+
+	if (pending)
+		irqd->state |= APLIC_IRQ_STATE_PENDING;
+	else
+		irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+
+skip_write_pending:
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_enabled(struct aplic *aplic, u32 irq)
+{
+	bool ret;
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return false;
+	irqd = &aplic->irqs[irq];
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+	return ret;
+}
+
+static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
+{
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return;
+	irqd = &aplic->irqs[irq];
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	if (enabled)
+		irqd->state |= APLIC_IRQ_STATE_ENABLED;
+	else
+		irqd->state &= ~APLIC_IRQ_STATE_ENABLED;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_input(struct aplic *aplic, u32 irq)
+{
+	bool ret;
+	unsigned long flags;
+	struct aplic_irq *irqd;
+
+	if (!irq || aplic->nr_irqs <= irq)
+		return false;
+	irqd = &aplic->irqs[irq];
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+	ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+	return ret;
+}
+
+static void aplic_inject_msi(struct kvm *kvm, u32 irq, u32 target)
+{
+	u32 hart_idx, guest_idx, eiid;
+
+	hart_idx = target >> APLIC_TARGET_HART_IDX_SHIFT;
+	hart_idx &= APLIC_TARGET_HART_IDX_MASK;
+	guest_idx = target >> APLIC_TARGET_GUEST_IDX_SHIFT;
+	guest_idx &= APLIC_TARGET_GUEST_IDX_MASK;
+	eiid = target & APLIC_TARGET_EIID_MASK;
+	kvm_riscv_aia_inject_msi_by_id(kvm, hart_idx, guest_idx, eiid);
+}
+
+static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last)
+{
+	bool inject;
+	u32 irq, target;
+	unsigned long flags;
+	struct aplic_irq *irqd;
+	struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+	if (!(aplic->domaincfg & APLIC_DOMAINCFG_IE))
+		return;
+
+	for (irq = first; irq <= last; irq++) {
+		if (!irq || aplic->nr_irqs <= irq)
+			continue;
+		irqd = &aplic->irqs[irq];
+
+		raw_spin_lock_irqsave(&irqd->lock, flags);
+
+		inject = false;
+		target = irqd->target;
+		if (irqd->state & APLIC_IRQ_STATE_ENPEND) {
+			irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+			inject = true;
+		}
+
+		raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+		if (inject)
+			aplic_inject_msi(kvm, irq, target);
+	}
+}
+
+int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level)
+{
+	u32 target;
+	bool inject = false, ie;
+	unsigned long flags;
+	struct aplic_irq *irqd;
+	struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+	if (!aplic || !source || (aplic->nr_irqs <= source))
+		return -ENODEV;
+	irqd = &aplic->irqs[source];
+	ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false;
+
+	raw_spin_lock_irqsave(&irqd->lock, flags);
+
+	if (irqd->sourcecfg & APLIC_SOURCECFG_D)
+		goto skip_unlock;
+
+	switch (irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK) {
+	case APLIC_SOURCECFG_SM_EDGE_RISE:
+		if (level && !(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+		    !(irqd->state & APLIC_IRQ_STATE_PENDING))
+			irqd->state |= APLIC_IRQ_STATE_PENDING;
+		break;
+	case APLIC_SOURCECFG_SM_EDGE_FALL:
+		if (!level && (irqd->state & APLIC_IRQ_STATE_INPUT) &&
+		    !(irqd->state & APLIC_IRQ_STATE_PENDING))
+			irqd->state |= APLIC_IRQ_STATE_PENDING;
+		break;
+	case APLIC_SOURCECFG_SM_LEVEL_HIGH:
+		if (level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+			irqd->state |= APLIC_IRQ_STATE_PENDING;
+		break;
+	case APLIC_SOURCECFG_SM_LEVEL_LOW:
+		if (!level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+			irqd->state |= APLIC_IRQ_STATE_PENDING;
+		break;
+	}
+
+	if (level)
+		irqd->state |= APLIC_IRQ_STATE_INPUT;
+	else
+		irqd->state &= ~APLIC_IRQ_STATE_INPUT;
+
+	target = irqd->target;
+	if (ie && (irqd->state & APLIC_IRQ_STATE_ENPEND)) {
+		irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+		inject = true;
+	}
+
+skip_unlock:
+	raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+	if (inject)
+		aplic_inject_msi(kvm, source, target);
+
+	return 0;
+}
+
+static u32 aplic_read_input_word(struct aplic *aplic, u32 word)
+{
+	u32 i, ret = 0;
+
+	for (i = 0; i < 32; i++)
+		ret |= aplic_read_input(aplic, word * 32 + i) ? BIT(i) : 0;
+
+	return ret;
+}
+
+static u32 aplic_read_pending_word(struct aplic *aplic, u32 word)
+{
+	u32 i, ret = 0;
+
+	for (i = 0; i < 32; i++)
+		ret |= aplic_read_pending(aplic, word * 32 + i) ? BIT(i) : 0;
+
+	return ret;
+}
+
+static void aplic_write_pending_word(struct aplic *aplic, u32 word,
+				     u32 val, bool pending)
+{
+	u32 i;
+
+	for (i = 0; i < 32; i++) {
+		if (val & BIT(i))
+			aplic_write_pending(aplic, word * 32 + i, pending);
+	}
+}
+
+static u32 aplic_read_enabled_word(struct aplic *aplic, u32 word)
+{
+	u32 i, ret = 0;
+
+	for (i = 0; i < 32; i++)
+		ret |= aplic_read_enabled(aplic, word * 32 + i) ? BIT(i) : 0;
+
+	return ret;
+}
+
+static void aplic_write_enabled_word(struct aplic *aplic, u32 word,
+				     u32 val, bool enabled)
+{
+	u32 i;
+
+	for (i = 0; i < 32; i++) {
+		if (val & BIT(i))
+			aplic_write_enabled(aplic, word * 32 + i, enabled);
+	}
+}
+
+static int aplic_mmio_read_offset(struct kvm *kvm, gpa_t off, u32 *val32)
+{
+	u32 i;
+	struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+	if ((off & 0x3) != 0)
+		return -EOPNOTSUPP;
+
+	if (off == APLIC_DOMAINCFG) {
+		*val32 = APLIC_DOMAINCFG_RDONLY |
+			 aplic->domaincfg | APLIC_DOMAINCFG_DM;
+	} else if ((off >= APLIC_SOURCECFG_BASE) &&
+		 (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+		i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+		*val32 = aplic_read_sourcecfg(aplic, i);
+	} else if ((off >= APLIC_SETIP_BASE) &&
+		   (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+		i = (off - APLIC_SETIP_BASE) >> 2;
+		*val32 = aplic_read_pending_word(aplic, i);
+	} else if (off == APLIC_SETIPNUM) {
+		*val32 = 0;
+	} else if ((off >= APLIC_CLRIP_BASE) &&
+		   (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+		i = (off - APLIC_CLRIP_BASE) >> 2;
+		*val32 = aplic_read_input_word(aplic, i);
+	} else if (off == APLIC_CLRIPNUM) {
+		*val32 = 0;
+	} else if ((off >= APLIC_SETIE_BASE) &&
+		   (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+		i = (off - APLIC_SETIE_BASE) >> 2;
+		*val32 = aplic_read_enabled_word(aplic, i);
+	} else if (off == APLIC_SETIENUM) {
+		*val32 = 0;
+	} else if ((off >= APLIC_CLRIE_BASE) &&
+		   (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+		*val32 = 0;
+	} else if (off == APLIC_CLRIENUM) {
+		*val32 = 0;
+	} else if (off == APLIC_SETIPNUM_LE) {
+		*val32 = 0;
+	} else if (off == APLIC_SETIPNUM_BE) {
+		*val32 = 0;
+	} else if (off == APLIC_GENMSI) {
+		*val32 = aplic->genmsi;
+	} else if ((off >= APLIC_TARGET_BASE) &&
+		   (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+		i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+		*val32 = aplic_read_target(aplic, i);
+	} else
+		return -ENODEV;
+
+	return 0;
+}
+
+static int aplic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+			   gpa_t addr, int len, void *val)
+{
+	if (len != 4)
+		return -EOPNOTSUPP;
+
+	return aplic_mmio_read_offset(vcpu->kvm,
+				      addr - vcpu->kvm->arch.aia.aplic_addr,
+				      val);
+}
+
+static int aplic_mmio_write_offset(struct kvm *kvm, gpa_t off, u32 val32)
+{
+	u32 i;
+	struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+	if ((off & 0x3) != 0)
+		return -EOPNOTSUPP;
+
+	if (off == APLIC_DOMAINCFG) {
+		/* Only the IE bit is writable */
+		aplic->domaincfg = val32 & APLIC_DOMAINCFG_IE;
+	} else if ((off >= APLIC_SOURCECFG_BASE) &&
+		 (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+		i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+		aplic_write_sourcecfg(aplic, i, val32);
+	} else if ((off >= APLIC_SETIP_BASE) &&
+		   (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+		i = (off - APLIC_SETIP_BASE) >> 2;
+		aplic_write_pending_word(aplic, i, val32, true);
+	} else if (off == APLIC_SETIPNUM) {
+		aplic_write_pending(aplic, val32, true);
+	} else if ((off >= APLIC_CLRIP_BASE) &&
+		   (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+		i = (off - APLIC_CLRIP_BASE) >> 2;
+		aplic_write_pending_word(aplic, i, val32, false);
+	} else if (off == APLIC_CLRIPNUM) {
+		aplic_write_pending(aplic, val32, false);
+	} else if ((off >= APLIC_SETIE_BASE) &&
+		   (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+		i = (off - APLIC_SETIE_BASE) >> 2;
+		aplic_write_enabled_word(aplic, i, val32, true);
+	} else if (off == APLIC_SETIENUM) {
+		aplic_write_enabled(aplic, val32, true);
+	} else if ((off >= APLIC_CLRIE_BASE) &&
+		   (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+		i = (off - APLIC_CLRIE_BASE) >> 2;
+		aplic_write_enabled_word(aplic, i, val32, false);
+	} else if (off == APLIC_CLRIENUM) {
+		aplic_write_enabled(aplic, val32, false);
+	} else if (off == APLIC_SETIPNUM_LE) {
+		aplic_write_pending(aplic, val32, true);
+	} else if (off == APLIC_SETIPNUM_BE) {
+		aplic_write_pending(aplic, __swab32(val32), true);
+	} else if (off == APLIC_GENMSI) {
+		aplic->genmsi = val32 & ~(APLIC_TARGET_GUEST_IDX_MASK <<
+					  APLIC_TARGET_GUEST_IDX_SHIFT);
+		kvm_riscv_aia_inject_msi_by_id(kvm,
+				val32 >> APLIC_TARGET_HART_IDX_SHIFT, 0,
+				val32 & APLIC_TARGET_EIID_MASK);
+	} else if ((off >= APLIC_TARGET_BASE) &&
+		   (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+		i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+		aplic_write_target(aplic, i, val32);
+	} else
+		return -ENODEV;
+
+	aplic_update_irq_range(kvm, 1, aplic->nr_irqs - 1);
+
+	return 0;
+}
+
+static int aplic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+			    gpa_t addr, int len, const void *val)
+{
+	if (len != 4)
+		return -EOPNOTSUPP;
+
+	return aplic_mmio_write_offset(vcpu->kvm,
+				       addr - vcpu->kvm->arch.aia.aplic_addr,
+				       *((const u32 *)val));
+}
+
+static struct kvm_io_device_ops aplic_iodoev_ops = {
+	.read = aplic_mmio_read,
+	.write = aplic_mmio_write,
+};
+
+int kvm_riscv_aia_aplic_init(struct kvm *kvm)
+{
+	int i, ret = 0;
+	struct aplic *aplic;
+
+	/* Do nothing if we have zero sources */
+	if (!kvm->arch.aia.nr_sources)
+		return 0;
+
+	/* Allocate APLIC global state */
+	aplic = kzalloc(sizeof(*aplic), GFP_KERNEL);
+	if (!aplic)
+		return -ENOMEM;
+	kvm->arch.aia.aplic_state = aplic;
+
+	/* Setup APLIC IRQs */
+	aplic->nr_irqs = kvm->arch.aia.nr_sources + 1;
+	aplic->nr_words = DIV_ROUND_UP(aplic->nr_irqs, 32);
+	aplic->irqs = kcalloc(aplic->nr_irqs,
+			      sizeof(*aplic->irqs), GFP_KERNEL);
+	if (!aplic->irqs) {
+		ret = -ENOMEM;
+		goto fail_free_aplic;
+	}
+	for (i = 0; i < aplic->nr_irqs; i++)
+		raw_spin_lock_init(&aplic->irqs[i].lock);
+
+	/* Setup IO device */
+	kvm_iodevice_init(&aplic->iodev, &aplic_iodoev_ops);
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+				      kvm->arch.aia.aplic_addr,
+				      KVM_DEV_RISCV_APLIC_SIZE,
+				      &aplic->iodev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret)
+		goto fail_free_aplic_irqs;
+
+	/* Setup default IRQ routing */
+	ret = kvm_riscv_setup_default_irq_routing(kvm, aplic->nr_irqs);
+	if (ret)
+		goto fail_unreg_iodev;
+
+	return 0;
+
+fail_unreg_iodev:
+	mutex_lock(&kvm->slots_lock);
+	kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
+	mutex_unlock(&kvm->slots_lock);
+fail_free_aplic_irqs:
+	kfree(aplic->irqs);
+fail_free_aplic:
+	kvm->arch.aia.aplic_state = NULL;
+	kfree(aplic);
+	return ret;
+}
+
+void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
+{
+	struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+	if (!aplic)
+		return;
+
+	mutex_lock(&kvm->slots_lock);
+	kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
+	mutex_unlock(&kvm->slots_lock);
+
+	kfree(aplic->irqs);
+
+	kvm->arch.aia.aplic_state = NULL;
+	kfree(aplic);
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 08/10] RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
  2023-05-17 10:51 [PATCH 00/10] RISC-V KVM in-kernel AIA irqchip Anup Patel
                   ` (6 preceding siblings ...)
  2023-05-17 10:51 ` [PATCH 07/10] RISC-V: KVM: Add in-kernel emulation of AIA APLIC Anup Patel
@ 2023-05-17 10:51 ` Anup Patel
  2023-05-17 10:51 ` [PATCH 09/10] RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC Anup Patel
  2023-05-17 10:51 ` [PATCH 10/10] RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip Anup Patel
  9 siblings, 0 replies; 20+ messages in thread
From: Anup Patel @ 2023-05-17 10:51 UTC (permalink / raw)
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

We expose APLIC registers as KVM device attributes of the in-kernel
AIA irqchip device. This will allow KVM user-space to save/restore
APLIC state using KVM device ioctls().

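As a usage sketch (not part of this patch), user-space could save and
restore a single APLIC register like this, assuming aia_fd is the fd
returned by KVM_CREATE_DEVICE for KVM_DEV_TYPE_RISCV_AIA and the
attribute number is the APLIC register offset (domaincfg is at offset
0x0 per the AIA specification):

  #include <err.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  __u32 val;
  struct kvm_device_attr attr = {
  	.group = KVM_DEV_RISCV_AIA_GRP_APLIC,
  	.attr  = 0x0,	/* domaincfg register offset */
  	.addr  = (__u64)(unsigned long)&val,
  };

  /* Save the register value */
  if (ioctl(aia_fd, KVM_GET_DEVICE_ATTR, &attr) < 0)
  	err(1, "KVM_GET_DEVICE_ATTR");

  /* ... and restore it later */
  if (ioctl(aia_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
  	err(1, "KVM_SET_DEVICE_ATTR");
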
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia.h  |  3 +++
 arch/riscv/include/uapi/asm/kvm.h |  2 ++
 arch/riscv/kvm/aia_aplic.c        | 43 +++++++++++++++++++++++++++++++
 arch/riscv/kvm/aia_device.c       | 25 ++++++++++++++++++
 4 files changed, 73 insertions(+)

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index f6bd8523395f..ba939c0054aa 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -129,6 +129,9 @@ static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
 {
 }
 
+int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v);
+int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v);
+int kvm_riscv_aia_aplic_has_attr(struct kvm *kvm, unsigned long type);
 int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level);
 int kvm_riscv_aia_aplic_init(struct kvm *kvm);
 void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm);
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 57f8d8bb498e..e80210c2220b 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -240,6 +240,8 @@ enum KVM_RISCV_SBI_EXT_ID {
 #define KVM_DEV_RISCV_AIA_GRP_CTRL		2
 #define KVM_DEV_RISCV_AIA_CTRL_INIT		0
 
+#define KVM_DEV_RISCV_AIA_GRP_APLIC		3
+
 /* One single KVM irqchip, ie. the AIA */
 #define KVM_NR_IRQCHIPS			1
 
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
index 1b0a4df64815..ed9102dfba77 100644
--- a/arch/riscv/kvm/aia_aplic.c
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -499,6 +499,49 @@ static struct kvm_io_device_ops aplic_iodoev_ops = {
 	.write = aplic_mmio_write,
 };
 
+int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v)
+{
+	int rc;
+
+	if (!kvm->arch.aia.aplic_state)
+		return -ENODEV;
+
+	rc = aplic_mmio_write_offset(kvm, type, v);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v)
+{
+	int rc;
+
+	if (!kvm->arch.aia.aplic_state)
+		return -ENODEV;
+
+	rc = aplic_mmio_read_offset(kvm, type, v);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+int kvm_riscv_aia_aplic_has_attr(struct kvm *kvm, unsigned long type)
+{
+	int rc;
+	u32 val;
+
+	if (!kvm->arch.aia.aplic_state)
+		return -ENODEV;
+
+	rc = aplic_mmio_read_offset(kvm, type, &val);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
 int kvm_riscv_aia_aplic_init(struct kvm *kvm)
 {
 	int i, ret = 0;
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index a151fb357887..17dba92a90e1 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -364,6 +364,15 @@ static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 			break;
 		}
 
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_APLIC:
+		if (copy_from_user(&nr, uaddr, sizeof(nr)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = kvm_riscv_aia_aplic_set_attr(dev->kvm, type, nr);
+		mutex_unlock(&dev->kvm->lock);
+
 		break;
 	}
 
@@ -411,6 +420,20 @@ static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		if (copy_to_user(uaddr, &addr, sizeof(addr)))
 			return -EFAULT;
 
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_APLIC:
+		if (copy_from_user(&nr, uaddr, sizeof(nr)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = kvm_riscv_aia_aplic_get_attr(dev->kvm, type, &nr);
+		mutex_unlock(&dev->kvm->lock);
+		if (r)
+			return r;
+
+		if (copy_to_user(uaddr, &nr, sizeof(nr)))
+			return -EFAULT;
+
 		break;
 	}
 
@@ -447,6 +470,8 @@ static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 			return 0;
 		}
 		break;
+	case KVM_DEV_RISCV_AIA_GRP_APLIC:
+		return kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr);
 	}
 
 	return -ENXIO;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 09/10] RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
  2023-05-17 10:51 [PATCH 00/10] RISC-V KVM in-kernel AIA irqchip Anup Patel
                   ` (7 preceding siblings ...)
  2023-05-17 10:51 ` [PATCH 08/10] RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip Anup Patel
@ 2023-05-17 10:51 ` Anup Patel
  2023-06-07 23:16   ` Atish Patra
  2023-05-17 10:51 ` [PATCH 10/10] RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip Anup Patel
  9 siblings, 1 reply; 20+ messages in thread
From: Anup Patel @ 2023-05-17 10:51 UTC (permalink / raw)
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

AIA IMSIC support is possible at both HS-level and VS-level, but the
VS-level IMSICs are optional. We use the VS-level IMSICs for a Guest/VM
whenever they are available; otherwise we fall back to software
emulation of the AIA IMSIC.

This patch adds in-kernel virtualization of AIA IMSIC.

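Roughly, the per-VCPU policy implemented below is: on each VCPU load,
try to claim a VS-level guest interrupt file (HGEI) on the host CPU and
run the guest against it, else emulate the interrupt file in software.
A pseudo-C sketch of kvm_riscv_vcpu_aia_imsic_update(), not literal
code:

  hgei = kvm_riscv_aia_alloc_hgei(vcpu->cpu, vcpu, &va, &pa);
  if (hgei > 0) {
  	/* HW path: map the guest IMSIC page onto the VS-file and
  	 * select the file via HSTATUS.VGEIN */
  	hstatus |= (unsigned long)hgei << HSTATUS_VGEIN_SHIFT;
  } else {
  	/* SW path: guest IMSIC accesses trap and are emulated in
  	 * the per-VCPU SW-file (VGEIN == 0 means no VS-file) */
  	hstatus &= ~HSTATUS_VGEIN;
  }
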
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia.h |  46 +-
 arch/riscv/kvm/Makefile          |   1 +
 arch/riscv/kvm/aia_imsic.c       | 913 +++++++++++++++++++++++++++++++
 3 files changed, 924 insertions(+), 36 deletions(-)
 create mode 100644 arch/riscv/kvm/aia_imsic.c

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index ba939c0054aa..a4f6ebf90e31 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -90,44 +90,18 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
 
 extern struct kvm_device_ops kvm_riscv_aia_device_ops;
 
-static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
-{
-	return 1;
-}
+void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);
 
 #define KVM_RISCV_AIA_IMSIC_TOPEI	(ISELECT_MASK + 1)
-static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
-					       unsigned long isel,
-					       unsigned long *val,
-					       unsigned long new_val,
-					       unsigned long wr_mask)
-{
-	return 0;
-}
-
-static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
-						  u32 guest_index, u32 offset,
-						  u32 iid)
-{
-	return 0;
-}
-
-static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
-{
-}
+int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
+				 unsigned long *val, unsigned long new_val,
+				 unsigned long wr_mask);
+void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
+				    u32 guest_index, u32 offset, u32 iid);
+int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu);
 
 int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v);
 int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v);
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 94c43702c765..c1d1356387ff 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -29,3 +29,4 @@ kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
 kvm-y += aia.o
 kvm-y += aia_device.o
 kvm-y += aia_aplic.o
+kvm-y += aia_imsic.o
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
new file mode 100644
index 000000000000..2dc09dcb8ab5
--- /dev/null
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -0,0 +1,913 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ *	Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/kvm_host.h>
+#include <linux/math.h>
+#include <linux/spinlock.h>
+#include <linux/swab.h>
+#include <kvm/iodev.h>
+#include <asm/csr.h>
+#include <asm/kvm_aia_imsic.h>
+
+#define IMSIC_MAX_EIX	(IMSIC_MAX_ID / BITS_PER_TYPE(u64))
+
+struct imsic_mrif_eix {
+	unsigned long eip[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+	unsigned long eie[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+};
+
+struct imsic_mrif {
+	struct imsic_mrif_eix eix[IMSIC_MAX_EIX];
+	unsigned long eithreshold;
+	unsigned long eidelivery;
+};
+
+struct imsic {
+	struct kvm_io_device iodev;
+
+	u32 nr_msis;
+	u32 nr_eix;
+	u32 nr_hw_eix;
+
+	/*
+	 * At any point in time, the register state is in
+	 * one of the following places:
+	 *
+	 * 1) Hardware: IMSIC VS-file (vsfile_cpu >= 0)
+	 * 2) Software: IMSIC SW-file (vsfile_cpu < 0)
+	 */
+
+	/* IMSIC VS-file */
+	rwlock_t vsfile_lock;
+	int vsfile_cpu;
+	int vsfile_hgei;
+	void __iomem *vsfile_va;
+	phys_addr_t vsfile_pa;
+
+	/* IMSIC SW-file */
+	struct imsic_mrif *swfile;
+	phys_addr_t swfile_pa;
+};
+
+#define imsic_vs_csr_read(__c)			\
+({						\
+	unsigned long __r;			\
+	csr_write(CSR_VSISELECT, __c);		\
+	__r = csr_read(CSR_VSIREG);		\
+	__r;					\
+})
+
+#define imsic_read_switchcase(__ireg)			\
+	case __ireg:					\
+		return imsic_vs_csr_read(__ireg);
+#define imsic_read_switchcase_2(__ireg)			\
+	imsic_read_switchcase(__ireg + 0)		\
+	imsic_read_switchcase(__ireg + 1)
+#define imsic_read_switchcase_4(__ireg)			\
+	imsic_read_switchcase_2(__ireg + 0)		\
+	imsic_read_switchcase_2(__ireg + 2)
+#define imsic_read_switchcase_8(__ireg)			\
+	imsic_read_switchcase_4(__ireg + 0)		\
+	imsic_read_switchcase_4(__ireg + 4)
+#define imsic_read_switchcase_16(__ireg)		\
+	imsic_read_switchcase_8(__ireg + 0)		\
+	imsic_read_switchcase_8(__ireg + 8)
+#define imsic_read_switchcase_32(__ireg)		\
+	imsic_read_switchcase_16(__ireg + 0)		\
+	imsic_read_switchcase_16(__ireg + 16)
+#define imsic_read_switchcase_64(__ireg)		\
+	imsic_read_switchcase_32(__ireg + 0)		\
+	imsic_read_switchcase_32(__ireg + 32)
+
+static unsigned long imsic_eix_read(int ireg)
+{
+	switch (ireg) {
+	imsic_read_switchcase_64(IMSIC_EIP0)
+	imsic_read_switchcase_64(IMSIC_EIE0)
+	};
+
+	return 0;
+}
+
+#define imsic_vs_csr_swap(__c, __v)		\
+({						\
+	unsigned long __r;			\
+	csr_write(CSR_VSISELECT, __c);		\
+	__r = csr_swap(CSR_VSIREG, __v);	\
+	__r;					\
+})
+
+#define imsic_swap_switchcase(__ireg, __v)		\
+	case __ireg:					\
+		return imsic_vs_csr_swap(__ireg, __v);
+#define imsic_swap_switchcase_2(__ireg, __v)		\
+	imsic_swap_switchcase(__ireg + 0, __v)		\
+	imsic_swap_switchcase(__ireg + 1, __v)
+#define imsic_swap_switchcase_4(__ireg, __v)		\
+	imsic_swap_switchcase_2(__ireg + 0, __v)	\
+	imsic_swap_switchcase_2(__ireg + 2, __v)
+#define imsic_swap_switchcase_8(__ireg, __v)		\
+	imsic_swap_switchcase_4(__ireg + 0, __v)	\
+	imsic_swap_switchcase_4(__ireg + 4, __v)
+#define imsic_swap_switchcase_16(__ireg, __v)		\
+	imsic_swap_switchcase_8(__ireg + 0, __v)	\
+	imsic_swap_switchcase_8(__ireg + 8, __v)
+#define imsic_swap_switchcase_32(__ireg, __v)		\
+	imsic_swap_switchcase_16(__ireg + 0, __v)	\
+	imsic_swap_switchcase_16(__ireg + 16, __v)
+#define imsic_swap_switchcase_64(__ireg, __v)		\
+	imsic_swap_switchcase_32(__ireg + 0, __v)	\
+	imsic_swap_switchcase_32(__ireg + 32, __v)
+
+static unsigned long imsic_eix_swap(int ireg, unsigned long val)
+{
+	switch (ireg) {
+	imsic_swap_switchcase_64(IMSIC_EIP0, val)
+	imsic_swap_switchcase_64(IMSIC_EIE0, val)
+	};
+
+	return 0;
+}
+
+#define imsic_vs_csr_write(__c, __v)		\
+do {						\
+	csr_write(CSR_VSISELECT, __c);		\
+	csr_write(CSR_VSIREG, __v);		\
+} while (0)
+
+#define imsic_write_switchcase(__ireg, __v)		\
+	case __ireg:					\
+		imsic_vs_csr_write(__ireg, __v);	\
+		break;
+#define imsic_write_switchcase_2(__ireg, __v)		\
+	imsic_write_switchcase(__ireg + 0, __v)		\
+	imsic_write_switchcase(__ireg + 1, __v)
+#define imsic_write_switchcase_4(__ireg, __v)		\
+	imsic_write_switchcase_2(__ireg + 0, __v)	\
+	imsic_write_switchcase_2(__ireg + 2, __v)
+#define imsic_write_switchcase_8(__ireg, __v)		\
+	imsic_write_switchcase_4(__ireg + 0, __v)	\
+	imsic_write_switchcase_4(__ireg + 4, __v)
+#define imsic_write_switchcase_16(__ireg, __v)		\
+	imsic_write_switchcase_8(__ireg + 0, __v)	\
+	imsic_write_switchcase_8(__ireg + 8, __v)
+#define imsic_write_switchcase_32(__ireg, __v)		\
+	imsic_write_switchcase_16(__ireg + 0, __v)	\
+	imsic_write_switchcase_16(__ireg + 16, __v)
+#define imsic_write_switchcase_64(__ireg, __v)		\
+	imsic_write_switchcase_32(__ireg + 0, __v)	\
+	imsic_write_switchcase_32(__ireg + 32, __v)
+
+static void imsic_eix_write(int ireg, unsigned long val)
+{
+	switch (ireg) {
+	imsic_write_switchcase_64(IMSIC_EIP0, val)
+	imsic_write_switchcase_64(IMSIC_EIE0, val)
+	};
+}
+
+#define imsic_vs_csr_set(__c, __v)		\
+do {						\
+	csr_write(CSR_VSISELECT, __c);		\
+	csr_set(CSR_VSIREG, __v);		\
+} while (0)
+
+#define imsic_set_switchcase(__ireg, __v)		\
+	case __ireg:					\
+		imsic_vs_csr_set(__ireg, __v);		\
+		break;
+#define imsic_set_switchcase_2(__ireg, __v)		\
+	imsic_set_switchcase(__ireg + 0, __v)		\
+	imsic_set_switchcase(__ireg + 1, __v)
+#define imsic_set_switchcase_4(__ireg, __v)		\
+	imsic_set_switchcase_2(__ireg + 0, __v)		\
+	imsic_set_switchcase_2(__ireg + 2, __v)
+#define imsic_set_switchcase_8(__ireg, __v)		\
+	imsic_set_switchcase_4(__ireg + 0, __v)		\
+	imsic_set_switchcase_4(__ireg + 4, __v)
+#define imsic_set_switchcase_16(__ireg, __v)		\
+	imsic_set_switchcase_8(__ireg + 0, __v)		\
+	imsic_set_switchcase_8(__ireg + 8, __v)
+#define imsic_set_switchcase_32(__ireg, __v)		\
+	imsic_set_switchcase_16(__ireg + 0, __v)	\
+	imsic_set_switchcase_16(__ireg + 16, __v)
+#define imsic_set_switchcase_64(__ireg, __v)		\
+	imsic_set_switchcase_32(__ireg + 0, __v)	\
+	imsic_set_switchcase_32(__ireg + 32, __v)
+
+static void imsic_eix_set(int ireg, unsigned long val)
+{
+	switch (ireg) {
+	imsic_set_switchcase_64(IMSIC_EIP0, val)
+	imsic_set_switchcase_64(IMSIC_EIE0, val)
+	};
+}
+
+static unsigned long imsic_mrif_atomic_rmw(struct imsic_mrif *mrif,
+					   unsigned long *ptr,
+					   unsigned long new_val,
+					   unsigned long wr_mask)
+{
+	unsigned long old_val = 0, tmp = 0;
+
+	__asm__ __volatile__ (
+		"0:	lr.w.aq   %1, %0\n"
+		"	and       %2, %1, %3\n"
+		"	or        %2, %2, %4\n"
+		"	sc.w.rl   %2, %2, %0\n"
+		"	bnez      %2, 0b"
+		: "+A" (*ptr), "+r" (old_val), "+r" (tmp)
+		: "r" (~wr_mask), "r" (new_val & wr_mask)
+		: "memory");
+
+	return old_val;
+}
+
+static unsigned long imsic_mrif_atomic_or(struct imsic_mrif *mrif,
+					  unsigned long *ptr,
+					  unsigned long val)
+{
+	return arch_atomic_long_fetch_or(val, (atomic_long_t *)ptr);
+}
+
+#define imsic_mrif_atomic_write(__mrif, __ptr, __new_val)	\
+		imsic_mrif_atomic_rmw(__mrif, __ptr, __new_val, -1UL)
+#define imsic_mrif_atomic_read(__mrif, __ptr)			\
+		imsic_mrif_atomic_or(__mrif, __ptr, 0)
+
+static u32 imsic_mrif_topei(struct imsic_mrif *mrif, u32 nr_eix, u32 nr_msis)
+{
+	struct imsic_mrif_eix *eix;
+	u32 i, imin, imax, ei, max_msi;
+	unsigned long eipend[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+	unsigned long eithreshold = imsic_mrif_atomic_read(mrif,
+							&mrif->eithreshold);
+
+	max_msi = (eithreshold && (eithreshold <= nr_msis)) ?
+		   eithreshold : nr_msis;
+	for (ei = 0; ei < nr_eix; ei++) {
+		eix = &mrif->eix[ei];
+		eipend[0] = imsic_mrif_atomic_read(mrif, &eix->eie[0]) &
+			    imsic_mrif_atomic_read(mrif, &eix->eip[0]);
+#ifdef CONFIG_32BIT
+		eipend[1] = imsic_mrif_atomic_read(mrif, &eix->eie[1]) &
+			    imsic_mrif_atomic_read(mrif, &eix->eip[1]);
+		if (!eipend[0] && !eipend[1])
+#else
+		if (!eipend[0])
+#endif
+			continue;
+
+		imin = ei * BITS_PER_TYPE(u64);
+		imax = ((imin + BITS_PER_TYPE(u64)) < max_msi) ?
+			imin + BITS_PER_TYPE(u64) : max_msi;
+		for (i = (!imin) ? 1 : imin; i < imax; i++) {
+			if (test_bit(i - imin, eipend))
+				return (i << TOPEI_ID_SHIFT) | i;
+		}
+	}
+
+	return 0;
+}
+
+static int imsic_mrif_rmw(struct imsic_mrif *mrif, u32 nr_eix,
+			  unsigned long isel, unsigned long *val,
+			  unsigned long new_val, unsigned long wr_mask)
+{
+	bool pend;
+	struct imsic_mrif_eix *eix;
+	unsigned long *ei, num, old_val = 0;
+
+	switch (isel) {
+	case IMSIC_EIDELIVERY:
+		old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eidelivery,
+						new_val, wr_mask & 0x1);
+		break;
+	case IMSIC_EITHRESHOLD:
+		old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eithreshold,
+				new_val, wr_mask & (IMSIC_MAX_ID - 1));
+		break;
+	case IMSIC_EIP0 ... IMSIC_EIP63:
+	case IMSIC_EIE0 ... IMSIC_EIE63:
+		if (isel >= IMSIC_EIP0 && isel <= IMSIC_EIP63) {
+			pend = true;
+			num = isel - IMSIC_EIP0;
+		} else {
+			pend = false;
+			num = isel - IMSIC_EIE0;
+		}
+
+		if ((num / 2) >= nr_eix)
+			return -EINVAL;
+		eix = &mrif->eix[num / 2];
+
+#ifndef CONFIG_32BIT
+		if (num & 0x1)
+			return -EINVAL;
+		ei = (pend) ? &eix->eip[0] : &eix->eie[0];
+#else
+		ei = (pend) ? &eix->eip[num & 0x1] : &eix->eie[num & 0x1];
+#endif
+
+		/* Bit0 of EIP0 or EIE0 is read-only */
+		if (!num)
+			wr_mask &= ~BIT(0);
+
+		old_val = imsic_mrif_atomic_rmw(mrif, ei, new_val, wr_mask);
+		break;
+	default:
+		return -ENOENT;
+	};
+
+	if (val)
+		*val = old_val;
+
+	return 0;
+}
+
+struct imsic_vsfile_read_data {
+	int hgei;
+	u32 nr_eix;
+	bool clear;
+	struct imsic_mrif *mrif;
+};
+
+static void imsic_vsfile_local_read(void *data)
+{
+	u32 i;
+	struct imsic_mrif_eix *eix;
+	struct imsic_vsfile_read_data *idata = data;
+	struct imsic_mrif *mrif = idata->mrif;
+	unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+	old_vsiselect = csr_read(CSR_VSISELECT);
+	old_hstatus = csr_read(CSR_HSTATUS);
+	new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+	new_hstatus |= ((unsigned long)idata->hgei) << HSTATUS_VGEIN_SHIFT;
+	csr_write(CSR_HSTATUS, new_hstatus);
+
+	/*
+	 * We don't use imsic_mrif_atomic_xyz() functions to store
+	 * values in MRIF because imsic_vsfile_read() is always called
+	 * with a pointer to a temporary MRIF on the stack.
+	 */
+
+	if (idata->clear) {
+		mrif->eidelivery = imsic_vs_csr_swap(IMSIC_EIDELIVERY, 0);
+		mrif->eithreshold = imsic_vs_csr_swap(IMSIC_EITHRESHOLD, 0);
+		for (i = 0; i < idata->nr_eix; i++) {
+			eix = &mrif->eix[i];
+			eix->eip[0] = imsic_eix_swap(IMSIC_EIP0 + i * 2, 0);
+			eix->eie[0] = imsic_eix_swap(IMSIC_EIE0 + i * 2, 0);
+#ifdef CONFIG_32BIT
+			eix->eip[1] = imsic_eix_swap(IMSIC_EIP0 + i * 2 + 1, 0);
+			eix->eie[1] = imsic_eix_swap(IMSIC_EIE0 + i * 2 + 1, 0);
+#endif
+		}
+	} else {
+		mrif->eidelivery = imsic_vs_csr_read(IMSIC_EIDELIVERY);
+		mrif->eithreshold = imsic_vs_csr_read(IMSIC_EITHRESHOLD);
+		for (i = 0; i < idata->nr_eix; i++) {
+			eix = &mrif->eix[i];
+			eix->eip[0] = imsic_eix_read(IMSIC_EIP0 + i * 2);
+			eix->eie[0] = imsic_eix_read(IMSIC_EIE0 + i * 2);
+#ifdef CONFIG_32BIT
+			eix->eip[1] = imsic_eix_read(IMSIC_EIP0 + i * 2 + 1);
+			eix->eie[1] = imsic_eix_read(IMSIC_EIE0 + i * 2 + 1);
+#endif
+		}
+	}
+
+	csr_write(CSR_HSTATUS, old_hstatus);
+	csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_read(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
+			      bool clear, struct imsic_mrif *mrif)
+{
+	struct imsic_vsfile_read_data idata;
+
+	/* We can only read and clear if we have an IMSIC VS-file */
+	if (vsfile_cpu < 0 || vsfile_hgei <= 0)
+		return;
+
+	/* We can only read and clear on the local CPU */
+	idata.hgei = vsfile_hgei;
+	idata.nr_eix = nr_eix;
+	idata.clear = clear;
+	idata.mrif = mrif;
+	on_each_cpu_mask(cpumask_of(vsfile_cpu),
+			 imsic_vsfile_local_read, &idata, 1);
+}
+
+static void imsic_vsfile_local_clear(int vsfile_hgei, u32 nr_eix)
+{
+	u32 i;
+	unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+	/* We can only zero-out if we have an IMSIC VS-file */
+	if (vsfile_hgei <= 0)
+		return;
+
+	old_vsiselect = csr_read(CSR_VSISELECT);
+	old_hstatus = csr_read(CSR_HSTATUS);
+	new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+	new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+	csr_write(CSR_HSTATUS, new_hstatus);
+
+	imsic_vs_csr_write(IMSIC_EIDELIVERY, 0);
+	imsic_vs_csr_write(IMSIC_EITHRESHOLD, 0);
+	for (i = 0; i < nr_eix; i++) {
+		imsic_eix_write(IMSIC_EIP0 + i * 2, 0);
+		imsic_eix_write(IMSIC_EIE0 + i * 2, 0);
+#ifdef CONFIG_32BIT
+		imsic_eix_write(IMSIC_EIP0 + i * 2 + 1, 0);
+		imsic_eix_write(IMSIC_EIE0 + i * 2 + 1, 0);
+#endif
+	}
+
+	csr_write(CSR_HSTATUS, old_hstatus);
+	csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_local_update(int vsfile_hgei, u32 nr_eix,
+				      struct imsic_mrif *mrif)
+{
+	u32 i;
+	struct imsic_mrif_eix *eix;
+	unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+	/* We can only update if we have a HW IMSIC context */
+	if (vsfile_hgei <= 0)
+		return;
+
+	/*
+	 * We don't use imsic_mrif_atomic_xyz() functions to read values
+	 * from MRIF in this function because it is always called with a
+	 * pointer to a temporary MRIF on the stack.
+	 */
+
+	old_vsiselect = csr_read(CSR_VSISELECT);
+	old_hstatus = csr_read(CSR_HSTATUS);
+	new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+	new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+	csr_write(CSR_HSTATUS, new_hstatus);
+
+	for (i = 0; i < nr_eix; i++) {
+		eix = &mrif->eix[i];
+		imsic_eix_set(IMSIC_EIP0 + i * 2, eix->eip[0]);
+		imsic_eix_set(IMSIC_EIE0 + i * 2, eix->eie[0]);
+#ifdef CONFIG_32BIT
+		imsic_eix_set(IMSIC_EIP0 + i * 2 + 1, eix->eip[1]);
+		imsic_eix_set(IMSIC_EIE0 + i * 2 + 1, eix->eie[1]);
+#endif
+	}
+	imsic_vs_csr_write(IMSIC_EITHRESHOLD, mrif->eithreshold);
+	imsic_vs_csr_write(IMSIC_EIDELIVERY, mrif->eidelivery);
+
+	csr_write(CSR_HSTATUS, old_hstatus);
+	csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_cleanup(struct imsic *imsic)
+{
+	int old_vsfile_hgei, old_vsfile_cpu;
+	unsigned long flags;
+
+	/*
+	 * We don't use imsic_mrif_atomic_xyz() functions to clear the
+	 * SW-file in this function because it is always called when the
+	 * VCPU is being destroyed.
+	 */
+
+	write_lock_irqsave(&imsic->vsfile_lock, flags);
+	old_vsfile_hgei = imsic->vsfile_hgei;
+	old_vsfile_cpu = imsic->vsfile_cpu;
+	imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
+	imsic->vsfile_va = NULL;
+	imsic->vsfile_pa = 0;
+	write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+	memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+
+	if (old_vsfile_cpu >= 0)
+		kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+}
+
+static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
+{
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+	struct imsic_mrif *mrif = imsic->swfile;
+
+	if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
+	    imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
+		kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
+	else
+		kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+}
+
+static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
+			      struct imsic_mrif *mrif)
+{
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+	/*
+	 * We don't use imsic_mrif_atomic_xyz() functions to read and
+	 * write SW-file and MRIF in this function because it is always
+	 * called when the VCPU is not using the SW-file and the MRIF
+	 * argument points to a temporary MRIF on the stack.
+	 */
+
+	memcpy(mrif, imsic->swfile, sizeof(*mrif));
+	if (clear) {
+		memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+		kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+	}
+}
+
+static void imsic_swfile_update(struct kvm_vcpu *vcpu,
+				struct imsic_mrif *mrif)
+{
+	u32 i;
+	struct imsic_mrif_eix *seix, *eix;
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+	struct imsic_mrif *smrif = imsic->swfile;
+
+	imsic_mrif_atomic_write(smrif, &smrif->eidelivery, mrif->eidelivery);
+	imsic_mrif_atomic_write(smrif, &smrif->eithreshold, mrif->eithreshold);
+	for (i = 0; i < imsic->nr_eix; i++) {
+		seix = &smrif->eix[i];
+		eix = &mrif->eix[i];
+		imsic_mrif_atomic_or(smrif, &seix->eip[0], eix->eip[0]);
+		imsic_mrif_atomic_or(smrif, &seix->eie[0], eix->eie[0]);
+#ifdef CONFIG_32BIT
+		imsic_mrif_atomic_or(smrif, &seix->eip[1], eix->eip[1]);
+		imsic_mrif_atomic_or(smrif, &seix->eie[1], eix->eie[1]);
+#endif
+	}
+
+	imsic_swfile_extirq_update(vcpu);
+}
+
+void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags;
+	struct imsic_mrif tmrif;
+	int old_vsfile_hgei, old_vsfile_cpu;
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+	/* Read and clear IMSIC VS-file details */
+	write_lock_irqsave(&imsic->vsfile_lock, flags);
+	old_vsfile_hgei = imsic->vsfile_hgei;
+	old_vsfile_cpu = imsic->vsfile_cpu;
+	imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
+	imsic->vsfile_va = NULL;
+	imsic->vsfile_pa = 0;
+	write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+	/* Do nothing if there is no IMSIC VS-file to release */
+	if (old_vsfile_cpu < 0)
+		return;
+
+	/*
+	 * At this point, all interrupt producers are still using
+	 * the old IMSIC VS-file so we first re-direct all interrupt
+	 * producers.
+	 */
+
+	/* Purge the G-stage mapping */
+	kvm_riscv_gstage_iounmap(vcpu->kvm,
+				 vcpu->arch.aia_context.imsic_addr,
+				 IMSIC_MMIO_PAGE_SZ);
+
+	/* TODO: Purge the IOMMU mapping ??? */
+
+	/*
+	 * At this point, all interrupt producers have been re-directed
+	 * to somewhere else so we move register state from the old IMSIC
+	 * VS-file to the IMSIC SW-file.
+	 */
+
+	/* Read and clear register state from old IMSIC VS-file */
+	memset(&tmrif, 0, sizeof(tmrif));
+	imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu, imsic->nr_hw_eix,
+			  true, &tmrif);
+
+	/* Update register state in IMSIC SW-file */
+	imsic_swfile_update(vcpu, &tmrif);
+
+	/* Free up the old IMSIC VS-file */
+	kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+}
+
+int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags;
+	phys_addr_t new_vsfile_pa;
+	struct imsic_mrif tmrif;
+	void __iomem *new_vsfile_va;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_run *run = vcpu->run;
+	struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
+	struct imsic *imsic = vaia->imsic_state;
+	int ret = 0, new_vsfile_hgei = -1, old_vsfile_hgei, old_vsfile_cpu;
+
+	/* Do nothing for emulation mode */
+	if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
+		return 1;
+
+	/* Read old IMSIC VS-file details */
+	read_lock_irqsave(&imsic->vsfile_lock, flags);
+	old_vsfile_hgei = imsic->vsfile_hgei;
+	old_vsfile_cpu = imsic->vsfile_cpu;
+	read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+	/* Do nothing if we are continuing on the same CPU */
+	if (old_vsfile_cpu == vcpu->cpu)
+		return 1;
+
+	/* Allocate new IMSIC VS-file */
+	ret = kvm_riscv_aia_alloc_hgei(vcpu->cpu, vcpu,
+				       &new_vsfile_va, &new_vsfile_pa);
+	if (ret <= 0) {
+		/* For HW acceleration mode, we can't continue */
+		if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_HWACCEL) {
+			run->fail_entry.hardware_entry_failure_reason =
+								CSR_HSTATUS;
+			run->fail_entry.cpu = vcpu->cpu;
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			return 0;
+		}
+
+		/* Release old IMSIC VS-file */
+		if (old_vsfile_cpu >= 0)
+			kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+		/* For automatic mode, we continue */
+		goto done;
+	}
+	new_vsfile_hgei = ret;
+
+	/*
+	 * At this point, all interrupt producers are still using
+	 * the old IMSIC VS-file so we first move all interrupt
+	 * producers to the new IMSIC VS-file.
+	 */
+
+	/* Zero-out new IMSIC VS-file */
+	imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix);
+
+	/* Update G-stage mapping for the new IMSIC VS-file */
+	ret = kvm_riscv_gstage_ioremap(kvm, vcpu->arch.aia_context.imsic_addr,
+				       new_vsfile_pa, IMSIC_MMIO_PAGE_SZ,
+				       true, true);
+	if (ret)
+		goto fail_free_vsfile_hgei;
+
+	/* TODO: Update the IOMMU mapping ??? */
+
+	/* Update new IMSIC VS-file details in IMSIC context */
+	write_lock_irqsave(&imsic->vsfile_lock, flags);
+	imsic->vsfile_hgei = new_vsfile_hgei;
+	imsic->vsfile_cpu = vcpu->cpu;
+	imsic->vsfile_va = new_vsfile_va;
+	imsic->vsfile_pa = new_vsfile_pa;
+	write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+	/*
+	 * At this point, all interrupt producers have been moved
+	 * to the new IMSIC VS-file so we move register state from
+	 * the old IMSIC VS/SW-file to the new IMSIC VS-file.
+	 */
+
+	memset(&tmrif, 0, sizeof(tmrif));
+	if (old_vsfile_cpu >= 0) {
+		/* Read and clear register state from old IMSIC VS-file */
+		imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu,
+				  imsic->nr_hw_eix, true, &tmrif);
+
+		/* Free up the old IMSIC VS-file */
+		kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+	} else {
+		/* Read and clear register state from IMSIC SW-file */
+		imsic_swfile_read(vcpu, true, &tmrif);
+	}
+
+	/* Restore register state in the new IMSIC VS-file */
+	imsic_vsfile_local_update(new_vsfile_hgei, imsic->nr_hw_eix, &tmrif);
+
+done:
+	/* Set VCPU HSTATUS.VGEIN to new IMSIC VS-file */
+	vcpu->arch.guest_context.hstatus &= ~HSTATUS_VGEIN;
+	if (new_vsfile_hgei > 0)
+		vcpu->arch.guest_context.hstatus |=
+			((unsigned long)new_vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+
+	/* Continue run-loop */
+	return 1;
+
+fail_free_vsfile_hgei:
+	kvm_riscv_aia_free_hgei(vcpu->cpu, new_vsfile_hgei);
+	return ret;
+}
+
+int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
+				 unsigned long *val, unsigned long new_val,
+				 unsigned long wr_mask)
+{
+	u32 topei;
+	struct imsic_mrif_eix *eix;
+	int r, rc = KVM_INSN_CONTINUE_NEXT_SEPC;
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+	if (isel == KVM_RISCV_AIA_IMSIC_TOPEI) {
+		/* Read pending and enabled interrupt with highest priority */
+		topei = imsic_mrif_topei(imsic->swfile, imsic->nr_eix,
+					 imsic->nr_msis);
+		if (val)
+			*val = topei;
+
+		/* Writes ignore the value and clear the top pending interrupt */
+		if (topei && wr_mask) {
+			topei >>= TOPEI_ID_SHIFT;
+			if (topei) {
+				eix = &imsic->swfile->eix[topei /
+							  BITS_PER_TYPE(u64)];
+				clear_bit(topei & (BITS_PER_TYPE(u64) - 1),
+					  eix->eip);
+			}
+		}
+	} else {
+		r = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix, isel,
+				   val, new_val, wr_mask);
+		/* Forward unknown IMSIC register to user-space */
+		if (r)
+			rc = (r == -ENOENT) ? 0 : KVM_INSN_ILLEGAL_TRAP;
+	}
+
+	if (wr_mask)
+		imsic_swfile_extirq_update(vcpu);
+
+	return rc;
+}
+
+void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
+{
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+	if (!imsic)
+		return;
+
+	kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+	memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+}
+
+int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
+				    u32 guest_index, u32 offset, u32 iid)
+{
+	unsigned long flags;
+	struct imsic_mrif_eix *eix;
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+	/* We only emulate one IMSIC MMIO page for each Guest VCPU */
+	if (!imsic || !iid || guest_index ||
+	    (offset != IMSIC_MMIO_SETIPNUM_LE &&
+	     offset != IMSIC_MMIO_SETIPNUM_BE))
+		return -ENODEV;
+
+	iid = (offset == IMSIC_MMIO_SETIPNUM_BE) ? __swab32(iid) : iid;
+	if (imsic->nr_msis <= iid)
+		return -EINVAL;
+
+	read_lock_irqsave(&imsic->vsfile_lock, flags);
+
+	if (imsic->vsfile_cpu >= 0) {
+		writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE);
+		kvm_vcpu_kick(vcpu);
+	} else {
+		eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)];
+		set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip);
+		imsic_swfile_extirq_update(vcpu);
+	}
+
+	read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+	return 0;
+}
+
+static int imsic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+			   gpa_t addr, int len, void *val)
+{
+	if (len != 4 || (addr & 0x3) != 0)
+		return -EOPNOTSUPP;
+
+	*((u32 *)val) = 0;
+
+	return 0;
+}
+
+static int imsic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+			    gpa_t addr, int len, const void *val)
+{
+	struct kvm_msi msi = { 0 };
+
+	if (len != 4 || (addr & 0x3) != 0)
+		return -EOPNOTSUPP;
+
+	msi.address_hi = addr >> 32;
+	msi.address_lo = (u32)addr;
+	msi.data = *((const u32 *)val);
+	kvm_riscv_aia_inject_msi(vcpu->kvm, &msi);
+
+	return 0;
+};
+
+static struct kvm_io_device_ops imsic_iodoev_ops = {
+	.read = imsic_mmio_read,
+	.write = imsic_mmio_write,
+};
+
+int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
+{
+	int ret = 0;
+	struct imsic *imsic;
+	struct page *swfile_page;
+	struct kvm *kvm = vcpu->kvm;
+
+	/* Fail if we have zero IDs */
+	if (!kvm->arch.aia.nr_ids)
+		return -EINVAL;
+
+	/* Allocate IMSIC context */
+	imsic = kzalloc(sizeof(*imsic), GFP_KERNEL);
+	if (!imsic)
+		return -ENOMEM;
+	vcpu->arch.aia_context.imsic_state = imsic;
+
+	/* Setup IMSIC context */
+	imsic->nr_msis = kvm->arch.aia.nr_ids + 1;
+	rwlock_init(&imsic->vsfile_lock);
+	imsic->nr_eix = BITS_TO_U64(imsic->nr_msis);
+	imsic->nr_hw_eix = BITS_TO_U64(kvm_riscv_aia_max_ids);
+	imsic->vsfile_hgei = imsic->vsfile_cpu = -1;
+
+	/* Setup IMSIC SW-file */
+	swfile_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+				  get_order(sizeof(*imsic->swfile)));
+	if (!swfile_page) {
+		ret = -ENOMEM;
+		goto fail_free_imsic;
+	}
+	imsic->swfile = page_to_virt(swfile_page);
+	imsic->swfile_pa = page_to_phys(swfile_page);
+
+	/* Setup IO device */
+	kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+				      vcpu->arch.aia_context.imsic_addr,
+				      KVM_DEV_RISCV_IMSIC_SIZE,
+				      &imsic->iodev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret)
+		goto fail_free_swfile;
+
+	return 0;
+
+fail_free_swfile:
+	free_pages((unsigned long)imsic->swfile,
+		   get_order(sizeof(*imsic->swfile)));
+fail_free_imsic:
+	vcpu->arch.aia_context.imsic_state = NULL;
+	kfree(imsic);
+	return ret;
+}
+
+void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+	if (!imsic)
+		return;
+
+	imsic_vsfile_cleanup(imsic);
+
+	mutex_lock(&kvm->slots_lock);
+	kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &imsic->iodev);
+	mutex_unlock(&kvm->slots_lock);
+
+	free_pages((unsigned long)imsic->swfile,
+		   get_order(sizeof(*imsic->swfile)));
+
+	vcpu->arch.aia_context.imsic_state = NULL;
+	kfree(imsic);
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 10/10] RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
  2023-05-17 10:51 [PATCH 00/10] RISC-V KVM in-kernel AIA irqchip Anup Patel
                   ` (8 preceding siblings ...)
  2023-05-17 10:51 ` [PATCH 09/10] RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC Anup Patel
@ 2023-05-17 10:51 ` Anup Patel
  2023-06-07 23:17   ` Atish Patra
  9 siblings, 1 reply; 20+ messages in thread
From: Anup Patel @ 2023-05-17 10:51 UTC (permalink / raw)
  To: Paolo Bonzini, Atish Patra
  Cc: Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm, kvm-riscv,
	linux-riscv, linux-kernel, Anup Patel

We expose IMSIC registers as KVM device attributes of the in-kernel
AIA irqchip device. This will allow KVM user-space to save/restore
IMSIC state of each VCPU using KVM device ioctls().

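As a usage sketch (not part of this patch), user-space could read the
eidelivery register of VCPU 2 like this, assuming aia_fd is the AIA
device fd and using the iselect value of eidelivery (0x70 per the AIA
specification); the payload for this attribute group is an unsigned
long:

  #include <err.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  unsigned long val;
  struct kvm_device_attr attr = {
  	.group = KVM_DEV_RISCV_AIA_GRP_IMSIC,
  	.attr  = KVM_DEV_RISCV_AIA_IMSIC_MKATTR(2, 0x70),
  	.addr  = (__u64)(unsigned long)&val,
  };

  if (ioctl(aia_fd, KVM_GET_DEVICE_ATTR, &attr) < 0)
  	err(1, "KVM_GET_DEVICE_ATTR");
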
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_aia.h  |   3 +
 arch/riscv/include/uapi/asm/kvm.h |  12 +++
 arch/riscv/kvm/aia_device.c       |  29 ++++-
 arch/riscv/kvm/aia_imsic.c        | 170 ++++++++++++++++++++++++++++++
 4 files changed, 212 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index a4f6ebf90e31..1f37b600ca47 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -97,6 +97,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);
 int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
 				 unsigned long *val, unsigned long new_val,
 				 unsigned long wr_mask);
+int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
+				bool write, unsigned long *val);
+int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type);
 void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu);
 int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
 				    u32 guest_index, u32 offset, u32 iid);
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index e80210c2220b..624784bb21dd 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -242,6 +242,18 @@ enum KVM_RISCV_SBI_EXT_ID {
 
 #define KVM_DEV_RISCV_AIA_GRP_APLIC		3
 
+#define KVM_DEV_RISCV_AIA_GRP_IMSIC		4
+#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS	12
+#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK	\
+		((1U << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) - 1)
+#define KVM_DEV_RISCV_AIA_IMSIC_MKATTR(__vcpu, __isel)	\
+		(((__vcpu) << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) | \
+		 ((__isel) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK))
+#define KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(__attr)	\
+		((__attr) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK)
+#define KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(__attr)	\
+		((__attr) >> KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS)
+
 /* One single KVM irqchip, ie. the AIA */
 #define KVM_NR_IRQCHIPS			1
 
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 17dba92a90e1..ac7bd98301a3 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -326,7 +326,7 @@ static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 	u32 nr;
 	u64 addr;
 	int nr_vcpus, r = -ENXIO;
-	unsigned long type = (unsigned long)attr->attr;
+	unsigned long v, type = (unsigned long)attr->attr;
 	void __user *uaddr = (void __user *)(long)attr->addr;
 
 	switch (attr->group) {
@@ -373,6 +373,15 @@ static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = kvm_riscv_aia_aplic_set_attr(dev->kvm, type, nr);
 		mutex_unlock(&dev->kvm->lock);
 
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_IMSIC:
+		if (copy_from_user(&v, uaddr, sizeof(v)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = kvm_riscv_aia_imsic_rw_attr(dev->kvm, type, true, &v);
+		mutex_unlock(&dev->kvm->lock);
+
 		break;
 	}
 
@@ -385,7 +394,7 @@ static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 	u64 addr;
 	int nr_vcpus, r = -ENXIO;
 	void __user *uaddr = (void __user *)(long)attr->addr;
-	unsigned long type = (unsigned long)attr->attr;
+	unsigned long v, type = (unsigned long)attr->attr;
 
 	switch (attr->group) {
 	case KVM_DEV_RISCV_AIA_GRP_CONFIG:
@@ -434,6 +443,20 @@ static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		if (copy_to_user(uaddr, &nr, sizeof(nr)))
 			return -EFAULT;
 
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_IMSIC:
+		if (copy_from_user(&v, uaddr, sizeof(v)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = kvm_riscv_aia_imsic_rw_attr(dev->kvm, type, false, &v);
+		mutex_unlock(&dev->kvm->lock);
+		if (r)
+			return r;
+
+		if (copy_to_user(uaddr, &v, sizeof(v)))
+			return -EFAULT;
+
 		break;
 	}
 
@@ -472,6 +495,8 @@ static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		break;
 	case KVM_DEV_RISCV_AIA_GRP_APLIC:
 		return kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr);
+	case KVM_DEV_RISCV_AIA_GRP_IMSIC:
+		return kvm_riscv_aia_imsic_has_attr(dev->kvm, attr->attr);
 	}
 
 	return -ENXIO;
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index 2dc09dcb8ab5..8f108cfa80e5 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -277,6 +277,33 @@ static u32 imsic_mrif_topei(struct imsic_mrif *mrif, u32 nr_eix, u32 nr_msis)
 	return 0;
 }
 
+static int imsic_mrif_isel_check(u32 nr_eix, unsigned long isel)
+{
+	u32 num = 0;
+
+	switch (isel) {
+	case IMSIC_EIDELIVERY:
+	case IMSIC_EITHRESHOLD:
+		break;
+	case IMSIC_EIP0 ... IMSIC_EIP63:
+		num = isel - IMSIC_EIP0;
+		break;
+	case IMSIC_EIE0 ... IMSIC_EIE63:
+		num = isel - IMSIC_EIE0;
+		break;
+	default:
+		return -ENOENT;
+	}
+#ifndef CONFIG_32BIT
+	if (num & 0x1)
+		return -EINVAL;
+#endif
+	if ((num / 2) >= nr_eix)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int imsic_mrif_rmw(struct imsic_mrif *mrif, u32 nr_eix,
 			  unsigned long isel, unsigned long *val,
 			  unsigned long new_val, unsigned long wr_mask)
@@ -407,6 +434,86 @@ static void imsic_vsfile_read(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
 			 imsic_vsfile_local_read, &idata, 1);
 }
 
+struct imsic_vsfile_rw_data {
+	int hgei;
+	int isel;
+	bool write;
+	unsigned long val;
+};
+
+static void imsic_vsfile_local_rw(void *data)
+{
+	struct imsic_vsfile_rw_data *idata = data;
+	unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+	old_vsiselect = csr_read(CSR_VSISELECT);
+	old_hstatus = csr_read(CSR_HSTATUS);
+	new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+	new_hstatus |= ((unsigned long)idata->hgei) << HSTATUS_VGEIN_SHIFT;
+	csr_write(CSR_HSTATUS, new_hstatus);
+
+	switch (idata->isel) {
+	case IMSIC_EIDELIVERY:
+		if (idata->write)
+			imsic_vs_csr_write(IMSIC_EIDELIVERY, idata->val);
+		else
+			idata->val = imsic_vs_csr_read(IMSIC_EIDELIVERY);
+		break;
+	case IMSIC_EITHRESHOLD:
+		if (idata->write)
+			imsic_vs_csr_write(IMSIC_EITHRESHOLD, idata->val);
+		else
+			idata->val = imsic_vs_csr_read(IMSIC_EITHRESHOLD);
+		break;
+	case IMSIC_EIP0 ... IMSIC_EIP63:
+	case IMSIC_EIE0 ... IMSIC_EIE63:
+#ifndef CONFIG_32BIT
+		if (idata->isel & 0x1)
+			break;
+#endif
+		if (idata->write)
+			imsic_eix_write(idata->isel, idata->val);
+		else
+			idata->val = imsic_eix_read(idata->isel);
+		break;
+	default:
+		break;
+	}
+
+	csr_write(CSR_HSTATUS, old_hstatus);
+	csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static int imsic_vsfile_rw(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
+			   unsigned long isel, bool write,
+			   unsigned long *val)
+{
+	int rc;
+	struct imsic_vsfile_rw_data rdata;
+
+	/* We can only access the register if we have an IMSIC VS-file */
+	if (vsfile_cpu < 0 || vsfile_hgei <= 0)
+		return -EINVAL;
+
+	/* Check IMSIC register iselect */
+	rc = imsic_mrif_isel_check(nr_eix, isel);
+	if (rc)
+		return rc;
+
+	/* We can only access the register on the local CPU */
+	rdata.hgei = vsfile_hgei;
+	rdata.isel = isel;
+	rdata.write = write;
+	rdata.val = (write) ? *val : 0;
+	on_each_cpu_mask(cpumask_of(vsfile_cpu),
+			 imsic_vsfile_local_rw, &rdata, 1);
+
+	if (!write)
+		*val = rdata.val;
+
+	return 0;
+}
+
 static void imsic_vsfile_local_clear(int vsfile_hgei, u32 nr_eix)
 {
 	u32 i;
@@ -758,6 +865,69 @@ int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
 	return rc;
 }
 
+int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
+				bool write, unsigned long *val)
+{
+	u32 isel, vcpu_id;
+	unsigned long flags;
+	struct imsic *imsic;
+	struct kvm_vcpu *vcpu;
+	int rc, vsfile_hgei, vsfile_cpu;
+
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -ENODEV;
+
+	vcpu_id = KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(type);
+	vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+	if (!vcpu)
+		return -ENODEV;
+
+	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
+	imsic = vcpu->arch.aia_context.imsic_state;
+
+	read_lock_irqsave(&imsic->vsfile_lock, flags);
+
+	rc = 0;
+	vsfile_hgei = imsic->vsfile_hgei;
+	vsfile_cpu = imsic->vsfile_cpu;
+	if (vsfile_cpu < 0) {
+		if (write) {
+			rc = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix,
+					    isel, NULL, *val, -1UL);
+			imsic_swfile_extirq_update(vcpu);
+		} else
+			rc = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix,
+					    isel, val, 0, 0);
+	}
+
+	read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+	if (!rc && vsfile_cpu >= 0)
+		rc = imsic_vsfile_rw(vsfile_hgei, vsfile_cpu, imsic->nr_eix,
+				     isel, write, val);
+
+	return rc;
+}
+
+int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type)
+{
+	u32 isel, vcpu_id;
+	struct imsic *imsic;
+	struct kvm_vcpu *vcpu;
+
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -ENODEV;
+
+	vcpu_id = KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(type);
+	vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+	if (!vcpu)
+		return -ENODEV;
+
+	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
+	imsic = vcpu->arch.aia_context.imsic_state;
+	return imsic_mrif_isel_check(imsic->nr_eix, isel);
+}
+
 void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
 {
 	struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
-- 
2.34.1
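
To illustrate the new KVM_DEV_RISCV_AIA_GRP_IMSIC attribute encoding, a
user space read of one IMSIC register could look roughly like the sketch
below (not part of the patch; the iselect number 0x80 for eip0 comes from
the AIA specification, and error handling is omitted):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Read guest interrupt-file register "isel" of VCPU "vcpu_id" */
	static int imsic_attr_read(int aia_fd, uint32_t vcpu_id,
				   uint32_t isel, uint64_t *val)
	{
		struct kvm_device_attr attr = {
			.group = KVM_DEV_RISCV_AIA_GRP_IMSIC,
			.attr = KVM_DEV_RISCV_AIA_IMSIC_MKATTR(vcpu_id, isel),
			.addr = (uint64_t)(unsigned long)val,
		};

		return ioctl(aia_fd, KVM_GET_DEVICE_ATTR, &attr);
	}

	/* e.g. imsic_attr_read(aia_fd, 0, 0x80, &eip0) for VCPU 0's eip0;
	 * kvm_riscv_aia_imsic_rw_attr() then reads either the in-kernel
	 * swfile or, via IPI, the HW guest file backing that VCPU. */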


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 01/10] RISC-V: KVM: Implement guest external interrupt line management
  2023-05-17 10:51 ` [PATCH 01/10] RISC-V: KVM: Implement guest external interrupt line management Anup Patel
@ 2023-06-06 22:49   ` Atish Patra
  0 siblings, 0 replies; 20+ messages in thread
From: Atish Patra @ 2023-06-06 22:49 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:51 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> The RISC-V host will have one guest external interrupt line for each
> VS-level IMSIC associated with a HART. The guest external interrupt
> lines are per-HART resources, and the hypervisor can use the HGEIE,
> HGEIP, and HIE CSRs to manage these guest external interrupt lines.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/include/asm/kvm_aia.h |  10 ++
>  arch/riscv/kvm/aia.c             | 244 +++++++++++++++++++++++++++++++
>  arch/riscv/kvm/main.c            |   3 +-
>  arch/riscv/kvm/vcpu.c            |   2 +
>  4 files changed, 258 insertions(+), 1 deletion(-)
>
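
A worked example may help with the free_bitmap arithmetic in
aia_hgei_init() below (a sketch, assuming kvm_riscv_aia_nr_hgei == 7):

	BIT(7 + 1) - 1   == 0x00ff   /* bits 0..7 set */
	0x00ff & ~BIT(0) == 0x00fe   /* lines 1..7 remain allocatable */

Guest external interrupt line 0 is deliberately never handed out by
kvm_riscv_aia_alloc_hgei(), since VGEIN == 0 means "no guest interrupt
file" in the AIA specification.
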
> diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> index 1de0717112e5..0938e0cadf80 100644
> --- a/arch/riscv/include/asm/kvm_aia.h
> +++ b/arch/riscv/include/asm/kvm_aia.h
> @@ -44,10 +44,15 @@ struct kvm_vcpu_aia {
>
>  #define irqchip_in_kernel(k)           ((k)->arch.aia.in_kernel)
>
> +extern unsigned int kvm_riscv_aia_nr_hgei;
>  DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
>  #define kvm_riscv_aia_available() \
>         static_branch_unlikely(&kvm_riscv_aia_available)
>
> +static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
> +{
> +}
> +
>  #define KVM_RISCV_AIA_IMSIC_TOPEI      (ISELECT_MASK + 1)
>  static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
>                                                unsigned long isel,
> @@ -119,6 +124,11 @@ static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
>  {
>  }
>
> +int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
> +                            void __iomem **hgei_va, phys_addr_t *hgei_pa);
> +void kvm_riscv_aia_free_hgei(int cpu, int hgei);
> +void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable);
> +
>  void kvm_riscv_aia_enable(void);
>  void kvm_riscv_aia_disable(void);
>  int kvm_riscv_aia_init(void);
> diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
> index 4f1286fc7f17..1cee75a8c883 100644
> --- a/arch/riscv/kvm/aia.c
> +++ b/arch/riscv/kvm/aia.c
> @@ -8,11 +8,47 @@
>   */
>
>  #include <linux/kernel.h>
> +#include <linux/bitops.h>
> +#include <linux/irq.h>
> +#include <linux/irqdomain.h>
>  #include <linux/kvm_host.h>
> +#include <linux/percpu.h>
> +#include <linux/spinlock.h>
>  #include <asm/hwcap.h>
>
> +struct aia_hgei_control {
> +       raw_spinlock_t lock;
> +       unsigned long free_bitmap;
> +       struct kvm_vcpu *owners[BITS_PER_LONG];
> +};
> +static DEFINE_PER_CPU(struct aia_hgei_control, aia_hgei);
> +static int hgei_parent_irq;
> +
> +unsigned int kvm_riscv_aia_nr_hgei;
>  DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
>
> +static int aia_find_hgei(struct kvm_vcpu *owner)
> +{
> +       int i, hgei;
> +       unsigned long flags;
> +       struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
> +
> +       raw_spin_lock_irqsave(&hgctrl->lock, flags);
> +
> +       hgei = -1;
> +       for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
> +               if (hgctrl->owners[i] == owner) {
> +                       hgei = i;
> +                       break;
> +               }
> +       }
> +
> +       raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
> +
> +       put_cpu_ptr(&aia_hgei);
> +       return hgei;
> +}
> +
>  static void aia_set_hvictl(bool ext_irq_pending)
>  {
>         unsigned long hvictl;
> @@ -56,6 +92,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
>
>  bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
>  {
> +       int hgei;
>         unsigned long seip;
>
>         if (!kvm_riscv_aia_available())
> @@ -74,6 +111,10 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
>         if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
>                 return false;
>
> +       hgei = aia_find_hgei(vcpu);
> +       if (hgei > 0)
> +               return !!(csr_read(CSR_HGEIP) & BIT(hgei));
> +
>         return false;
>  }
>
> @@ -348,6 +389,143 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
>         return KVM_INSN_EXIT_TO_USER_SPACE;
>  }
>
> +int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
> +                            void __iomem **hgei_va, phys_addr_t *hgei_pa)
> +{
> +       int ret = -ENOENT;
> +       unsigned long flags;
> +       struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
> +
> +       if (!kvm_riscv_aia_available() || !hgctrl)
> +               return -ENODEV;
> +
> +       raw_spin_lock_irqsave(&hgctrl->lock, flags);
> +
> +       if (hgctrl->free_bitmap) {
> +               ret = __ffs(hgctrl->free_bitmap);
> +               hgctrl->free_bitmap &= ~BIT(ret);
> +               hgctrl->owners[ret] = owner;
> +       }
> +
> +       raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
> +
> +       /* TODO: To be updated later by AIA in-kernel irqchip support */
> +       if (hgei_va)
> +               *hgei_va = NULL;
> +       if (hgei_pa)
> +               *hgei_pa = 0;
> +
> +       return ret;
> +}
> +
> +void kvm_riscv_aia_free_hgei(int cpu, int hgei)
> +{
> +       unsigned long flags;
> +       struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
> +
> +       if (!kvm_riscv_aia_available() || !hgctrl)
> +               return;
> +
> +       raw_spin_lock_irqsave(&hgctrl->lock, flags);
> +
> +       if (hgei > 0 && hgei <= kvm_riscv_aia_nr_hgei) {
> +               if (!(hgctrl->free_bitmap & BIT(hgei))) {
> +                       hgctrl->free_bitmap |= BIT(hgei);
> +                       hgctrl->owners[hgei] = NULL;
> +               }
> +       }
> +
> +       raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
> +}
> +
> +void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
> +{
> +       int hgei;
> +
> +       if (!kvm_riscv_aia_available())
> +               return;
> +
> +       hgei = aia_find_hgei(owner);
> +       if (hgei > 0) {
> +               if (enable)
> +                       csr_set(CSR_HGEIE, BIT(hgei));
> +               else
> +                       csr_clear(CSR_HGEIE, BIT(hgei));
> +       }
> +}
> +
> +static irqreturn_t hgei_interrupt(int irq, void *dev_id)
> +{
> +       int i;
> +       unsigned long hgei_mask, flags;
> +       struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
> +
> +       hgei_mask = csr_read(CSR_HGEIP) & csr_read(CSR_HGEIE);
> +       csr_clear(CSR_HGEIE, hgei_mask);
> +
> +       raw_spin_lock_irqsave(&hgctrl->lock, flags);
> +
> +       for_each_set_bit(i, &hgei_mask, BITS_PER_LONG) {
> +               if (hgctrl->owners[i])
> +                       kvm_vcpu_kick(hgctrl->owners[i]);
> +       }
> +
> +       raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
> +
> +       put_cpu_ptr(&aia_hgei);
> +       return IRQ_HANDLED;
> +}
> +
> +static int aia_hgei_init(void)
> +{
> +       int cpu, rc;
> +       struct irq_domain *domain;
> +       struct aia_hgei_control *hgctrl;
> +
> +       /* Initialize per-CPU guest external interrupt line management */
> +       for_each_possible_cpu(cpu) {
> +               hgctrl = per_cpu_ptr(&aia_hgei, cpu);
> +               raw_spin_lock_init(&hgctrl->lock);
> +               if (kvm_riscv_aia_nr_hgei) {
> +                       hgctrl->free_bitmap =
> +                               BIT(kvm_riscv_aia_nr_hgei + 1) - 1;
> +                       hgctrl->free_bitmap &= ~BIT(0);
> +               } else
> +                       hgctrl->free_bitmap = 0;
> +       }
> +
> +       /* Find INTC irq domain */
> +       domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
> +                                         DOMAIN_BUS_ANY);
> +       if (!domain) {
> +               kvm_err("unable to find INTC domain\n");
> +               return -ENOENT;
> +       }
> +
> +       /* Map per-CPU SGEI interrupt from INTC domain */
> +       hgei_parent_irq = irq_create_mapping(domain, IRQ_S_GEXT);
> +       if (!hgei_parent_irq) {
> +               kvm_err("unable to map SGEI IRQ\n");
> +               return -ENOMEM;
> +       }
> +
> +       /* Request per-CPU SGEI interrupt */
> +       rc = request_percpu_irq(hgei_parent_irq, hgei_interrupt,
> +                               "riscv-kvm", &aia_hgei);
> +       if (rc) {
> +               kvm_err("failed to request SGEI IRQ\n");
> +               return rc;
> +       }
> +
> +       return 0;
> +}
> +
> +static void aia_hgei_exit(void)
> +{
> +       /* Free per-CPU SGEI interrupt */
> +       free_percpu_irq(hgei_parent_irq, &aia_hgei);
> +}
> +
>  void kvm_riscv_aia_enable(void)
>  {
>         if (!kvm_riscv_aia_available())
> @@ -362,21 +540,82 @@ void kvm_riscv_aia_enable(void)
>         csr_write(CSR_HVIPRIO1H, 0x0);
>         csr_write(CSR_HVIPRIO2H, 0x0);
>  #endif
> +
> +       /* Enable per-CPU SGEI interrupt */
> +       enable_percpu_irq(hgei_parent_irq,
> +                         irq_get_trigger_type(hgei_parent_irq));
> +       csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
>  }
>
>  void kvm_riscv_aia_disable(void)
>  {
> +       int i;
> +       unsigned long flags;
> +       struct kvm_vcpu *vcpu;
> +       struct aia_hgei_control *hgctrl;
> +
>         if (!kvm_riscv_aia_available())
>                 return;
> +       hgctrl = get_cpu_ptr(&aia_hgei);
> +
> +       /* Disable per-CPU SGEI interrupt */
> +       csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
> +       disable_percpu_irq(hgei_parent_irq);
>
>         aia_set_hvictl(false);
> +
> +       raw_spin_lock_irqsave(&hgctrl->lock, flags);
> +
> +       for (i = 0; i <= kvm_riscv_aia_nr_hgei; i++) {
> +               vcpu = hgctrl->owners[i];
> +               if (!vcpu)
> +                       continue;
> +
> +               /*
> +                * We release hgctrl->lock before notifying IMSIC
> +                * so that we don't have lock ordering issues.
> +                */
> +               raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
> +
> +               /* Notify IMSIC */
> +               kvm_riscv_vcpu_aia_imsic_release(vcpu);
> +
> +               /*
> +                * Wake up the VCPU if it was blocked so that it
> +                * can run on other HARTs
> +                */
> +               if (csr_read(CSR_HGEIE) & BIT(i)) {
> +                       csr_clear(CSR_HGEIE, BIT(i));
> +                       kvm_vcpu_kick(vcpu);
> +               }
> +
> +               raw_spin_lock_irqsave(&hgctrl->lock, flags);
> +       }
> +
> +       raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
> +
> +       put_cpu_ptr(&aia_hgei);
>  }
>
>  int kvm_riscv_aia_init(void)
>  {
> +       int rc;
> +
>         if (!riscv_isa_extension_available(NULL, SxAIA))
>                 return -ENODEV;
>
> +       /* Figure out the number of bits in HGEIE */
> +       csr_write(CSR_HGEIE, -1UL);
> +       kvm_riscv_aia_nr_hgei = fls_long(csr_read(CSR_HGEIE));
> +       csr_write(CSR_HGEIE, 0);
> +       if (kvm_riscv_aia_nr_hgei)
> +               kvm_riscv_aia_nr_hgei--;
> +
> +       /* Initialize guest external interrupt line management */
> +       rc = aia_hgei_init();
> +       if (rc)
> +               return rc;
> +
>         /* Enable KVM AIA support */
>         static_branch_enable(&kvm_riscv_aia_available);
>
> @@ -385,4 +624,9 @@ int kvm_riscv_aia_init(void)
>
>  void kvm_riscv_aia_exit(void)
>  {
> +       if (!kvm_riscv_aia_available())
> +               return;
> +
> +       /* Cleanup the HGEI state */
> +       aia_hgei_exit();
>  }
> diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
> index a7112d583637..48ae0d4b3932 100644
> --- a/arch/riscv/kvm/main.c
> +++ b/arch/riscv/kvm/main.c
> @@ -116,7 +116,8 @@ static int __init riscv_kvm_init(void)
>         kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
>
>         if (kvm_riscv_aia_available())
> -               kvm_info("AIA available\n");
> +               kvm_info("AIA available with %d guest external interrupts\n",
> +                        kvm_riscv_aia_nr_hgei);
>
>         rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
>         if (rc) {
> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> index 8bd9f2a8a0b9..2db62c6c0d3e 100644
> --- a/arch/riscv/kvm/vcpu.c
> +++ b/arch/riscv/kvm/vcpu.c
> @@ -250,10 +250,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
>
>  void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
>  {
> +       kvm_riscv_aia_wakeon_hgei(vcpu, true);
>  }
>
>  void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
>  {
> +       kvm_riscv_aia_wakeon_hgei(vcpu, false);
>  }
>
>  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
> --
> 2.34.1
>
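
The VCPU blocking hooks at the end are worth spelling out: while a VCPU
is halted, its HGEIE bit is set, so a pending guest external interrupt
raises SGEI on the host and hgei_interrupt() kicks the owning VCPU; on
unblocking the bit is cleared again, because a running VCPU observes the
interrupt directly through HGEIP. Roughly (names as in the patch):

	kvm_arch_vcpu_blocking()
	  -> kvm_riscv_aia_wakeon_hgei(vcpu, true)   /* csr_set(CSR_HGEIE) */
	/* guest external interrupt fires, SGEI taken on the host */
	hgei_interrupt()
	  -> csr_clear(CSR_HGEIE, pending & enabled)
	  -> kvm_vcpu_kick(owner)
	kvm_arch_vcpu_unblocking()
	  -> kvm_riscv_aia_wakeon_hgei(vcpu, false)  /* csr_clear(CSR_HGEIE) */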



Reviewed-by: Atish Patra <atishp@rivosinc.com>

-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 02/10] RISC-V: KVM: Add IMSIC related defines
  2023-05-17 10:51 ` [PATCH 02/10] RISC-V: KVM: Add IMSIC related defines Anup Patel
@ 2023-06-06 22:51   ` Atish Patra
  0 siblings, 0 replies; 20+ messages in thread
From: Atish Patra @ 2023-06-06 22:51 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:51 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> We add IMSIC-related defines in a separate header so that different
> parts of the KVM code can share it. Once the AIA drivers are merged,
> we will have a common IMSIC header shared by both the KVM and IRQCHIP
> drivers.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  arch/riscv/include/asm/kvm_aia_imsic.h | 38 ++++++++++++++++++++++++++
>  arch/riscv/kvm/aia.c                   |  3 +-
>  2 files changed, 39 insertions(+), 2 deletions(-)
>  create mode 100644 arch/riscv/include/asm/kvm_aia_imsic.h
>
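
A quick illustration of how these defines fit together (a sketch, not
code from the patch): for interrupt identity "id", the tracking eip
register and bit are

	reg = IMSIC_EIP0 + (id / IMSIC_EIPx_BITS);   /* 32 ids per register */
	bit = id % IMSIC_EIPx_BITS;

so id 100 lands in eip3, bit 4. On RV64 the odd-numbered register is
folded into the preceding even one, so software accesses eip2 and the
pending bit sits at position 32 + 4 = 36 of the 64-bit value (which is
why the in-kernel IMSIC emulation rejects odd iselect values when
!CONFIG_32BIT).
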
> diff --git a/arch/riscv/include/asm/kvm_aia_imsic.h b/arch/riscv/include/asm/kvm_aia_imsic.h
> new file mode 100644
> index 000000000000..da5881d2bde0
> --- /dev/null
> +++ b/arch/riscv/include/asm/kvm_aia_imsic.h
> @@ -0,0 +1,38 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + * Copyright (C) 2022 Ventana Micro Systems Inc.
> + */
> +#ifndef __KVM_RISCV_AIA_IMSIC_H
> +#define __KVM_RISCV_AIA_IMSIC_H
> +
> +#include <linux/types.h>
> +#include <asm/csr.h>
> +
> +#define IMSIC_MMIO_PAGE_SHIFT          12
> +#define IMSIC_MMIO_PAGE_SZ             (1UL << IMSIC_MMIO_PAGE_SHIFT)
> +#define IMSIC_MMIO_PAGE_LE             0x00
> +#define IMSIC_MMIO_PAGE_BE             0x04
> +
> +#define IMSIC_MIN_ID                   63
> +#define IMSIC_MAX_ID                   2048
> +
> +#define IMSIC_EIDELIVERY               0x70
> +
> +#define IMSIC_EITHRESHOLD              0x72
> +
> +#define IMSIC_EIP0                     0x80
> +#define IMSIC_EIP63                    0xbf
> +#define IMSIC_EIPx_BITS                        32
> +
> +#define IMSIC_EIE0                     0xc0
> +#define IMSIC_EIE63                    0xff
> +#define IMSIC_EIEx_BITS                        32
> +
> +#define IMSIC_FIRST                    IMSIC_EIDELIVERY
> +#define IMSIC_LAST                     IMSIC_EIE63
> +
> +#define IMSIC_MMIO_SETIPNUM_LE         0x00
> +#define IMSIC_MMIO_SETIPNUM_BE         0x04
> +
> +#endif
> diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
> index 1cee75a8c883..c78c06d99e39 100644
> --- a/arch/riscv/kvm/aia.c
> +++ b/arch/riscv/kvm/aia.c
> @@ -15,6 +15,7 @@
>  #include <linux/percpu.h>
>  #include <linux/spinlock.h>
>  #include <asm/hwcap.h>
> +#include <asm/kvm_aia_imsic.h>
>
>  struct aia_hgei_control {
>         raw_spinlock_t lock;
> @@ -364,8 +365,6 @@ static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel,
>         return KVM_INSN_CONTINUE_NEXT_SEPC;
>  }
>
> -#define IMSIC_FIRST    0x70
> -#define IMSIC_LAST     0xff
>  int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
>                                 unsigned long *val, unsigned long new_val,
>                                 unsigned long wr_mask)
> --
> 2.34.1
>


Reviewed-by: Atish Patra <atishp@rivosinc.com>

-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 03/10] RISC-V: KVM: Add APLIC related defines
  2023-05-17 10:51 ` [PATCH 03/10] RISC-V: KVM: Add APLIC " Anup Patel
@ 2023-06-06 22:51   ` Atish Patra
  0 siblings, 0 replies; 20+ messages in thread
From: Atish Patra @ 2023-06-06 22:51 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:52 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> We add APLIC-related defines in a separate header so that different
> parts of the KVM code can share it. Once the AIA drivers are merged,
> we will have a common APLIC header shared by both the KVM and IRQCHIP
> drivers.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  arch/riscv/include/asm/kvm_aia_aplic.h | 58 ++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
>  create mode 100644 arch/riscv/include/asm/kvm_aia_aplic.h
>
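
As an illustration of the target register layout (a sketch using the new
defines, not code from the patch), routing a wired input in MSI-mode to
hart index 2, guest index 1, EIID 31 would compose:

	u32 target = (2 << APLIC_TARGET_HART_IDX_SHIFT) |
		     (1 << APLIC_TARGET_GUEST_IDX_SHIFT) |
		     (31 & APLIC_TARGET_EIID_MASK);
	/* == 0x0008101f: hart index in bits 31:18, guest index in
	 * bits 17:12, EIID in bits 10:0 */
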
> diff --git a/arch/riscv/include/asm/kvm_aia_aplic.h b/arch/riscv/include/asm/kvm_aia_aplic.h
> new file mode 100644
> index 000000000000..6dd1a4809ec1
> --- /dev/null
> +++ b/arch/riscv/include/asm/kvm_aia_aplic.h
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + * Copyright (C) 2022 Ventana Micro Systems Inc.
> + */
> +#ifndef __KVM_RISCV_AIA_APLIC_H
> +#define __KVM_RISCV_AIA_APLIC_H
> +
> +#include <linux/bitops.h>
> +
> +#define APLIC_MAX_IDC                  BIT(14)
> +#define APLIC_MAX_SOURCE               1024
> +
> +#define APLIC_DOMAINCFG                        0x0000
> +#define APLIC_DOMAINCFG_RDONLY         0x80000000
> +#define APLIC_DOMAINCFG_IE             BIT(8)
> +#define APLIC_DOMAINCFG_DM             BIT(2)
> +#define APLIC_DOMAINCFG_BE             BIT(0)
> +
> +#define APLIC_SOURCECFG_BASE           0x0004
> +#define APLIC_SOURCECFG_D              BIT(10)
> +#define APLIC_SOURCECFG_CHILDIDX_MASK  0x000003ff
> +#define APLIC_SOURCECFG_SM_MASK        0x00000007
> +#define APLIC_SOURCECFG_SM_INACTIVE    0x0
> +#define APLIC_SOURCECFG_SM_DETACH      0x1
> +#define APLIC_SOURCECFG_SM_EDGE_RISE   0x4
> +#define APLIC_SOURCECFG_SM_EDGE_FALL   0x5
> +#define APLIC_SOURCECFG_SM_LEVEL_HIGH  0x6
> +#define APLIC_SOURCECFG_SM_LEVEL_LOW   0x7
> +
> +#define APLIC_IRQBITS_PER_REG          32
> +
> +#define APLIC_SETIP_BASE               0x1c00
> +#define APLIC_SETIPNUM                 0x1cdc
> +
> +#define APLIC_CLRIP_BASE               0x1d00
> +#define APLIC_CLRIPNUM                 0x1ddc
> +
> +#define APLIC_SETIE_BASE               0x1e00
> +#define APLIC_SETIENUM                 0x1edc
> +
> +#define APLIC_CLRIE_BASE               0x1f00
> +#define APLIC_CLRIENUM                 0x1fdc
> +
> +#define APLIC_SETIPNUM_LE              0x2000
> +#define APLIC_SETIPNUM_BE              0x2004
> +
> +#define APLIC_GENMSI                   0x3000
> +
> +#define APLIC_TARGET_BASE              0x3004
> +#define APLIC_TARGET_HART_IDX_SHIFT    18
> +#define APLIC_TARGET_HART_IDX_MASK     0x3fff
> +#define APLIC_TARGET_GUEST_IDX_SHIFT   12
> +#define APLIC_TARGET_GUEST_IDX_MASK    0x3f
> +#define APLIC_TARGET_IPRIO_MASK        0xff
> +#define APLIC_TARGET_EIID_MASK 0x7ff
> +
> +#endif
> --
> 2.34.1
>


Reviewed-by: Atish Patra <atishp@rivosinc.com>
-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 04/10] RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
  2023-05-17 10:51 ` [PATCH 04/10] RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero Anup Patel
@ 2023-06-06 22:53   ` Atish Patra
  0 siblings, 0 replies; 20+ messages in thread
From: Atish Patra @ 2023-06-06 22:53 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:52 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> We hard-code kvm_riscv_aia_nr_hgei to zero until IMSIC HW guest
> file support is added to KVM RISC-V.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  arch/riscv/kvm/aia.c | 10 +++++++++-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
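
Worth noting the interaction with patch 06: with kvm_riscv_aia_nr_hgei
forced to zero here, aia_config() rejects KVM_DEV_RISCV_AIA_MODE_HWACCEL
and KVM_DEV_RISCV_AIA_MODE_AUTO through its "if (!kvm_riscv_aia_nr_hgei)"
check, so KVM_DEV_RISCV_AIA_MODE_EMUL is the only mode user space can
select until IMSIC HW guest file support lands.
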
> diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
> index c78c06d99e39..3f97575707eb 100644
> --- a/arch/riscv/kvm/aia.c
> +++ b/arch/riscv/kvm/aia.c
> @@ -408,7 +408,7 @@ int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
>
>         raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
>
> -       /* TODO: To be updated later by AIA in-kernel irqchip support */
> +       /* TODO: To be updated later by AIA IMSIC HW guest file support */
>         if (hgei_va)
>                 *hgei_va = NULL;
>         if (hgei_pa)
> @@ -610,6 +610,14 @@ int kvm_riscv_aia_init(void)
>         if (kvm_riscv_aia_nr_hgei)
>                 kvm_riscv_aia_nr_hgei--;
>
> +       /*
> +        * Number of usable HGEI lines should be the minimum of the
> +        * per-HART IMSIC guest files and the number of bits in HGEIE
> +        *
> +        * TODO: To be updated later by AIA IMSIC HW guest file support
> +        */
> +       kvm_riscv_aia_nr_hgei = 0;
> +
>         /* Initialize guest external interrupt line management */
>         rc = aia_hgei_init();
>         if (rc)
> --
> 2.34.1
>

Reviewed-by: Atish Patra <atishp@rivosinc.com>

-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 05/10] RISC-V: KVM: Skeletal in-kernel AIA irqchip support
  2023-05-17 10:51 ` [PATCH 05/10] RISC-V: KVM: Skeletal in-kernel AIA irqchip support Anup Patel
@ 2023-06-06 23:19   ` Atish Patra
  0 siblings, 0 replies; 20+ messages in thread
From: Atish Patra @ 2023-06-06 23:19 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:52 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> To incrementally implement in-kernel AIA irqchip support, we first
> add minimal skeletal support which only compiles but does not provide
> any functionality.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  arch/riscv/include/asm/kvm_aia.h  |  20 ++++++
>  arch/riscv/include/asm/kvm_host.h |   4 ++
>  arch/riscv/include/uapi/asm/kvm.h |   4 ++
>  arch/riscv/kvm/Kconfig            |   4 ++
>  arch/riscv/kvm/aia.c              |   8 +++
>  arch/riscv/kvm/vm.c               | 115 ++++++++++++++++++++++++++++++
>  6 files changed, 155 insertions(+)
>
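
Once the default identity routing built below (GSI i -> irqchip 0,
pin i) is installed, asserting a wired interrupt from user space reduces
to a single KVM_IRQ_LINE ioctl; a sketch (not part of the patch, error
handling omitted):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	struct kvm_irq_level irq = {
		.irq = 5,	/* GSI, i.e. the wired input number */
		.level = 1,	/* assert; pass 0 to de-assert */
	};
	ioctl(vm_fd, KVM_IRQ_LINE, &irq);
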
> diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> index 0938e0cadf80..3bc0a0e47a15 100644
> --- a/arch/riscv/include/asm/kvm_aia.h
> +++ b/arch/riscv/include/asm/kvm_aia.h
> @@ -45,6 +45,7 @@ struct kvm_vcpu_aia {
>  #define irqchip_in_kernel(k)           ((k)->arch.aia.in_kernel)
>
>  extern unsigned int kvm_riscv_aia_nr_hgei;
> +extern unsigned int kvm_riscv_aia_max_ids;
>  DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
>  #define kvm_riscv_aia_available() \
>         static_branch_unlikely(&kvm_riscv_aia_available)
> @@ -116,6 +117,25 @@ static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
>  {
>  }
>
> +static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm,
> +                                                u32 hart_index,
> +                                                u32 guest_index, u32 iid)
> +{
> +       return 0;
> +}
> +
> +static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm,
> +                                          struct kvm_msi *msi)
> +{
> +       return 0;
> +}
> +
> +static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm,
> +                                          unsigned int irq, bool level)
> +{
> +       return 0;
> +}
> +
>  static inline void kvm_riscv_aia_init_vm(struct kvm *kvm)
>  {
>  }
> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> index ee0acccb1d3b..871432586a63 100644
> --- a/arch/riscv/include/asm/kvm_host.h
> +++ b/arch/riscv/include/asm/kvm_host.h
> @@ -27,6 +27,8 @@
>
>  #define KVM_VCPU_MAX_FEATURES          0
>
> +#define KVM_IRQCHIP_NUM_PINS           1024
> +
>  #define KVM_REQ_SLEEP \
>         KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>  #define KVM_REQ_VCPU_RESET             KVM_ARCH_REQ(1)
> @@ -318,6 +320,8 @@ int kvm_riscv_gstage_vmid_init(struct kvm *kvm);
>  bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid);
>  void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
>
> +int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines);
> +
>  void __kvm_riscv_unpriv_trap(void);
>
>  unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
> diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
> index f92790c9481a..332d4a274891 100644
> --- a/arch/riscv/include/uapi/asm/kvm.h
> +++ b/arch/riscv/include/uapi/asm/kvm.h
> @@ -15,6 +15,7 @@
>  #include <asm/bitsperlong.h>
>  #include <asm/ptrace.h>
>
> +#define __KVM_HAVE_IRQ_LINE
>  #define __KVM_HAVE_READONLY_MEM
>
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
> @@ -203,6 +204,9 @@ enum KVM_RISCV_SBI_EXT_ID {
>  #define KVM_REG_RISCV_SBI_MULTI_REG_LAST       \
>                 KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1)
>
> +/* One single KVM irqchip, i.e. the AIA */
> +#define KVM_NR_IRQCHIPS                        1
> +
>  #endif
>
>  #endif /* __LINUX_KVM_RISCV_H */
> diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
> index 28891e583259..dfc237d7875b 100644
> --- a/arch/riscv/kvm/Kconfig
> +++ b/arch/riscv/kvm/Kconfig
> @@ -21,6 +21,10 @@ config KVM
>         tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
>         depends on RISCV_SBI && MMU
>         select HAVE_KVM_EVENTFD
> +       select HAVE_KVM_IRQCHIP
> +       select HAVE_KVM_IRQFD
> +       select HAVE_KVM_IRQ_ROUTING
> +       select HAVE_KVM_MSI
>         select HAVE_KVM_VCPU_ASYNC_IOCTL
>         select KVM_GENERIC_DIRTYLOG_READ_PROTECT
>         select KVM_GENERIC_HARDWARE_ENABLING
> diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
> index 3f97575707eb..18c442c15ff2 100644
> --- a/arch/riscv/kvm/aia.c
> +++ b/arch/riscv/kvm/aia.c
> @@ -26,6 +26,7 @@ static DEFINE_PER_CPU(struct aia_hgei_control, aia_hgei);
>  static int hgei_parent_irq;
>
>  unsigned int kvm_riscv_aia_nr_hgei;
> +unsigned int kvm_riscv_aia_max_ids;
>  DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
>
>  static int aia_find_hgei(struct kvm_vcpu *owner)
> @@ -618,6 +619,13 @@ int kvm_riscv_aia_init(void)
>          */
>         kvm_riscv_aia_nr_hgei = 0;
>
> +       /*
> +        * Find number of guest MSI IDs
> +        *
> +        * TODO: To be updated later by AIA IMSIC HW guest file support
> +        */
> +       kvm_riscv_aia_max_ids = IMSIC_MAX_ID;
> +
>         /* Initialize guest external interrupt line management */
>         rc = aia_hgei_init();
>         if (rc)
> diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
> index 6ef15f78e80f..d2349326b2ce 100644
> --- a/arch/riscv/kvm/vm.c
> +++ b/arch/riscv/kvm/vm.c
> @@ -55,6 +55,121 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>         kvm_riscv_aia_destroy_vm(kvm);
>  }
>
> +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irql,
> +                         bool line_status)
> +{
> +       if (!irqchip_in_kernel(kvm))
> +               return -ENXIO;
> +
> +       return kvm_riscv_aia_inject_irq(kvm, irql->irq, irql->level);
> +}
> +
> +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
> +               struct kvm *kvm, int irq_source_id,
> +               int level, bool line_status)
> +{
> +       struct kvm_msi msi;
> +
> +       if (!level)
> +               return -1;
> +
> +       msi.address_lo = e->msi.address_lo;
> +       msi.address_hi = e->msi.address_hi;
> +       msi.data = e->msi.data;
> +       msi.flags = e->msi.flags;
> +       msi.devid = e->msi.devid;
> +
> +       return kvm_riscv_aia_inject_msi(kvm, &msi);
> +}
> +
> +static int kvm_riscv_set_irq(struct kvm_kernel_irq_routing_entry *e,
> +                            struct kvm *kvm, int irq_source_id,
> +                            int level, bool line_status)
> +{
> +       return kvm_riscv_aia_inject_irq(kvm, e->irqchip.pin, level);
> +}
> +
> +int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines)
> +{
> +       struct kvm_irq_routing_entry *ents;
> +       int i, rc;
> +
> +       ents = kcalloc(lines, sizeof(*ents), GFP_KERNEL);
> +       if (!ents)
> +               return -ENOMEM;
> +
> +       for (i = 0; i < lines; i++) {
> +               ents[i].gsi = i;
> +               ents[i].type = KVM_IRQ_ROUTING_IRQCHIP;
> +               ents[i].u.irqchip.irqchip = 0;
> +               ents[i].u.irqchip.pin = i;
> +       }
> +       rc = kvm_set_irq_routing(kvm, ents, lines, 0);
> +       kfree(ents);
> +
> +       return rc;
> +}
> +
> +bool kvm_arch_can_set_irq_routing(struct kvm *kvm)
> +{
> +       return irqchip_in_kernel(kvm);
> +}
> +
> +int kvm_set_routing_entry(struct kvm *kvm,
> +                         struct kvm_kernel_irq_routing_entry *e,
> +                         const struct kvm_irq_routing_entry *ue)
> +{
> +       int r = -EINVAL;
> +
> +       switch (ue->type) {
> +       case KVM_IRQ_ROUTING_IRQCHIP:
> +               e->set = kvm_riscv_set_irq;
> +               e->irqchip.irqchip = ue->u.irqchip.irqchip;
> +               e->irqchip.pin = ue->u.irqchip.pin;
> +               if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) ||
> +                   (e->irqchip.irqchip >= KVM_NR_IRQCHIPS))
> +                       goto out;
> +               break;
> +       case KVM_IRQ_ROUTING_MSI:
> +               e->set = kvm_set_msi;
> +               e->msi.address_lo = ue->u.msi.address_lo;
> +               e->msi.address_hi = ue->u.msi.address_hi;
> +               e->msi.data = ue->u.msi.data;
> +               e->msi.flags = ue->flags;
> +               e->msi.devid = ue->u.msi.devid;
> +               break;
> +       default:
> +               goto out;
> +       }
> +       r = 0;
> +out:
> +       return r;
> +}
> +
> +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
> +                             struct kvm *kvm, int irq_source_id, int level,
> +                             bool line_status)
> +{
> +       if (!level)
> +               return -EWOULDBLOCK;
> +
> +       switch (e->type) {
> +       case KVM_IRQ_ROUTING_MSI:
> +               return kvm_set_msi(e, kvm, irq_source_id, level, line_status);
> +
> +       case KVM_IRQ_ROUTING_IRQCHIP:
> +               return kvm_riscv_set_irq(e, kvm, irq_source_id,
> +                                        level, line_status);
> +       }
> +
> +       return -EWOULDBLOCK;
> +}
> +
> +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
> +{
> +       return irqchip_in_kernel(kvm);
> +}
> +
>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  {
>         int r;
> --
> 2.34.1
>

Reviewed-by: Atish Patra <atishp@rivosinc.com>

-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 06/10] RISC-V: KVM: Implement device interface for AIA irqchip
  2023-05-17 10:51 ` [PATCH 06/10] RISC-V: KVM: Implement device interface for AIA irqchip Anup Patel
@ 2023-06-07  0:13   ` Atish Patra
  2023-06-07 14:23     ` Anup Patel
  0 siblings, 1 reply; 20+ messages in thread
From: Atish Patra @ 2023-06-07  0:13 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:52 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> We implement the KVM device interface for the in-kernel AIA irqchip so
> that user space can use KVM device ioctls to create, configure, and
> destroy the in-kernel AIA irqchip.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  arch/riscv/include/asm/kvm_aia.h  | 132 +++++--
>  arch/riscv/include/uapi/asm/kvm.h |  36 ++
>  arch/riscv/kvm/Makefile           |   1 +
>  arch/riscv/kvm/aia.c              |  11 +
>  arch/riscv/kvm/aia_device.c       | 622 ++++++++++++++++++++++++++++++
>  include/uapi/linux/kvm.h          |   2 +
>  6 files changed, 762 insertions(+), 42 deletions(-)
>  create mode 100644 arch/riscv/kvm/aia_device.c
>
> diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> index 3bc0a0e47a15..a1281ebc9b92 100644
> --- a/arch/riscv/include/asm/kvm_aia.h
> +++ b/arch/riscv/include/asm/kvm_aia.h
> @@ -20,6 +20,33 @@ struct kvm_aia {
>
>         /* In-kernel irqchip initialized */
>         bool            initialized;
> +
> +       /* Virtualization mode (Emulation, HW Accelerated, or Auto) */
> +       u32             mode;
> +
> +       /* Number of MSIs */
> +       u32             nr_ids;
> +
> +       /* Number of wired IRQs */
> +       u32             nr_sources;
> +
> +       /* Number of group bits in IMSIC address */
> +       u32             nr_group_bits;
> +
> +       /* Position of group bits in IMSIC address */
> +       u32             nr_group_shift;
> +
> +       /* Number of hart bits in IMSIC address */
> +       u32             nr_hart_bits;
> +
> +       /* Number of guest bits in IMSIC address */
> +       u32             nr_guest_bits;
> +
> +       /* Guest physical address of APLIC */
> +       gpa_t           aplic_addr;
> +
> +       /* Internal state of APLIC */
> +       void            *aplic_state;
>  };
>
>  struct kvm_vcpu_aia_csr {
> @@ -38,8 +65,19 @@ struct kvm_vcpu_aia {
>
>         /* CPU AIA CSR context upon Guest VCPU reset */
>         struct kvm_vcpu_aia_csr guest_reset_csr;
> +
> +       /* Guest physical address of IMSIC for this VCPU */
> +       gpa_t           imsic_addr;
> +
> +       /* HART index of IMSIC extracted from the guest physical address */
> +       u32             hart_index;
> +
> +       /* Internal state of IMSIC for this VCPU */
> +       void            *imsic_state;
>  };
>
> +#define KVM_RISCV_AIA_UNDEF_ADDR       (-1)
> +
>  #define kvm_riscv_aia_initialized(k)   ((k)->arch.aia.initialized)
>
>  #define irqchip_in_kernel(k)           ((k)->arch.aia.in_kernel)
> @@ -50,10 +88,17 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
>  #define kvm_riscv_aia_available() \
>         static_branch_unlikely(&kvm_riscv_aia_available)
>
> +extern struct kvm_device_ops kvm_riscv_aia_device_ops;
> +
>  static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
>  {
>  }
>
> +static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
> +{
> +       return 1;
> +}
> +
>  #define KVM_RISCV_AIA_IMSIC_TOPEI      (ISELECT_MASK + 1)
>  static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
>                                                unsigned long isel,
> @@ -64,6 +109,41 @@ static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
>         return 0;
>  }
>
> +static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
> +{
> +}
> +
> +static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
> +                                                 u32 guest_index, u32 offset,
> +                                                 u32 iid)
> +{
> +       return 0;
> +}
> +
> +static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
> +{
> +       return 0;
> +}
> +
> +static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
> +{
> +}
> +
> +static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm,
> +                                            u32 source, bool level)
> +{
> +       return 0;
> +}
> +
> +static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm)
> +{
> +       return 0;
> +}
> +
> +static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
> +{
> +}
> +
>  #ifdef CONFIG_32BIT
>  void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
>  void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu);
> @@ -99,50 +179,18 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
>  { .base = CSR_SIREG,      .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \
>  { .base = CSR_STOPEI,     .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei },
>
> -static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
> -{
> -       return 1;
> -}
> -
> -static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
> -{
> -}
> -
> -static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
> -{
> -       return 0;
> -}
> -
> -static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
> -{
> -}
> -
> -static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm,
> -                                                u32 hart_index,
> -                                                u32 guest_index, u32 iid)
> -{
> -       return 0;
> -}
> -
> -static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm,
> -                                          struct kvm_msi *msi)
> -{
> -       return 0;
> -}
> +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu);
> +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu);
>
> -static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm,
> -                                          unsigned int irq, bool level)
> -{
> -       return 0;
> -}
> +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
> +                                  u32 guest_index, u32 iid);
> +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
> +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level);
>
> -static inline void kvm_riscv_aia_init_vm(struct kvm *kvm)
> -{
> -}
> -
> -static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
> -{
> -}
> +void kvm_riscv_aia_init_vm(struct kvm *kvm);
> +void kvm_riscv_aia_destroy_vm(struct kvm *kvm);
>
>  int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
>                              void __iomem **hgei_va, phys_addr_t *hgei_pa);
> diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
> index 332d4a274891..57f8d8bb498e 100644
> --- a/arch/riscv/include/uapi/asm/kvm.h
> +++ b/arch/riscv/include/uapi/asm/kvm.h
> @@ -204,6 +204,42 @@ enum KVM_RISCV_SBI_EXT_ID {
>  #define KVM_REG_RISCV_SBI_MULTI_REG_LAST       \
>                 KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1)
>
> +/* Device Control API: RISC-V AIA */
> +#define KVM_DEV_RISCV_APLIC_ALIGN              0x1000
> +#define KVM_DEV_RISCV_APLIC_SIZE               0x4000
> +#define KVM_DEV_RISCV_APLIC_MAX_HARTS          0x4000
> +#define KVM_DEV_RISCV_IMSIC_ALIGN              0x1000
> +#define KVM_DEV_RISCV_IMSIC_SIZE               0x1000
> +
> +#define KVM_DEV_RISCV_AIA_GRP_CONFIG           0
> +#define KVM_DEV_RISCV_AIA_CONFIG_MODE          0
> +#define KVM_DEV_RISCV_AIA_CONFIG_IDS           1
> +#define KVM_DEV_RISCV_AIA_CONFIG_SRCS          2
> +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS    3
> +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT   4
> +#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS     5
> +#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS    6

nit: A blank line here and a short comment about each mode would be useful.
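
Perhaps something like this (wording inferred from the cover letter, so
only a suggestion):

	/* Trap-n-emulate IMSIC and APLIC MSI-mode purely in software */
	#define KVM_DEV_RISCV_AIA_MODE_EMUL		0
	/* Back each VCPU IMSIC with an IMSIC HW guest interrupt file */
	#define KVM_DEV_RISCV_AIA_MODE_HWACCEL		1
	/* Use HW guest interrupt files when available, else emulate */
	#define KVM_DEV_RISCV_AIA_MODE_AUTO		2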

> +#define KVM_DEV_RISCV_AIA_MODE_EMUL            0
> +#define KVM_DEV_RISCV_AIA_MODE_HWACCEL         1
> +#define KVM_DEV_RISCV_AIA_MODE_AUTO            2


> +#define KVM_DEV_RISCV_AIA_IDS_MIN              63
> +#define KVM_DEV_RISCV_AIA_IDS_MAX              2048
> +#define KVM_DEV_RISCV_AIA_SRCS_MAX             1024
> +#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX       8
> +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN      24
> +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX      56
> +#define KVM_DEV_RISCV_AIA_HART_BITS_MAX        16
> +#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX       8
> +
> +#define KVM_DEV_RISCV_AIA_GRP_ADDR             1
> +#define KVM_DEV_RISCV_AIA_ADDR_APLIC           0
> +#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu)   (1 + (__vcpu))
> +#define KVM_DEV_RISCV_AIA_ADDR_MAX             \
> +               (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS)
> +
> +#define KVM_DEV_RISCV_AIA_GRP_CTRL             2

Why not keep all the KVM_DEV_RISCV_AIA_GRP_* items together?
There are two more (KVM_DEV_RISCV_AIA_GRP_APLIC and
KVM_DEV_RISCV_AIA_GRP_IMSIC) defined in the other patches.

I think it would be good to keep the uapi changes in one patch if possible.
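
For reference, the intended user space sequence with these groups looks
roughly like this (a sketch; error handling omitted, and the APLIC base
address is a made-up value):

	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_RISCV_AIA };
	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);	/* cd.fd is the AIA device fd */

	uint32_t nr_srcs = 32;
	struct kvm_device_attr attr = {
		.group = KVM_DEV_RISCV_AIA_GRP_CONFIG,
		.attr = KVM_DEV_RISCV_AIA_CONFIG_SRCS,
		.addr = (uint64_t)(unsigned long)&nr_srcs,
	};
	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);	/* 32 wired sources */

	uint64_t aplic_base = 0x0c000000;	/* assumed guest layout */
	attr.group = KVM_DEV_RISCV_AIA_GRP_ADDR;
	attr.attr = KVM_DEV_RISCV_AIA_ADDR_APLIC;
	attr.addr = (uint64_t)(unsigned long)&aplic_base;
	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);

	/* ... KVM_DEV_RISCV_AIA_ADDR_IMSIC(i) for each VCPU, then ... */
	attr.group = KVM_DEV_RISCV_AIA_GRP_CTRL;
	attr.attr = KVM_DEV_RISCV_AIA_CTRL_INIT;
	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);	/* finalize the irqchip */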

> +#define KVM_DEV_RISCV_AIA_CTRL_INIT            0
> +
>  /* One single KVM irqchip, ie. the AIA */
>  #define KVM_NR_IRQCHIPS                        1
>
> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
> index 8031b8912a0d..dd69ebe098bd 100644
> --- a/arch/riscv/kvm/Makefile
> +++ b/arch/riscv/kvm/Makefile
> @@ -27,3 +27,4 @@ kvm-y += vcpu_sbi_hsm.o
>  kvm-y += vcpu_timer.o
>  kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
>  kvm-y += aia.o
> +kvm-y += aia_device.o
> diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
> index 18c442c15ff2..585a3b42c52c 100644
> --- a/arch/riscv/kvm/aia.c
> +++ b/arch/riscv/kvm/aia.c
> @@ -631,6 +631,14 @@ int kvm_riscv_aia_init(void)
>         if (rc)
>                 return rc;
>
> +       /* Register device operations */
> +       rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops,
> +                                    KVM_DEV_TYPE_RISCV_AIA);
> +       if (rc) {
> +               aia_hgei_exit();
> +               return rc;
> +       }
> +
>         /* Enable KVM AIA support */
>         static_branch_enable(&kvm_riscv_aia_available);
>
> @@ -642,6 +650,9 @@ void kvm_riscv_aia_exit(void)
>         if (!kvm_riscv_aia_available())
>                 return;
>
> +       /* Unregister device operations */
> +       kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA);
> +
>         /* Cleanup the HGEI state */
>         aia_hgei_exit();
>  }
> diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
> new file mode 100644
> index 000000000000..a151fb357887
> --- /dev/null
> +++ b/arch/riscv/kvm/aia_device.c
> @@ -0,0 +1,622 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + * Copyright (C) 2022 Ventana Micro Systems Inc.
> + *
> + * Authors:
> + *     Anup Patel <apatel@ventanamicro.com>
> + */
> +
> +#include <linux/bits.h>
> +#include <linux/kvm_host.h>
> +#include <linux/uaccess.h>
> +#include <asm/kvm_aia_imsic.h>
> +
> +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
> +{
> +       struct kvm_vcpu *tmp_vcpu;
> +
> +       for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
> +               tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
> +               mutex_unlock(&tmp_vcpu->mutex);
> +       }
> +}
> +
> +static void unlock_all_vcpus(struct kvm *kvm)
> +{
> +       unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
> +}
> +
> +static bool lock_all_vcpus(struct kvm *kvm)
> +{
> +       struct kvm_vcpu *tmp_vcpu;
> +       unsigned long c;
> +
> +       kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
> +               if (!mutex_trylock(&tmp_vcpu->mutex)) {
> +                       unlock_vcpus(kvm, c - 1);
> +                       return false;
> +               }
> +       }
> +
> +       return true;
> +}
> +
> +static int aia_create(struct kvm_device *dev, u32 type)
> +{
> +       int ret;
> +       unsigned long i;
> +       struct kvm *kvm = dev->kvm;
> +       struct kvm_vcpu *vcpu;
> +
> +       if (irqchip_in_kernel(kvm))
> +               return -EEXIST;
> +
> +       ret = -EBUSY;
> +       if (!lock_all_vcpus(kvm))
> +               return ret;
> +
> +       kvm_for_each_vcpu(i, vcpu, kvm) {
> +               if (vcpu->arch.ran_atleast_once)
> +                       goto out_unlock;
> +       }
> +       ret = 0;
> +
> +       kvm->arch.aia.in_kernel = true;
> +
> +out_unlock:
> +       unlock_all_vcpus(kvm);
> +       return ret;
> +}
> +
> +static void aia_destroy(struct kvm_device *dev)
> +{
> +       kfree(dev);
> +}
> +
> +static int aia_config(struct kvm *kvm, unsigned long type,
> +                     u32 *nr, bool write)
> +{
> +       struct kvm_aia *aia = &kvm->arch.aia;
> +
> +       /* Writes can only be done before irqchip is initialized */
> +       if (write && kvm_riscv_aia_initialized(kvm))
> +               return -EBUSY;
> +
> +       switch (type) {
> +       case KVM_DEV_RISCV_AIA_CONFIG_MODE:
> +               if (write) {
> +                       switch (*nr) {
> +                       case KVM_DEV_RISCV_AIA_MODE_EMUL:
> +                               break;
> +                       case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
> +                       case KVM_DEV_RISCV_AIA_MODE_AUTO:
> +                               /*
> +                                * The HW Acceleration and Auto modes
> +                                * are only supported on hosts with a
> +                                * non-zero number of guest external
> +                                * interrupts (i.e. non-zero VS-level
> +                                * IMSIC pages).
> +                                */
> +                               if (!kvm_riscv_aia_nr_hgei)
> +                                       return -EINVAL;
> +                               break;
> +                       default:
> +                               return -EINVAL;
> +                       }
> +                       aia->mode = *nr;
> +               } else
> +                       *nr = aia->mode;
> +               break;
> +       case KVM_DEV_RISCV_AIA_CONFIG_IDS:
> +               if (write) {
> +                       if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) ||
> +                           (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) ||
> +                           (*nr & KVM_DEV_RISCV_AIA_IDS_MIN) ||
> +                           (kvm_riscv_aia_max_ids <= *nr))
> +                               return -EINVAL;
> +                       aia->nr_ids = *nr;
> +               } else
> +                       *nr = aia->nr_ids;
> +               break;
> +       case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
> +               if (write) {
> +                       if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) ||
> +                           (*nr >= kvm_riscv_aia_max_ids))
> +                               return -EINVAL;
> +                       aia->nr_sources = *nr;
> +               } else
> +                       *nr = aia->nr_sources;
> +               break;
> +       case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
> +               if (write) {
> +                       if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX)
> +                               return -EINVAL;
> +                       aia->nr_group_bits = *nr;
> +               } else
> +                       *nr = aia->nr_group_bits;
> +               break;
> +       case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
> +               if (write) {
> +                       if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) ||
> +                           (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX))
> +                               return -EINVAL;
> +                       aia->nr_group_shift = *nr;
> +               } else
> +                       *nr = aia->nr_group_shift;
> +               break;
> +       case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
> +               if (write) {
> +                       if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX)
> +                               return -EINVAL;
> +                       aia->nr_hart_bits = *nr;
> +               } else
> +                       *nr = aia->nr_hart_bits;
> +               break;
> +       case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
> +               if (write) {
> +                       if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX)
> +                               return -EINVAL;
> +                       aia->nr_guest_bits = *nr;
> +               } else
> +                       *nr = aia->nr_guest_bits;
> +               break;
> +       default:
> +               return -ENXIO;
> +       }
> +
> +       return 0;
> +}
> +
> +static int aia_aplic_addr(struct kvm *kvm, u64 *addr, bool write)
> +{
> +       struct kvm_aia *aia = &kvm->arch.aia;
> +
> +       if (write) {
> +               /* Writes can only be done before irqchip is initialized */
> +               if (kvm_riscv_aia_initialized(kvm))
> +                       return -EBUSY;
> +
> +               if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1))
> +                       return -EINVAL;
> +
> +               aia->aplic_addr = *addr;
> +       } else
> +               *addr = aia->aplic_addr;
> +
> +       return 0;
> +}
> +
> +static int aia_imsic_addr(struct kvm *kvm, u64 *addr,
> +                         unsigned long vcpu_idx, bool write)
> +{
> +       struct kvm_vcpu *vcpu;
> +       struct kvm_vcpu_aia *vcpu_aia;
> +
> +       vcpu = kvm_get_vcpu(kvm, vcpu_idx);
> +       if (!vcpu)
> +               return -EINVAL;
> +       vcpu_aia = &vcpu->arch.aia_context;
> +
> +       if (write) {
> +               /* Writes can only be done before irqchip is initialized */
> +               if (kvm_riscv_aia_initialized(kvm))
> +                       return -EBUSY;
> +
> +               if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1))
> +                       return -EINVAL;
> +       }
> +
> +       mutex_lock(&vcpu->mutex);
> +       if (write)
> +               vcpu_aia->imsic_addr = *addr;
> +       else
> +               *addr = vcpu_aia->imsic_addr;
> +       mutex_unlock(&vcpu->mutex);
> +
> +       return 0;
> +}
> +
> +static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
> +{
> +       u32 h, l;
> +       gpa_t mask = 0;
> +
> +       h = aia->nr_hart_bits + aia->nr_guest_bits +
> +           IMSIC_MMIO_PAGE_SHIFT - 1;
> +       mask = GENMASK_ULL(h, 0);
> +
> +       if (aia->nr_group_bits) {
> +               h = aia->nr_group_bits + aia->nr_group_shift - 1;
> +               l = aia->nr_group_shift;
> +               mask |= GENMASK_ULL(h, l);
> +       }
> +
> +       return (addr & ~mask) >> IMSIC_MMIO_PAGE_SHIFT;
> +}
> +
> +static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
> +{
> +       u32 hart, group = 0;
> +
> +       hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
> +               GENMASK_ULL(aia->nr_hart_bits - 1, 0);
> +       if (aia->nr_group_bits)
> +               group = (addr >> aia->nr_group_shift) &
> +                       GENMASK_ULL(aia->nr_group_bits - 1, 0);
> +
> +       return (group << aia->nr_hart_bits) | hart;
> +}
> +
> +static int aia_init(struct kvm *kvm)
> +{
> +       int ret, i;
> +       unsigned long idx;
> +       struct kvm_vcpu *vcpu;
> +       struct kvm_vcpu_aia *vaia;
> +       struct kvm_aia *aia = &kvm->arch.aia;
> +       gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR;
> +
> +       /* Irqchip can be initialized only once */
> +       if (kvm_riscv_aia_initialized(kvm))
> +               return -EBUSY;
> +
> +       /* We might be in the middle of creating a VCPU */
> +       if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
> +               return -EBUSY;
> +
> +       /* Number of sources should be less than or equal to the number of IDs */
> +       if (aia->nr_ids < aia->nr_sources)
> +               return -EINVAL;
> +
> +       /* APLIC base is required for non-zero number of sources */
> +       if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR)
> +               return -EINVAL;
> +
> +       /* Initialize APLIC */
> +       ret = kvm_riscv_aia_aplic_init(kvm);
> +       if (ret)
> +               return ret;
> +
> +       /* Iterate over each VCPU */
> +       kvm_for_each_vcpu(idx, vcpu, kvm) {
> +               vaia = &vcpu->arch.aia_context;
> +
> +               /* IMSIC base is required */
> +               if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) {
> +                       ret = -EINVAL;
> +                       goto fail_cleanup_imsics;
> +               }
> +
> +               /* All IMSICs should have matching base PPN */
> +               if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR)
> +                       base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr);
> +               if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) {
> +                       ret = -EINVAL;
> +                       goto fail_cleanup_imsics;
> +               }
> +
> +               /* Update HART index of the IMSIC based on IMSIC base */
> +               vaia->hart_index = aia_imsic_hart_index(aia,
> +                                                       vaia->imsic_addr);
> +
> +               /* Initialize IMSIC for this VCPU */
> +               ret = kvm_riscv_vcpu_aia_imsic_init(vcpu);
> +               if (ret)
> +                       goto fail_cleanup_imsics;
> +       }
> +
> +       /* Set the initialized flag */
> +       kvm->arch.aia.initialized = true;
> +
> +       return 0;
> +
> +fail_cleanup_imsics:
> +       for (i = idx - 1; i >= 0; i--) {
> +               vcpu = kvm_get_vcpu(kvm, i);
> +               if (!vcpu)
> +                       continue;
> +               kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
> +       }
> +       kvm_riscv_aia_aplic_cleanup(kvm);
> +       return ret;
> +}
> +
> +static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +       u32 nr;
> +       u64 addr;
> +       int nr_vcpus, r = -ENXIO;
> +       unsigned long type = (unsigned long)attr->attr;
> +       void __user *uaddr = (void __user *)(long)attr->addr;
> +
> +       switch (attr->group) {
> +       case KVM_DEV_RISCV_AIA_GRP_CONFIG:
> +               if (copy_from_user(&nr, uaddr, sizeof(nr)))
> +                       return -EFAULT;
> +
> +               mutex_lock(&dev->kvm->lock);
> +               r = aia_config(dev->kvm, type, &nr, true);
> +               mutex_unlock(&dev->kvm->lock);
> +
> +               break;
> +
> +       case KVM_DEV_RISCV_AIA_GRP_ADDR:
> +               if (copy_from_user(&addr, uaddr, sizeof(addr)))
> +                       return -EFAULT;
> +
> +               nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
> +               mutex_lock(&dev->kvm->lock);
> +               if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
> +                       r = aia_aplic_addr(dev->kvm, &addr, true);
> +               else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
> +                       r = aia_imsic_addr(dev->kvm, &addr,
> +                           type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true);
> +               mutex_unlock(&dev->kvm->lock);
> +
> +               break;
> +
> +       case KVM_DEV_RISCV_AIA_GRP_CTRL:
> +               switch (type) {
> +               case KVM_DEV_RISCV_AIA_CTRL_INIT:
> +                       mutex_lock(&dev->kvm->lock);
> +                       r = aia_init(dev->kvm);
> +                       mutex_unlock(&dev->kvm->lock);
> +                       break;
> +               }
> +
> +               break;
> +       }
> +
> +       return r;
> +}
> +
> +static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +       u32 nr;
> +       u64 addr;
> +       int nr_vcpus, r = -ENXIO;
> +       void __user *uaddr = (void __user *)(long)attr->addr;
> +       unsigned long type = (unsigned long)attr->attr;
> +
> +       switch (attr->group) {
> +       case KVM_DEV_RISCV_AIA_GRP_CONFIG:
> +               if (copy_from_user(&nr, uaddr, sizeof(nr)))
> +                       return -EFAULT;
> +
> +               mutex_lock(&dev->kvm->lock);
> +               r = aia_config(dev->kvm, type, &nr, false);
> +               mutex_unlock(&dev->kvm->lock);
> +               if (r)
> +                       return r;
> +
> +               if (copy_to_user(uaddr, &nr, sizeof(nr)))
> +                       return -EFAULT;
> +
> +               break;
> +       case KVM_DEV_RISCV_AIA_GRP_ADDR:
> +               if (copy_from_user(&addr, uaddr, sizeof(addr)))
> +                       return -EFAULT;
> +
> +               nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
> +               mutex_lock(&dev->kvm->lock);
> +               if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
> +                       r = aia_aplic_addr(dev->kvm, &addr, false);
> +               else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
> +                       r = aia_imsic_addr(dev->kvm, &addr,
> +                           type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false);
> +               mutex_unlock(&dev->kvm->lock);
> +               if (r)
> +                       return r;
> +
> +               if (copy_to_user(uaddr, &addr, sizeof(addr)))
> +                       return -EFAULT;
> +
> +               break;
> +       }
> +
> +       return r;
> +}
> +
> +static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +       int nr_vcpus;
> +
> +       switch (attr->group) {
> +       case KVM_DEV_RISCV_AIA_GRP_CONFIG:
> +               switch (attr->attr) {
> +               case KVM_DEV_RISCV_AIA_CONFIG_MODE:
> +               case KVM_DEV_RISCV_AIA_CONFIG_IDS:
> +               case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
> +               case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
> +               case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
> +               case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
> +               case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
> +                       return 0;
> +               }
> +               break;
> +       case KVM_DEV_RISCV_AIA_GRP_ADDR:
> +               nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
> +               if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC)
> +                       return 0;
> +               else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
> +                       return 0;
> +               break;
> +       case KVM_DEV_RISCV_AIA_GRP_CTRL:
> +               switch (attr->attr) {
> +               case KVM_DEV_RISCV_AIA_CTRL_INIT:
> +                       return 0;
> +               }
> +               break;
> +       }
> +
> +       return -ENXIO;
> +}
> +
> +struct kvm_device_ops kvm_riscv_aia_device_ops = {
> +       .name = "kvm-riscv-aia",
> +       .create = aia_create,
> +       .destroy = aia_destroy,
> +       .set_attr = aia_set_attr,
> +       .get_attr = aia_get_attr,
> +       .has_attr = aia_has_attr,
> +};
> +
> +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
> +{
> +       /* Proceed only if AIA was initialized successfully */
> +       if (!kvm_riscv_aia_initialized(vcpu->kvm))
> +               return 1;
> +
> +       /* Update the IMSIC HW state before entering guest mode */
> +       return kvm_riscv_vcpu_aia_imsic_update(vcpu);
> +}
> +
> +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
> +       struct kvm_vcpu_aia_csr *reset_csr =
> +                               &vcpu->arch.aia_context.guest_reset_csr;
> +
> +       if (!kvm_riscv_aia_available())
> +               return;
> +       memcpy(csr, reset_csr, sizeof(*csr));
> +
> +       /* Proceed only if AIA was initialized successfully */
> +       if (!kvm_riscv_aia_initialized(vcpu->kvm))
> +               return;
> +
> +       /* Reset the IMSIC context */
> +       kvm_riscv_vcpu_aia_imsic_reset(vcpu);
> +}
> +
> +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
> +
> +       if (!kvm_riscv_aia_available())
> +               return 0;
> +
> +       /*
> +        * We don't do any memory allocations over here because these
> +        * will be done after AIA device is initialized by the user-space.
> +        *
> +        * Refer to the aia_init() implementation for more details.
> +        */
> +
> +       /* Initialize default values in AIA vcpu context */
> +       vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
> +       vaia->hart_index = vcpu->vcpu_idx;
> +
> +       return 0;
> +}
> +
> +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
> +{
> +       /* Proceed only if AIA was initialized successfully */
> +       if (!kvm_riscv_aia_initialized(vcpu->kvm))
> +               return;
> +
> +       /* Cleanup IMSIC context */
> +       kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
> +}
> +
> +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
> +                                  u32 guest_index, u32 iid)
> +{
> +       unsigned long idx;
> +       struct kvm_vcpu *vcpu;
> +
> +       /* Proceed only if AIA was initialized successfully */
> +       if (!kvm_riscv_aia_initialized(kvm))
> +               return -EBUSY;
> +
> +       /* Inject MSI to matching VCPU */
> +       kvm_for_each_vcpu(idx, vcpu, kvm) {
> +               if (vcpu->arch.aia_context.hart_index == hart_index)
> +                       return kvm_riscv_vcpu_aia_imsic_inject(vcpu,
> +                                                              guest_index,
> +                                                              0, iid);
> +       }
> +
> +       return 0;
> +}
> +
> +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
> +{
> +       gpa_t tppn, ippn;
> +       unsigned long idx;
> +       struct kvm_vcpu *vcpu;
> +       u32 g, toff, iid = msi->data;
> +       struct kvm_aia *aia = &kvm->arch.aia;
> +       gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo;
> +
> +       /* Proceed only if AIA was initialized successfully */
> +       if (!kvm_riscv_aia_initialized(kvm))
> +               return -EBUSY;
> +
> +       /* Convert target address to target PPN */
> +       tppn = target >> IMSIC_MMIO_PAGE_SHIFT;
> +
> +       /* Extract and clear Guest ID from target PPN */
> +       g = tppn & (BIT(aia->nr_guest_bits) - 1);
> +       tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1));
> +
> +       /* Inject MSI to matching VCPU */
> +       kvm_for_each_vcpu(idx, vcpu, kvm) {
> +               ippn = vcpu->arch.aia_context.imsic_addr >>
> +                                       IMSIC_MMIO_PAGE_SHIFT;
> +               if (ippn == tppn) {
> +                       toff = target & (IMSIC_MMIO_PAGE_SZ - 1);
> +                       return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g,
> +                                                              toff, iid);
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level)
> +{
> +       /* Proceed only if AIA was initialized successfully */
> +       if (!kvm_riscv_aia_initialized(kvm))
> +               return -EBUSY;
> +
> +       /* Inject interrupt level change in APLIC */
> +       return kvm_riscv_aia_aplic_inject(kvm, irq, level);
> +}
> +
> +void kvm_riscv_aia_init_vm(struct kvm *kvm)
> +{
> +       struct kvm_aia *aia = &kvm->arch.aia;
> +
> +       if (!kvm_riscv_aia_available())
> +               return;
> +
> +       /*
> +        * We don't do any memory allocations over here because these
> +        * will be done after AIA device is initialized by the user-space.
> +        *
> +        * Refer to the aia_init() implementation for more details.
> +        */
> +
> +       /* Initialize default values in AIA global context */
> +       aia->mode = (kvm_riscv_aia_nr_hgei) ?
> +               KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL;
> +       aia->nr_ids = kvm_riscv_aia_max_ids - 1;
> +       aia->nr_sources = 0;
> +       aia->nr_group_bits = 0;
> +       aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN;
> +       aia->nr_hart_bits = 0;
> +       aia->nr_guest_bits = 0;
> +       aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
> +}
> +
> +void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
> +{
> +       /* Proceed only if AIA was initialized successfully */
> +       if (!kvm_riscv_aia_initialized(kvm))
> +               return;
> +
> +       /* Cleanup APLIC context */
> +       kvm_riscv_aia_aplic_cleanup(kvm);
> +}
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 737318b1c1d9..27ccd07898e1 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1442,6 +1442,8 @@ enum kvm_device_type {
>  #define KVM_DEV_TYPE_XIVE              KVM_DEV_TYPE_XIVE
>         KVM_DEV_TYPE_ARM_PV_TIME,
>  #define KVM_DEV_TYPE_ARM_PV_TIME       KVM_DEV_TYPE_ARM_PV_TIME
> +       KVM_DEV_TYPE_RISCV_AIA,
> +#define KVM_DEV_TYPE_RISCV_AIA         KVM_DEV_TYPE_RISCV_AIA
>         KVM_DEV_TYPE_MAX,
>  };
>
> --
> 2.34.1
>

Apart from the above comments, LGTM.

-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 06/10] RISC-V: KVM: Implement device interface for AIA irqchip
  2023-06-07  0:13   ` Atish Patra
@ 2023-06-07 14:23     ` Anup Patel
  0 siblings, 0 replies; 20+ messages in thread
From: Anup Patel @ 2023-06-07 14:23 UTC (permalink / raw)
  To: Atish Patra
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, Jun 7, 2023 at 5:44 AM Atish Patra <atishp@atishpatra.org> wrote:
>
> On Wed, May 17, 2023 at 3:52 AM Anup Patel <apatel@ventanamicro.com> wrote:
> >
> > We implement KVM device interface for in-kernel AIA irqchip so that
> > user-space can use KVM device ioctls to create, configure, and destroy
> > in-kernel AIA irqchip.
> >
> > Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> > ---
> >  arch/riscv/include/asm/kvm_aia.h  | 132 +++++--
> >  arch/riscv/include/uapi/asm/kvm.h |  36 ++
> >  arch/riscv/kvm/Makefile           |   1 +
> >  arch/riscv/kvm/aia.c              |  11 +
> >  arch/riscv/kvm/aia_device.c       | 622 ++++++++++++++++++++++++++++++
> >  include/uapi/linux/kvm.h          |   2 +
> >  6 files changed, 762 insertions(+), 42 deletions(-)
> >  create mode 100644 arch/riscv/kvm/aia_device.c
> >
> > diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> > index 3bc0a0e47a15..a1281ebc9b92 100644
> > --- a/arch/riscv/include/asm/kvm_aia.h
> > +++ b/arch/riscv/include/asm/kvm_aia.h
> > @@ -20,6 +20,33 @@ struct kvm_aia {
> >
> >         /* In-kernel irqchip initialized */
> >         bool            initialized;
> > +
> > +       /* Virtualization mode (Emulation, HW Accelerated, or Auto) */
> > +       u32             mode;
> > +
> > +       /* Number of MSIs */
> > +       u32             nr_ids;
> > +
> > +       /* Number of wired IRQs */
> > +       u32             nr_sources;
> > +
> > +       /* Number of group bits in IMSIC address */
> > +       u32             nr_group_bits;
> > +
> > +       /* Position of group bits in IMSIC address */
> > +       u32             nr_group_shift;
> > +
> > +       /* Number of hart bits in IMSIC address */
> > +       u32             nr_hart_bits;
> > +
> > +       /* Number of guest bits in IMSIC address */
> > +       u32             nr_guest_bits;
> > +
> > +       /* Guest physical address of APLIC */
> > +       gpa_t           aplic_addr;
> > +
> > +       /* Internal state of APLIC */
> > +       void            *aplic_state;
> >  };
> >
> >  struct kvm_vcpu_aia_csr {
> > @@ -38,8 +65,19 @@ struct kvm_vcpu_aia {
> >
> >         /* CPU AIA CSR context upon Guest VCPU reset */
> >         struct kvm_vcpu_aia_csr guest_reset_csr;
> > +
> > +       /* Guest physical address of IMSIC for this VCPU */
> > +       gpa_t           imsic_addr;
> > +
> > +       /* HART index of IMSIC extracted from guest physical address */
> > +       u32             hart_index;
> > +
> > +       /* Internal state of IMSIC for this VCPU */
> > +       void            *imsic_state;
> >  };
> >
> > +#define KVM_RISCV_AIA_UNDEF_ADDR       (-1)
> > +
> >  #define kvm_riscv_aia_initialized(k)   ((k)->arch.aia.initialized)
> >
> >  #define irqchip_in_kernel(k)           ((k)->arch.aia.in_kernel)
> > @@ -50,10 +88,17 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
> >  #define kvm_riscv_aia_available() \
> >         static_branch_unlikely(&kvm_riscv_aia_available)
> >
> > +extern struct kvm_device_ops kvm_riscv_aia_device_ops;
> > +
> >  static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
> >  {
> >  }
> >
> > +static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
> > +{
> > +       return 1;
> > +}
> > +
> >  #define KVM_RISCV_AIA_IMSIC_TOPEI      (ISELECT_MASK + 1)
> >  static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
> >                                                unsigned long isel,
> > @@ -64,6 +109,41 @@ static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
> >         return 0;
> >  }
> >
> > +static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
> > +{
> > +}
> > +
> > +static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
> > +                                                 u32 guest_index, u32 offset,
> > +                                                 u32 iid)
> > +{
> > +       return 0;
> > +}
> > +
> > +static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
> > +{
> > +       return 0;
> > +}
> > +
> > +static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
> > +{
> > +}
> > +
> > +static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm,
> > +                                            u32 source, bool level)
> > +{
> > +       return 0;
> > +}
> > +
> > +static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm)
> > +{
> > +       return 0;
> > +}
> > +
> > +static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
> > +{
> > +}
> > +
> >  #ifdef CONFIG_32BIT
> >  void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
> >  void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu);
> > @@ -99,50 +179,18 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
> >  { .base = CSR_SIREG,      .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \
> >  { .base = CSR_STOPEI,     .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei },
> >
> > -static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
> > -{
> > -       return 1;
> > -}
> > -
> > -static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
> > -{
> > -}
> > -
> > -static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
> > -{
> > -       return 0;
> > -}
> > -
> > -static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
> > -{
> > -}
> > -
> > -static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm,
> > -                                                u32 hart_index,
> > -                                                u32 guest_index, u32 iid)
> > -{
> > -       return 0;
> > -}
> > -
> > -static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm,
> > -                                          struct kvm_msi *msi)
> > -{
> > -       return 0;
> > -}
> > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu);
> > +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu);
> > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu);
> > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu);
> >
> > -static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm,
> > -                                          unsigned int irq, bool level)
> > -{
> > -       return 0;
> > -}
> > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
> > +                                  u32 guest_index, u32 iid);
> > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
> > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level);
> >
> > -static inline void kvm_riscv_aia_init_vm(struct kvm *kvm)
> > -{
> > -}
> > -
> > -static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
> > -{
> > -}
> > +void kvm_riscv_aia_init_vm(struct kvm *kvm);
> > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm);
> >
> >  int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
> >                              void __iomem **hgei_va, phys_addr_t *hgei_pa);
> > diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
> > index 332d4a274891..57f8d8bb498e 100644
> > --- a/arch/riscv/include/uapi/asm/kvm.h
> > +++ b/arch/riscv/include/uapi/asm/kvm.h
> > @@ -204,6 +204,42 @@ enum KVM_RISCV_SBI_EXT_ID {
> >  #define KVM_REG_RISCV_SBI_MULTI_REG_LAST       \
> >                 KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1)
> >
> > +/* Device Control API: RISC-V AIA */
> > +#define KVM_DEV_RISCV_APLIC_ALIGN              0x1000
> > +#define KVM_DEV_RISCV_APLIC_SIZE               0x4000
> > +#define KVM_DEV_RISCV_APLIC_MAX_HARTS          0x4000
> > +#define KVM_DEV_RISCV_IMSIC_ALIGN              0x1000
> > +#define KVM_DEV_RISCV_IMSIC_SIZE               0x1000
> > +
> > +#define KVM_DEV_RISCV_AIA_GRP_CONFIG           0
> > +#define KVM_DEV_RISCV_AIA_CONFIG_MODE          0
> > +#define KVM_DEV_RISCV_AIA_CONFIG_IDS           1
> > +#define KVM_DEV_RISCV_AIA_CONFIG_SRCS          2
> > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS    3
> > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT   4
> > +#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS     5
> > +#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS    6
>
> nit: A blank line here and a short comment describing each mode would be useful.

Okay, I will update.
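
For reference, one possible shape for those comments (illustrative
wording only, not the final patch):

  /* Modes of operation for the in-kernel AIA irqchip */
  #define KVM_DEV_RISCV_AIA_MODE_EMUL            0 /* Trap-n-emulate IMSIC in software */
  #define KVM_DEV_RISCV_AIA_MODE_HWACCEL         1 /* Use IMSIC VS-files (HW-accelerated) */
  #define KVM_DEV_RISCV_AIA_MODE_AUTO            2 /* Prefer VS-files, else trap-n-emulate */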

>
> > +#define KVM_DEV_RISCV_AIA_MODE_EMUL            0
> > +#define KVM_DEV_RISCV_AIA_MODE_HWACCEL         1
> > +#define KVM_DEV_RISCV_AIA_MODE_AUTO            2
>
>
> > +#define KVM_DEV_RISCV_AIA_IDS_MIN              63
> > +#define KVM_DEV_RISCV_AIA_IDS_MAX              2048
> > +#define KVM_DEV_RISCV_AIA_SRCS_MAX             1024
> > +#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX       8
> > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN      24
> > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX      56
> > +#define KVM_DEV_RISCV_AIA_HART_BITS_MAX        16
> > +#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX       8
> > +
> > +#define KVM_DEV_RISCV_AIA_GRP_ADDR             1
> > +#define KVM_DEV_RISCV_AIA_ADDR_APLIC           0
> > +#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu)   (1 + (__vcpu))
> > +#define KVM_DEV_RISCV_AIA_ADDR_MAX             \
> > +               (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS)
> > +
> > +#define KVM_DEV_RISCV_AIA_GRP_CTRL             2
>
> Why not keep all KVM_DEV_RISCV_AIA_GRP_* items together?
> There are two more KVM_DEV_RISCV_AIA_GRP_APLIC/IMSIC defined in the
> other patches.
>
> I think it would be good to keep the uapi changes in one patch if possible.

I have divided the changes among patches based on where the
defines are used.
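
As background for how these groups fit together, here is a minimal
user-space sketch of the intended flow using the generic KVM device
API. The guest physical addresses are made-up examples, the
GRP_CONFIG values are elided, and error handling is reduced to
fail-fast returns:

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_aia(int vm_fd, int nr_vcpus)
{
        struct kvm_create_device cd = { .type = KVM_DEV_TYPE_RISCV_AIA };
        struct kvm_device_attr attr;
        __u64 addr;
        int i;

        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                return -1;

        /*
         * GRP_CONFIG (mode, number of IDs, hart bits, ...) would be
         * set here, e.g. enough HART_BITS to cover the IMSIC stride
         * chosen below.
         */

        /* Hypothetical APLIC base, KVM_DEV_RISCV_APLIC_ALIGN aligned */
        addr = 0xc000000;
        attr.flags = 0;
        attr.group = KVM_DEV_RISCV_AIA_GRP_ADDR;
        attr.attr = KVM_DEV_RISCV_AIA_ADDR_APLIC;
        attr.addr = (__u64)(unsigned long)&addr;
        if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
                return -1;

        /* Hypothetical per-VCPU IMSIC bases, IMSIC_ALIGN aligned */
        for (i = 0; i < nr_vcpus; i++) {
                addr = 0x28000000 + (__u64)i * 0x1000;
                attr.attr = KVM_DEV_RISCV_AIA_ADDR_IMSIC(i);
                if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
                        return -1;
        }

        /* One-time initialization; this is what calls aia_init() */
        attr.group = KVM_DEV_RISCV_AIA_GRP_CTRL;
        attr.attr = KVM_DEV_RISCV_AIA_CTRL_INIT;
        return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}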

>
> > +#define KVM_DEV_RISCV_AIA_CTRL_INIT            0
> > +
> >  /* One single KVM irqchip, ie. the AIA */
> >  #define KVM_NR_IRQCHIPS                        1
> >
> > diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
> > index 8031b8912a0d..dd69ebe098bd 100644
> > --- a/arch/riscv/kvm/Makefile
> > +++ b/arch/riscv/kvm/Makefile
> > @@ -27,3 +27,4 @@ kvm-y += vcpu_sbi_hsm.o
> >  kvm-y += vcpu_timer.o
> >  kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
> >  kvm-y += aia.o
> > +kvm-y += aia_device.o
> > diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
> > index 18c442c15ff2..585a3b42c52c 100644
> > --- a/arch/riscv/kvm/aia.c
> > +++ b/arch/riscv/kvm/aia.c
> > @@ -631,6 +631,14 @@ int kvm_riscv_aia_init(void)
> >         if (rc)
> >                 return rc;
> >
> > +       /* Register device operations */
> > +       rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops,
> > +                                    KVM_DEV_TYPE_RISCV_AIA);
> > +       if (rc) {
> > +               aia_hgei_exit();
> > +               return rc;
> > +       }
> > +
> >         /* Enable KVM AIA support */
> >         static_branch_enable(&kvm_riscv_aia_available);
> >
> > @@ -642,6 +650,9 @@ void kvm_riscv_aia_exit(void)
> >         if (!kvm_riscv_aia_available())
> >                 return;
> >
> > +       /* Unregister device operations */
> > +       kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA);
> > +
> >         /* Cleanup the HGEI state */
> >         aia_hgei_exit();
> >  }
> > diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
> > new file mode 100644
> > index 000000000000..a151fb357887
> > --- /dev/null
> > +++ b/arch/riscv/kvm/aia_device.c
> > @@ -0,0 +1,622 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> > + * Copyright (C) 2022 Ventana Micro Systems Inc.
> > + *
> > + * Authors:
> > + *     Anup Patel <apatel@ventanamicro.com>
> > + */
> > +
> > +#include <linux/bits.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/uaccess.h>
> > +#include <asm/kvm_aia_imsic.h>
> > +
> > +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
> > +{
> > +       struct kvm_vcpu *tmp_vcpu;
> > +
> > +       for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
> > +               tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
> > +               mutex_unlock(&tmp_vcpu->mutex);
> > +       }
> > +}
> > +
> > +static void unlock_all_vcpus(struct kvm *kvm)
> > +{
> > +       unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
> > +}
> > +
> > +static bool lock_all_vcpus(struct kvm *kvm)
> > +{
> > +       struct kvm_vcpu *tmp_vcpu;
> > +       unsigned long c;
> > +
> > +       kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
> > +               if (!mutex_trylock(&tmp_vcpu->mutex)) {
> > +                       unlock_vcpus(kvm, c - 1);
> > +                       return false;
> > +               }
> > +       }
> > +
> > +       return true;
> > +}
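> > +
> > +/*
> > + * Illustrative note: mutex_trylock() lets device creation fail fast
> > + * with -EBUSY (see aia_create() below) when any vcpu->mutex is
> > + * already held, instead of sleeping on VCPU locks.
> > + */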
> > +
> > +static int aia_create(struct kvm_device *dev, u32 type)
> > +{
> > +       int ret;
> > +       unsigned long i;
> > +       struct kvm *kvm = dev->kvm;
> > +       struct kvm_vcpu *vcpu;
> > +
> > +       if (irqchip_in_kernel(kvm))
> > +               return -EEXIST;
> > +
> > +       ret = -EBUSY;
> > +       if (!lock_all_vcpus(kvm))
> > +               return ret;
> > +
> > +       kvm_for_each_vcpu(i, vcpu, kvm) {
> > +               if (vcpu->arch.ran_atleast_once)
> > +                       goto out_unlock;
> > +       }
> > +       ret = 0;
> > +
> > +       kvm->arch.aia.in_kernel = true;
> > +
> > +out_unlock:
> > +       unlock_all_vcpus(kvm);
> > +       return ret;
> > +}
> > +
> > +static void aia_destroy(struct kvm_device *dev)
> > +{
> > +       kfree(dev);
> > +}
> > +
> > +static int aia_config(struct kvm *kvm, unsigned long type,
> > +                     u32 *nr, bool write)
> > +{
> > +       struct kvm_aia *aia = &kvm->arch.aia;
> > +
> > +       /* Writes can only be done before irqchip is initialized */
> > +       if (write && kvm_riscv_aia_initialized(kvm))
> > +               return -EBUSY;
> > +
> > +       switch (type) {
> > +       case KVM_DEV_RISCV_AIA_CONFIG_MODE:
> > +               if (write) {
> > +                       switch (*nr) {
> > +                       case KVM_DEV_RISCV_AIA_MODE_EMUL:
> > +                               break;
> > +                       case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
> > +                       case KVM_DEV_RISCV_AIA_MODE_AUTO:
> > +                               /*
> > +                                * HW Acceleration and Auto modes only
> > +                                * supported on host with non-zero guest
> > +                                * external interrupts (i.e. non-zero
> > +                                * VS-level IMSIC pages).
> > +                                */
> > +                               if (!kvm_riscv_aia_nr_hgei)
> > +                                       return -EINVAL;
> > +                               break;
> > +                       default:
> > +                               return -EINVAL;
> > +                       };
> > +                       aia->mode = *nr;
> > +               } else
> > +                       *nr = aia->mode;
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_CONFIG_IDS:
> > +               if (write) {
> > +                       if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) ||
> > +                           (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) ||
> > +                           (*nr & KVM_DEV_RISCV_AIA_IDS_MIN) ||
> > +                           (kvm_riscv_aia_max_ids <= *nr))
> > +                               return -EINVAL;
> > +                       aia->nr_ids = *nr;
> > +               } else
> > +                       *nr = aia->nr_ids;
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
> > +               if (write) {
> > +                       if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) ||
> > +                           (*nr >= kvm_riscv_aia_max_ids))
> > +                               return -EINVAL;
> > +                       aia->nr_sources = *nr;
> > +               } else
> > +                       *nr = aia->nr_sources;
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
> > +               if (write) {
> > +                       if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX)
> > +                               return -EINVAL;
> > +                       aia->nr_group_bits = *nr;
> > +               } else
> > +                       *nr = aia->nr_group_bits;
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
> > +               if (write) {
> > +                       if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) ||
> > +                           (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX))
> > +                               return -EINVAL;
> > +                       aia->nr_group_shift = *nr;
> > +               } else
> > +                       *nr = aia->nr_group_shift;
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
> > +               if (write) {
> > +                       if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX)
> > +                               return -EINVAL;
> > +                       aia->nr_hart_bits = *nr;
> > +               } else
> > +                       *nr = aia->nr_hart_bits;
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
> > +               if (write) {
> > +                       if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX)
> > +                               return -EINVAL;
> > +                       aia->nr_guest_bits = *nr;
> > +               } else
> > +                       *nr = aia->nr_guest_bits;
> > +               break;
> > +       default:
> > +               return -ENXIO;
> > +       };
> > +
> > +       return 0;
> > +}
> > +
> > +static int aia_aplic_addr(struct kvm *kvm, u64 *addr, bool write)
> > +{
> > +       struct kvm_aia *aia = &kvm->arch.aia;
> > +
> > +       if (write) {
> > +               /* Writes can only be done before irqchip is initialized */
> > +               if (kvm_riscv_aia_initialized(kvm))
> > +                       return -EBUSY;
> > +
> > +               if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1))
> > +                       return -EINVAL;
> > +
> > +               aia->aplic_addr = *addr;
> > +       } else
> > +               *addr = aia->aplic_addr;
> > +
> > +       return 0;
> > +}
> > +
> > +static int aia_imsic_addr(struct kvm *kvm, u64 *addr,
> > +                         unsigned long vcpu_idx, bool write)
> > +{
> > +       struct kvm_vcpu *vcpu;
> > +       struct kvm_vcpu_aia *vcpu_aia;
> > +
> > +       vcpu = kvm_get_vcpu(kvm, vcpu_idx);
> > +       if (!vcpu)
> > +               return -EINVAL;
> > +       vcpu_aia = &vcpu->arch.aia_context;
> > +
> > +       if (write) {
> > +               /* Writes can only be done before irqchip is initialized */
> > +               if (kvm_riscv_aia_initialized(kvm))
> > +                       return -EBUSY;
> > +
> > +               if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1))
> > +                       return -EINVAL;
> > +       }
> > +
> > +       mutex_lock(&vcpu->mutex);
> > +       if (write)
> > +               vcpu_aia->imsic_addr = *addr;
> > +       else
> > +               *addr = vcpu_aia->imsic_addr;
> > +       mutex_unlock(&vcpu->mutex);
> > +
> > +       return 0;
> > +}
> > +
> > +static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
> > +{
> > +       u32 h, l;
> > +       gpa_t mask = 0;
> > +
> > +       h = aia->nr_hart_bits + aia->nr_guest_bits +
> > +           IMSIC_MMIO_PAGE_SHIFT - 1;
> > +       mask = GENMASK_ULL(h, 0);
> > +
> > +       if (aia->nr_group_bits) {
> > +               h = aia->nr_group_bits + aia->nr_group_shift - 1;
> > +               l = aia->nr_group_shift;
> > +               mask |= GENMASK_ULL(h, l);
> > +       }
> > +
> > +       return (addr & ~mask) >> IMSIC_MMIO_PAGE_SHIFT;
> > +}
> > +
> > +static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
> > +{
> > +       u32 hart, group = 0;
> > +
> > +       hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
> > +               GENMASK_ULL(aia->nr_hart_bits - 1, 0);
> > +       if (aia->nr_group_bits)
> > +               group = (addr >> aia->nr_group_shift) &
> > +                       GENMASK_ULL(aia->nr_group_bits - 1, 0);
> > +
> > +       return (group << aia->nr_hart_bits) | hart;
> > +}
> > +
> > +static int aia_init(struct kvm *kvm)
> > +{
> > +       int ret, i;
> > +       unsigned long idx;
> > +       struct kvm_vcpu *vcpu;
> > +       struct kvm_vcpu_aia *vaia;
> > +       struct kvm_aia *aia = &kvm->arch.aia;
> > +       gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR;
> > +
> > +       /* Irqchip can be initialized only once */
> > +       if (kvm_riscv_aia_initialized(kvm))
> > +               return -EBUSY;
> > +
> > +       /* We might be in the middle of creating a VCPU? */
> > +       if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
> > +               return -EBUSY;
> > +
> > +       /* Number of sources should be less than or equal to number of IDs */
> > +       if (aia->nr_ids < aia->nr_sources)
> > +               return -EINVAL;
> > +
> > +       /* APLIC base is required for non-zero number of sources */
> > +       if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR)
> > +               return -EINVAL;
> > +
> > +       /* Initialize APLIC */
> > +       ret = kvm_riscv_aia_aplic_init(kvm);
> > +       if (ret)
> > +               return ret;
> > +
> > +       /* Iterate over each VCPU */
> > +       kvm_for_each_vcpu(idx, vcpu, kvm) {
> > +               vaia = &vcpu->arch.aia_context;
> > +
> > +               /* IMSIC base is required */
> > +               if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) {
> > +                       ret = -EINVAL;
> > +                       goto fail_cleanup_imsics;
> > +               }
> > +
> > +               /* All IMSICs should have matching base PPN */
> > +               if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR)
> > +                       base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr);
> > +               if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) {
> > +                       ret = -EINVAL;
> > +                       goto fail_cleanup_imsics;
> > +               }
> > +
> > +               /* Update HART index of the IMSIC based on IMSIC base */
> > +               vaia->hart_index = aia_imsic_hart_index(aia,
> > +                                                       vaia->imsic_addr);
> > +
> > +               /* Initialize IMSIC for this VCPU */
> > +               ret = kvm_riscv_vcpu_aia_imsic_init(vcpu);
> > +               if (ret)
> > +                       goto fail_cleanup_imsics;
> > +       }
> > +
> > +       /* Set the initialized flag */
> > +       kvm->arch.aia.initialized = true;
> > +
> > +       return 0;
> > +
> > +fail_cleanup_imsics:
> > +       for (i = idx - 1; i >= 0; i--) {
> > +               vcpu = kvm_get_vcpu(kvm, i);
> > +               if (!vcpu)
> > +                       continue;
> > +               kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
> > +       }
> > +       kvm_riscv_aia_aplic_cleanup(kvm);
> > +       return ret;
> > +}
> > +
> > +static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> > +{
> > +       u32 nr;
> > +       u64 addr;
> > +       int nr_vcpus, r = -ENXIO;
> > +       unsigned long type = (unsigned long)attr->attr;
> > +       void __user *uaddr = (void __user *)(long)attr->addr;
> > +
> > +       switch (attr->group) {
> > +       case KVM_DEV_RISCV_AIA_GRP_CONFIG:
> > +               if (copy_from_user(&nr, uaddr, sizeof(nr)))
> > +                       return -EFAULT;
> > +
> > +               mutex_lock(&dev->kvm->lock);
> > +               r = aia_config(dev->kvm, type, &nr, true);
> > +               mutex_unlock(&dev->kvm->lock);
> > +
> > +               break;
> > +
> > +       case KVM_DEV_RISCV_AIA_GRP_ADDR:
> > +               if (copy_from_user(&addr, uaddr, sizeof(addr)))
> > +                       return -EFAULT;
> > +
> > +               nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
> > +               mutex_lock(&dev->kvm->lock);
> > +               if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
> > +                       r = aia_aplic_addr(dev->kvm, &addr, true);
> > +               else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
> > +                       r = aia_imsic_addr(dev->kvm, &addr,
> > +                           type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true);
> > +               mutex_unlock(&dev->kvm->lock);
> > +
> > +               break;
> > +
> > +       case KVM_DEV_RISCV_AIA_GRP_CTRL:
> > +               switch (type) {
> > +               case KVM_DEV_RISCV_AIA_CTRL_INIT:
> > +                       mutex_lock(&dev->kvm->lock);
> > +                       r = aia_init(dev->kvm);
> > +                       mutex_unlock(&dev->kvm->lock);
> > +                       break;
> > +               }
> > +
> > +               break;
> > +       }
> > +
> > +       return r;
> > +}
> > +
> > +static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> > +{
> > +       u32 nr;
> > +       u64 addr;
> > +       int nr_vcpus, r = -ENXIO;
> > +       void __user *uaddr = (void __user *)(long)attr->addr;
> > +       unsigned long type = (unsigned long)attr->attr;
> > +
> > +       switch (attr->group) {
> > +       case KVM_DEV_RISCV_AIA_GRP_CONFIG:
> > +               if (copy_from_user(&nr, uaddr, sizeof(nr)))
> > +                       return -EFAULT;
> > +
> > +               mutex_lock(&dev->kvm->lock);
> > +               r = aia_config(dev->kvm, type, &nr, false);
> > +               mutex_unlock(&dev->kvm->lock);
> > +               if (r)
> > +                       return r;
> > +
> > +               if (copy_to_user(uaddr, &nr, sizeof(nr)))
> > +                       return -EFAULT;
> > +
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_GRP_ADDR:
> > +               if (copy_from_user(&addr, uaddr, sizeof(addr)))
> > +                       return -EFAULT;
> > +
> > +               nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
> > +               mutex_lock(&dev->kvm->lock);
> > +               if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
> > +                       r = aia_aplic_addr(dev->kvm, &addr, false);
> > +               else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
> > +                       r = aia_imsic_addr(dev->kvm, &addr,
> > +                           type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false);
> > +               mutex_unlock(&dev->kvm->lock);
> > +               if (r)
> > +                       return r;
> > +
> > +               if (copy_to_user(uaddr, &addr, sizeof(addr)))
> > +                       return -EFAULT;
> > +
> > +               break;
> > +       }
> > +
> > +       return r;
> > +}
> > +
> > +static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> > +{
> > +       int nr_vcpus;
> > +
> > +       switch (attr->group) {
> > +       case KVM_DEV_RISCV_AIA_GRP_CONFIG:
> > +               switch (attr->attr) {
> > +               case KVM_DEV_RISCV_AIA_CONFIG_MODE:
> > +               case KVM_DEV_RISCV_AIA_CONFIG_IDS:
> > +               case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
> > +               case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
> > +               case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
> > +               case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
> > +               case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
> > +                       return 0;
> > +               }
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_GRP_ADDR:
> > +               nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
> > +               if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC)
> > +                       return 0;
> > +               else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
> > +                       return 0;
> > +               break;
> > +       case KVM_DEV_RISCV_AIA_GRP_CTRL:
> > +               switch (attr->attr) {
> > +               case KVM_DEV_RISCV_AIA_CTRL_INIT:
> > +                       return 0;
> > +               }
> > +               break;
> > +       }
> > +
> > +       return -ENXIO;
> > +}
> > +
> > +struct kvm_device_ops kvm_riscv_aia_device_ops = {
> > +       .name = "kvm-riscv-aia",
> > +       .create = aia_create,
> > +       .destroy = aia_destroy,
> > +       .set_attr = aia_set_attr,
> > +       .get_attr = aia_get_attr,
> > +       .has_attr = aia_has_attr,
> > +};
> > +
> > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
> > +{
> > +       /* Proceed only if AIA was initialized successfully */
> > +       if (!kvm_riscv_aia_initialized(vcpu->kvm))
> > +               return 1;
> > +
> > +       /* Update the IMSIC HW state before entering guest mode */
> > +       return kvm_riscv_vcpu_aia_imsic_update(vcpu);
> > +}
> > +
> > +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
> > +{
> > +       struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
> > +       struct kvm_vcpu_aia_csr *reset_csr =
> > +                               &vcpu->arch.aia_context.guest_reset_csr;
> > +
> > +       if (!kvm_riscv_aia_available())
> > +               return;
> > +       memcpy(csr, reset_csr, sizeof(*csr));
> > +
> > +       /* Proceed only if AIA was initialized successfully */
> > +       if (!kvm_riscv_aia_initialized(vcpu->kvm))
> > +               return;
> > +
> > +       /* Reset the IMSIC context */
> > +       kvm_riscv_vcpu_aia_imsic_reset(vcpu);
> > +}
> > +
> > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
> > +{
> > +       struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
> > +
> > +       if (!kvm_riscv_aia_available())
> > +               return 0;
> > +
> > +       /*
> > +        * We don't do any memory allocations over here because these
> > +        * will be done after AIA device is initialized by the user-space.
> > +        *
> > +        * Refer to the aia_init() implementation for more details.
> > +        */
> > +
> > +       /* Initialize default values in AIA vcpu context */
> > +       vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
> > +       vaia->hart_index = vcpu->vcpu_idx;
> > +
> > +       return 0;
> > +}
> > +
> > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
> > +{
> > +       /* Proceed only if AIA was initialized successfully */
> > +       if (!kvm_riscv_aia_initialized(vcpu->kvm))
> > +               return;
> > +
> > +       /* Cleanup IMSIC context */
> > +       kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
> > +}
> > +
> > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
> > +                                  u32 guest_index, u32 iid)
> > +{
> > +       unsigned long idx;
> > +       struct kvm_vcpu *vcpu;
> > +
> > +       /* Proceed only if AIA was initialized successfully */
> > +       if (!kvm_riscv_aia_initialized(kvm))
> > +               return -EBUSY;
> > +
> > +       /* Inject MSI to matching VCPU */
> > +       kvm_for_each_vcpu(idx, vcpu, kvm) {
> > +               if (vcpu->arch.aia_context.hart_index == hart_index)
> > +                       return kvm_riscv_vcpu_aia_imsic_inject(vcpu,
> > +                                                              guest_index,
> > +                                                              0, iid);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
> > +{
> > +       gpa_t tppn, ippn;
> > +       unsigned long idx;
> > +       struct kvm_vcpu *vcpu;
> > +       u32 g, toff, iid = msi->data;
> > +       struct kvm_aia *aia = &kvm->arch.aia;
> > +       gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo;
> > +
> > +       /* Proceed only if AIA was initialized successfully */
> > +       if (!kvm_riscv_aia_initialized(kvm))
> > +               return -EBUSY;
> > +
> > +       /* Convert target address to target PPN */
> > +       tppn = target >> IMSIC_MMIO_PAGE_SHIFT;
> > +
> > +       /* Extract and clear Guest ID from target PPN */
> > +       g = tppn & (BIT(aia->nr_guest_bits) - 1);
> > +       tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1));
> > +
> > +       /* Inject MSI to matching VCPU */
> > +       kvm_for_each_vcpu(idx, vcpu, kvm) {
> > +               ippn = vcpu->arch.aia_context.imsic_addr >>
> > +                                       IMSIC_MMIO_PAGE_SHIFT;
> > +               if (ippn == tppn) {
> > +                       toff = target & (IMSIC_MMIO_PAGE_SZ - 1);
> > +                       return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g,
> > +                                                              toff, iid);
> > +               }
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level)
> > +{
> > +       /* Proceed only if AIA was initialized successfully */
> > +       if (!kvm_riscv_aia_initialized(kvm))
> > +               return -EBUSY;
> > +
> > +       /* Inject interrupt level change in APLIC */
> > +       return kvm_riscv_aia_aplic_inject(kvm, irq, level);
> > +}
> > +
> > +void kvm_riscv_aia_init_vm(struct kvm *kvm)
> > +{
> > +       struct kvm_aia *aia = &kvm->arch.aia;
> > +
> > +       if (!kvm_riscv_aia_available())
> > +               return;
> > +
> > +       /*
> > +        * We don't do any memory allocations over here because these
> > +        * will be done after AIA device is initialized by the user-space.
> > +        *
> > +        * Refer to the aia_init() implementation for more details.
> > +        */
> > +
> > +       /* Initialize default values in AIA global context */
> > +       aia->mode = (kvm_riscv_aia_nr_hgei) ?
> > +               KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL;
> > +       aia->nr_ids = kvm_riscv_aia_max_ids - 1;
> > +       aia->nr_sources = 0;
> > +       aia->nr_group_bits = 0;
> > +       aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN;
> > +       aia->nr_hart_bits = 0;
> > +       aia->nr_guest_bits = 0;
> > +       aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
> > +}
> > +
> > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
> > +{
> > +       /* Proceed only if AIA was initialized successfully */
> > +       if (!kvm_riscv_aia_initialized(kvm))
> > +               return;
> > +
> > +       /* Cleanup APLIC context */
> > +       kvm_riscv_aia_aplic_cleanup(kvm);
> > +}
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 737318b1c1d9..27ccd07898e1 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1442,6 +1442,8 @@ enum kvm_device_type {
> >  #define KVM_DEV_TYPE_XIVE              KVM_DEV_TYPE_XIVE
> >         KVM_DEV_TYPE_ARM_PV_TIME,
> >  #define KVM_DEV_TYPE_ARM_PV_TIME       KVM_DEV_TYPE_ARM_PV_TIME
> > +       KVM_DEV_TYPE_RISCV_AIA,
> > +#define KVM_DEV_TYPE_RISCV_AIA         KVM_DEV_TYPE_RISCV_AIA
> >         KVM_DEV_TYPE_MAX,
> >  };
> >
> > --
> > 2.34.1
> >
>
> Apart from the above comments, LGTM.
>
> --
> Regards,
> Atish

Regards,
Anup

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 09/10] RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
  2023-05-17 10:51 ` [PATCH 09/10] RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC Anup Patel
@ 2023-06-07 23:16   ` Atish Patra
  0 siblings, 0 replies; 20+ messages in thread
From: Atish Patra @ 2023-06-07 23:16 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:52 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> We can have AIA IMSIC support for both HS-level and VS-level, but
> the VS-level IMSICs are optional. We use the VS-level IMSICs for
> Guest/VM whenever available; otherwise we fall back to software
> emulation of the AIA IMSIC.
>
> This patch adds in-kernel virtualization of AIA IMSIC.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  arch/riscv/include/asm/kvm_aia.h |  46 +-
>  arch/riscv/kvm/Makefile          |   1 +
>  arch/riscv/kvm/aia_imsic.c       | 913 +++++++++++++++++++++++++++++++
>  3 files changed, 924 insertions(+), 36 deletions(-)
>  create mode 100644 arch/riscv/kvm/aia_imsic.c
>
> diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> index ba939c0054aa..a4f6ebf90e31 100644
> --- a/arch/riscv/include/asm/kvm_aia.h
> +++ b/arch/riscv/include/asm/kvm_aia.h
> @@ -90,44 +90,18 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
>
>  extern struct kvm_device_ops kvm_riscv_aia_device_ops;
>
> -static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
> -{
> -}
> -
> -static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
> -{
> -       return 1;
> -}
> +void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu);
> +int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);
>
>  #define KVM_RISCV_AIA_IMSIC_TOPEI      (ISELECT_MASK + 1)
> -static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
> -                                              unsigned long isel,
> -                                              unsigned long *val,
> -                                              unsigned long new_val,
> -                                              unsigned long wr_mask)
> -{
> -       return 0;
> -}
> -
> -static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
> -{
> -}
> -
> -static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
> -                                                 u32 guest_index, u32 offset,
> -                                                 u32 iid)
> -{
> -       return 0;
> -}
> -
> -static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
> -{
> -       return 0;
> -}
> -
> -static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
> -{
> -}
> +int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
> +                                unsigned long *val, unsigned long new_val,
> +                                unsigned long wr_mask);
> +void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu);
> +int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
> +                                   u32 guest_index, u32 offset, u32 iid);
> +int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu);
>
>  int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v);
>  int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v);
> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
> index 94c43702c765..c1d1356387ff 100644
> --- a/arch/riscv/kvm/Makefile
> +++ b/arch/riscv/kvm/Makefile
> @@ -29,3 +29,4 @@ kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
>  kvm-y += aia.o
>  kvm-y += aia_device.o
>  kvm-y += aia_aplic.o
> +kvm-y += aia_imsic.o
> diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
> new file mode 100644
> index 000000000000..2dc09dcb8ab5
> --- /dev/null
> +++ b/arch/riscv/kvm/aia_imsic.c
> @@ -0,0 +1,913 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + * Copyright (C) 2022 Ventana Micro Systems Inc.
> + *
> + * Authors:
> + *     Anup Patel <apatel@ventanamicro.com>
> + */
> +
> +#include <linux/bitmap.h>
> +#include <linux/kvm_host.h>
> +#include <linux/math.h>
> +#include <linux/spinlock.h>
> +#include <linux/swab.h>
> +#include <kvm/iodev.h>
> +#include <asm/csr.h>
> +#include <asm/kvm_aia_imsic.h>
> +
> +#define IMSIC_MAX_EIX  (IMSIC_MAX_ID / BITS_PER_TYPE(u64))
> +
> +struct imsic_mrif_eix {
> +       unsigned long eip[BITS_PER_TYPE(u64) / BITS_PER_LONG];
> +       unsigned long eie[BITS_PER_TYPE(u64) / BITS_PER_LONG];
> +};
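> +
> +/*
> + * Illustrative note: BITS_PER_TYPE(u64) / BITS_PER_LONG is 1 on RV64
> + * and 2 on RV32, so each imsic_mrif_eix tracks one 64-bit EIP/EIE
> + * pair, matching how RV32 splits eipX/eieX across two 32-bit
> + * indirect registers.
> + */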
> +
> +struct imsic_mrif {
> +       struct imsic_mrif_eix eix[IMSIC_MAX_EIX];
> +       unsigned long eithreshold;
> +       unsigned long eidelivery;
> +};
> +
> +struct imsic {
> +       struct kvm_io_device iodev;
> +
> +       u32 nr_msis;
> +       u32 nr_eix;
> +       u32 nr_hw_eix;
> +
> +       /*
> +        * At any point in time, the register state is in
> +        * one of the following places:
> +        *
> +        * 1) Hardware: IMSIC VS-file (vsfile_cpu >= 0)
> +        * 2) Software: IMSIC SW-file (vsfile_cpu < 0)
> +        */
> +
> +       /* IMSIC VS-file */
> +       rwlock_t vsfile_lock;
> +       int vsfile_cpu;
> +       int vsfile_hgei;
> +       void __iomem *vsfile_va;
> +       phys_addr_t vsfile_pa;
> +
> +       /* IMSIC SW-file */
> +       struct imsic_mrif *swfile;
> +       phys_addr_t swfile_pa;
> +};
> +
> +#define imsic_vs_csr_read(__c)                 \
> +({                                             \
> +       unsigned long __r;                      \
> +       csr_write(CSR_VSISELECT, __c);          \
> +       __r = csr_read(CSR_VSIREG);             \
> +       __r;                                    \
> +})
> +
> +#define imsic_read_switchcase(__ireg)                  \
> +       case __ireg:                                    \
> +               return imsic_vs_csr_read(__ireg);
> +#define imsic_read_switchcase_2(__ireg)                        \
> +       imsic_read_switchcase(__ireg + 0)               \
> +       imsic_read_switchcase(__ireg + 1)
> +#define imsic_read_switchcase_4(__ireg)                        \
> +       imsic_read_switchcase_2(__ireg + 0)             \
> +       imsic_read_switchcase_2(__ireg + 2)
> +#define imsic_read_switchcase_8(__ireg)                        \
> +       imsic_read_switchcase_4(__ireg + 0)             \
> +       imsic_read_switchcase_4(__ireg + 4)
> +#define imsic_read_switchcase_16(__ireg)               \
> +       imsic_read_switchcase_8(__ireg + 0)             \
> +       imsic_read_switchcase_8(__ireg + 8)
> +#define imsic_read_switchcase_32(__ireg)               \
> +       imsic_read_switchcase_16(__ireg + 0)            \
> +       imsic_read_switchcase_16(__ireg + 16)
> +#define imsic_read_switchcase_64(__ireg)               \
> +       imsic_read_switchcase_32(__ireg + 0)            \
> +       imsic_read_switchcase_32(__ireg + 32)
> +
> +static unsigned long imsic_eix_read(int ireg)
> +{
> +       switch (ireg) {
> +       imsic_read_switchcase_64(IMSIC_EIP0)
> +       imsic_read_switchcase_64(IMSIC_EIE0)
> +       };
> +
> +       return 0;
> +}
> +
> +#define imsic_vs_csr_swap(__c, __v)            \
> +({                                             \
> +       unsigned long __r;                      \
> +       csr_write(CSR_VSISELECT, __c);          \
> +       __r = csr_swap(CSR_VSIREG, __v);        \
> +       __r;                                    \
> +})
> +
> +#define imsic_swap_switchcase(__ireg, __v)             \
> +       case __ireg:                                    \
> +               return imsic_vs_csr_swap(__ireg, __v);
> +#define imsic_swap_switchcase_2(__ireg, __v)           \
> +       imsic_swap_switchcase(__ireg + 0, __v)          \
> +       imsic_swap_switchcase(__ireg + 1, __v)
> +#define imsic_swap_switchcase_4(__ireg, __v)           \
> +       imsic_swap_switchcase_2(__ireg + 0, __v)        \
> +       imsic_swap_switchcase_2(__ireg + 2, __v)
> +#define imsic_swap_switchcase_8(__ireg, __v)           \
> +       imsic_swap_switchcase_4(__ireg + 0, __v)        \
> +       imsic_swap_switchcase_4(__ireg + 4, __v)
> +#define imsic_swap_switchcase_16(__ireg, __v)          \
> +       imsic_swap_switchcase_8(__ireg + 0, __v)        \
> +       imsic_swap_switchcase_8(__ireg + 8, __v)
> +#define imsic_swap_switchcase_32(__ireg, __v)          \
> +       imsic_swap_switchcase_16(__ireg + 0, __v)       \
> +       imsic_swap_switchcase_16(__ireg + 16, __v)
> +#define imsic_swap_switchcase_64(__ireg, __v)          \
> +       imsic_swap_switchcase_32(__ireg + 0, __v)       \
> +       imsic_swap_switchcase_32(__ireg + 32, __v)
> +
> +static unsigned long imsic_eix_swap(int ireg, unsigned long val)
> +{
> +       switch (ireg) {
> +       imsic_swap_switchcase_64(IMSIC_EIP0, val)
> +       imsic_swap_switchcase_64(IMSIC_EIE0, val)
> +       }
> +
> +       return 0;
> +}
> +
> +#define imsic_vs_csr_write(__c, __v)           \
> +do {                                           \
> +       csr_write(CSR_VSISELECT, __c);          \
> +       csr_write(CSR_VSIREG, __v);             \
> +} while (0)
> +
> +#define imsic_write_switchcase(__ireg, __v)            \
> +       case __ireg:                                    \
> +               imsic_vs_csr_write(__ireg, __v);        \
> +               break;
> +#define imsic_write_switchcase_2(__ireg, __v)          \
> +       imsic_write_switchcase(__ireg + 0, __v)         \
> +       imsic_write_switchcase(__ireg + 1, __v)
> +#define imsic_write_switchcase_4(__ireg, __v)          \
> +       imsic_write_switchcase_2(__ireg + 0, __v)       \
> +       imsic_write_switchcase_2(__ireg + 2, __v)
> +#define imsic_write_switchcase_8(__ireg, __v)          \
> +       imsic_write_switchcase_4(__ireg + 0, __v)       \
> +       imsic_write_switchcase_4(__ireg + 4, __v)
> +#define imsic_write_switchcase_16(__ireg, __v)         \
> +       imsic_write_switchcase_8(__ireg + 0, __v)       \
> +       imsic_write_switchcase_8(__ireg + 8, __v)
> +#define imsic_write_switchcase_32(__ireg, __v)         \
> +       imsic_write_switchcase_16(__ireg + 0, __v)      \
> +       imsic_write_switchcase_16(__ireg + 16, __v)
> +#define imsic_write_switchcase_64(__ireg, __v)         \
> +       imsic_write_switchcase_32(__ireg + 0, __v)      \
> +       imsic_write_switchcase_32(__ireg + 32, __v)
> +
> +static void imsic_eix_write(int ireg, unsigned long val)
> +{
> +       switch (ireg) {
> +       imsic_write_switchcase_64(IMSIC_EIP0, val)
> +       imsic_write_switchcase_64(IMSIC_EIE0, val)
> +       }
> +}
> +
> +#define imsic_vs_csr_set(__c, __v)             \
> +do {                                           \
> +       csr_write(CSR_VSISELECT, __c);          \
> +       csr_set(CSR_VSIREG, __v);               \
> +} while (0)
> +
> +#define imsic_set_switchcase(__ireg, __v)              \
> +       case __ireg:                                    \
> +               imsic_vs_csr_set(__ireg, __v);          \
> +               break;
> +#define imsic_set_switchcase_2(__ireg, __v)            \
> +       imsic_set_switchcase(__ireg + 0, __v)           \
> +       imsic_set_switchcase(__ireg + 1, __v)
> +#define imsic_set_switchcase_4(__ireg, __v)            \
> +       imsic_set_switchcase_2(__ireg + 0, __v)         \
> +       imsic_set_switchcase_2(__ireg + 2, __v)
> +#define imsic_set_switchcase_8(__ireg, __v)            \
> +       imsic_set_switchcase_4(__ireg + 0, __v)         \
> +       imsic_set_switchcase_4(__ireg + 4, __v)
> +#define imsic_set_switchcase_16(__ireg, __v)           \
> +       imsic_set_switchcase_8(__ireg + 0, __v)         \
> +       imsic_set_switchcase_8(__ireg + 8, __v)
> +#define imsic_set_switchcase_32(__ireg, __v)           \
> +       imsic_set_switchcase_16(__ireg + 0, __v)        \
> +       imsic_set_switchcase_16(__ireg + 16, __v)
> +#define imsic_set_switchcase_64(__ireg, __v)           \
> +       imsic_set_switchcase_32(__ireg + 0, __v)        \
> +       imsic_set_switchcase_32(__ireg + 32, __v)
> +
> +static void imsic_eix_set(int ireg, unsigned long val)
> +{
> +       switch (ireg) {
> +       imsic_set_switchcase_64(IMSIC_EIP0, val)
> +       imsic_set_switchcase_64(IMSIC_EIE0, val)
> +       }
> +}
> +
> +static unsigned long imsic_mrif_atomic_rmw(struct imsic_mrif *mrif,
> +                                          unsigned long *ptr,
> +                                          unsigned long new_val,
> +                                          unsigned long wr_mask)
> +{
> +       unsigned long old_val = 0, tmp = 0;
> +
> +       __asm__ __volatile__ (
> +               "0:     lr.w.aq   %1, %0\n"
> +               "       and       %2, %1, %3\n"
> +               "       or        %2, %2, %4\n"
> +               "       sc.w.rl   %2, %2, %0\n"
> +               "       bnez      %2, 0b"
> +               : "+A" (*ptr), "+r" (old_val), "+r" (tmp)
> +               : "r" (~wr_mask), "r" (new_val & wr_mask)
> +               : "memory");
> +
> +       return old_val;
> +}
> +
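
For readers not fluent in RISC-V lr/sc: the sequence above atomically
replaces the wr_mask bits of *ptr with the corresponding bits of
new_val and returns the previous value. A portable C11 sketch of the
same semantics (illustration only; the kernel relies on the inline
assembly above):

	#include <stdatomic.h>

	static unsigned long mrif_rmw_sketch(_Atomic unsigned long *ptr,
					     unsigned long new_val,
					     unsigned long wr_mask)
	{
		unsigned long old = atomic_load(ptr);

		/* retry until no other writer raced with us */
		while (!atomic_compare_exchange_weak(ptr, &old,
						     (old & ~wr_mask) |
						     (new_val & wr_mask)))
			;

		return old;
	}
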
> +static unsigned long imsic_mrif_atomic_or(struct imsic_mrif *mrif,
> +                                         unsigned long *ptr,
> +                                         unsigned long val)
> +{
> +       return arch_atomic_long_fetch_or(val, (atomic_long_t *)ptr);
> +}
> +
> +#define imsic_mrif_atomic_write(__mrif, __ptr, __new_val)      \
> +               imsic_mrif_atomic_rmw(__mrif, __ptr, __new_val, -1UL)
> +#define imsic_mrif_atomic_read(__mrif, __ptr)                  \
> +               imsic_mrif_atomic_or(__mrif, __ptr, 0)
> +
> +static u32 imsic_mrif_topei(struct imsic_mrif *mrif, u32 nr_eix, u32 nr_msis)
> +{
> +       struct imsic_mrif_eix *eix;
> +       u32 i, imin, imax, ei, max_msi;
> +       unsigned long eipend[BITS_PER_TYPE(u64) / BITS_PER_LONG];
> +       unsigned long eithreshold = imsic_mrif_atomic_read(mrif,
> +                                                       &mrif->eithreshold);
> +
> +       max_msi = (eithreshold && (eithreshold <= nr_msis)) ?
> +                  eithreshold : nr_msis;
> +       for (ei = 0; ei < nr_eix; ei++) {
> +               eix = &mrif->eix[ei];
> +               eipend[0] = imsic_mrif_atomic_read(mrif, &eix->eie[0]) &
> +                           imsic_mrif_atomic_read(mrif, &eix->eip[0]);
> +#ifdef CONFIG_32BIT
> +               eipend[1] = imsic_mrif_atomic_read(mrif, &eix->eie[1]) &
> +                           imsic_mrif_atomic_read(mrif, &eix->eip[1]);
> +               if (!eipend[0] && !eipend[1])
> +#else
> +               if (!eipend[0])
> +#endif
> +                       continue;
> +
> +               imin = ei * BITS_PER_TYPE(u64);
> +               imax = ((imin + BITS_PER_TYPE(u64)) < max_msi) ?
> +                       imin + BITS_PER_TYPE(u64) : max_msi;
> +               for (i = (!imin) ? 1 : imin; i < imax; i++) {
> +                       if (test_bit(i - imin, eipend))
> +                               return (i << TOPEI_ID_SHIFT) | i;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
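
The return value above follows the AIA *topei layout: the interrupt
identity sits in the upper field and the priority in the lower one,
and for an IMSIC the priority equals the identity, hence
"(i << TOPEI_ID_SHIFT) | i". A small encode/decode sketch (the shift
value of 16 matches the kernel's TOPEI_ID_SHIFT):

	#define DEMO_TOPEI_ID_SHIFT	16

	static unsigned int topei_encode(unsigned int id)
	{
		/* identity in the high field, priority (== identity) low */
		return (id << DEMO_TOPEI_ID_SHIFT) | id;
	}

	static unsigned int topei_to_id(unsigned int topei)
	{
		return topei >> DEMO_TOPEI_ID_SHIFT;
	}
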
> +static int imsic_mrif_rmw(struct imsic_mrif *mrif, u32 nr_eix,
> +                         unsigned long isel, unsigned long *val,
> +                         unsigned long new_val, unsigned long wr_mask)
> +{
> +       bool pend;
> +       struct imsic_mrif_eix *eix;
> +       unsigned long *ei, num, old_val = 0;
> +
> +       switch (isel) {
> +       case IMSIC_EIDELIVERY:
> +               old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eidelivery,
> +                                               new_val, wr_mask & 0x1);
> +               break;
> +       case IMSIC_EITHRESHOLD:
> +               old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eithreshold,
> +                               new_val, wr_mask & (IMSIC_MAX_ID - 1));
> +               break;
> +       case IMSIC_EIP0 ... IMSIC_EIP63:
> +       case IMSIC_EIE0 ... IMSIC_EIE63:
> +               if (isel >= IMSIC_EIP0 && isel <= IMSIC_EIP63) {
> +                       pend = true;
> +                       num = isel - IMSIC_EIP0;
> +               } else {
> +                       pend = false;
> +                       num = isel - IMSIC_EIE0;
> +               }
> +
> +               if ((num / 2) >= nr_eix)
> +                       return -EINVAL;
> +               eix = &mrif->eix[num / 2];
> +
> +#ifndef CONFIG_32BIT
> +               if (num & 0x1)
> +                       return -EINVAL;
> +               ei = (pend) ? &eix->eip[0] : &eix->eie[0];
> +#else
> +               ei = (pend) ? &eix->eip[num & 0x1] : &eix->eie[num & 0x1];
> +#endif
> +
> +               /* Bit0 of EIP0 or EIE0 is read-only */
> +               if (!num)
> +                       wr_mask &= ~BIT(0);
> +
> +               old_val = imsic_mrif_atomic_rmw(mrif, ei, new_val, wr_mask);
> +               break;
> +       default:
> +               return -ENOENT;
> +       }
> +
> +       if (val)
> +               *val = old_val;
> +
> +       return 0;
> +}
> +
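
The "num / 2" indexing above encodes the MRIF layout: one eix[] entry
tracks 64 interrupt identities, which is one 64-bit EIPx/EIEx register
on RV64 or a pair of 32-bit registers on RV32. A sketch of locating
the bit for a given MSI identity (demo helper, not kernel code):

	struct id_loc {
		unsigned int eix;	/* index into mrif->eix[] */
		unsigned int word;	/* always 0 on RV64; low/high word on RV32 */
		unsigned int bit;	/* bit within eip[word] / eie[word] */
	};

	static struct id_loc locate_id(unsigned int id, int rv32)
	{
		struct id_loc loc;

		loc.eix  = id / 64;	/* 64 identities per eix[] entry */
		loc.word = rv32 ? (id % 64) / 32 : 0;
		loc.bit  = rv32 ? id % 32 : id % 64;
		return loc;
	}
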
> +struct imsic_vsfile_read_data {
> +       int hgei;
> +       u32 nr_eix;
> +       bool clear;
> +       struct imsic_mrif *mrif;
> +};
> +
> +static void imsic_vsfile_local_read(void *data)
> +{
> +       u32 i;
> +       struct imsic_mrif_eix *eix;
> +       struct imsic_vsfile_read_data *idata = data;
> +       struct imsic_mrif *mrif = idata->mrif;
> +       unsigned long new_hstatus, old_hstatus, old_vsiselect;
> +
> +       old_vsiselect = csr_read(CSR_VSISELECT);
> +       old_hstatus = csr_read(CSR_HSTATUS);
> +       new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
> +       new_hstatus |= ((unsigned long)idata->hgei) << HSTATUS_VGEIN_SHIFT;
> +       csr_write(CSR_HSTATUS, new_hstatus);
> +
> +       /*
> +        * We don't use the imsic_mrif_atomic_xyz() helpers to store
> +        * values in the MRIF because imsic_vsfile_read() is always
> +        * called with a pointer to a temporary MRIF on the stack.
> +        */
> +
> +       if (idata->clear) {
> +               mrif->eidelivery = imsic_vs_csr_swap(IMSIC_EIDELIVERY, 0);
> +               mrif->eithreshold = imsic_vs_csr_swap(IMSIC_EITHRESHOLD, 0);
> +               for (i = 0; i < idata->nr_eix; i++) {
> +                       eix = &mrif->eix[i];
> +                       eix->eip[0] = imsic_eix_swap(IMSIC_EIP0 + i * 2, 0);
> +                       eix->eie[0] = imsic_eix_swap(IMSIC_EIE0 + i * 2, 0);
> +#ifdef CONFIG_32BIT
> +                       eix->eip[1] = imsic_eix_swap(IMSIC_EIP0 + i * 2 + 1, 0);
> +                       eix->eie[1] = imsic_eix_swap(IMSIC_EIE0 + i * 2 + 1, 0);
> +#endif
> +               }
> +       } else {
> +               mrif->eidelivery = imsic_vs_csr_read(IMSIC_EIDELIVERY);
> +               mrif->eithreshold = imsic_vs_csr_read(IMSIC_EITHRESHOLD);
> +               for (i = 0; i < idata->nr_eix; i++) {
> +                       eix = &mrif->eix[i];
> +                       eix->eip[0] = imsic_eix_read(IMSIC_EIP0 + i * 2);
> +                       eix->eie[0] = imsic_eix_read(IMSIC_EIE0 + i * 2);
> +#ifdef CONFIG_32BIT
> +                       eix->eip[1] = imsic_eix_read(IMSIC_EIP0 + i * 2 + 1);
> +                       eix->eie[1] = imsic_eix_read(IMSIC_EIE0 + i * 2 + 1);
> +#endif
> +               }
> +       }
> +
> +       csr_write(CSR_HSTATUS, old_hstatus);
> +       csr_write(CSR_VSISELECT, old_vsiselect);
> +}
> +
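
The hstatus save/restore dance above is the standard trick for
host-side access to a guest interrupt file: HSTATUS.VGEIN selects
which VS-file the vsiselect/vsireg indirect CSRs target, so the
function temporarily points VGEIN at the file being read and then
restores both CSRs. A sketch of the pattern (the csr_* helpers and
CSR/HSTATUS defines are the kernel's own):

	static void with_vgein(unsigned long hgei, void (*body)(void *), void *data)
	{
		unsigned long old_vsiselect = csr_read(CSR_VSISELECT);
		unsigned long old_hstatus = csr_read(CSR_HSTATUS);

		csr_write(CSR_HSTATUS, (old_hstatus & ~HSTATUS_VGEIN) |
				       (hgei << HSTATUS_VGEIN_SHIFT));
		body(data);		/* vsiselect/vsireg now hit VS-file 'hgei' */
		csr_write(CSR_HSTATUS, old_hstatus);
		csr_write(CSR_VSISELECT, old_vsiselect);
	}
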
> +static void imsic_vsfile_read(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
> +                             bool clear, struct imsic_mrif *mrif)
> +{
> +       struct imsic_vsfile_read_data idata;
> +
> +       /* We can only read (and optionally clear) if we have an IMSIC VS-file */
> +       if (vsfile_cpu < 0 || vsfile_hgei <= 0)
> +               return;
> +
> +       /* The read (and clear) must run on the CPU hosting the VS-file */
> +       idata.hgei = vsfile_hgei;
> +       idata.nr_eix = nr_eix;
> +       idata.clear = clear;
> +       idata.mrif = mrif;
> +       on_each_cpu_mask(cpumask_of(vsfile_cpu),
> +                        imsic_vsfile_local_read, &idata, 1);
> +}
> +
> +static void imsic_vsfile_local_clear(int vsfile_hgei, u32 nr_eix)
> +{
> +       u32 i;
> +       unsigned long new_hstatus, old_hstatus, old_vsiselect;
> +
> +       /* We can only zero-out if we have an IMSIC VS-file */
> +       if (vsfile_hgei <= 0)
> +               return;
> +
> +       old_vsiselect = csr_read(CSR_VSISELECT);
> +       old_hstatus = csr_read(CSR_HSTATUS);
> +       new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
> +       new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
> +       csr_write(CSR_HSTATUS, new_hstatus);
> +
> +       imsic_vs_csr_write(IMSIC_EIDELIVERY, 0);
> +       imsic_vs_csr_write(IMSIC_EITHRESHOLD, 0);
> +       for (i = 0; i < nr_eix; i++) {
> +               imsic_eix_write(IMSIC_EIP0 + i * 2, 0);
> +               imsic_eix_write(IMSIC_EIE0 + i * 2, 0);
> +#ifdef CONFIG_32BIT
> +               imsic_eix_write(IMSIC_EIP0 + i * 2 + 1, 0);
> +               imsic_eix_write(IMSIC_EIE0 + i * 2 + 1, 0);
> +#endif
> +       }
> +
> +       csr_write(CSR_HSTATUS, old_hstatus);
> +       csr_write(CSR_VSISELECT, old_vsiselect);
> +}
> +
> +static void imsic_vsfile_local_update(int vsfile_hgei, u32 nr_eix,
> +                                     struct imsic_mrif *mrif)
> +{
> +       u32 i;
> +       struct imsic_mrif_eix *eix;
> +       unsigned long new_hstatus, old_hstatus, old_vsiselect;
> +
> +       /* We can only update if we have a HW IMSIC context */
> +       if (vsfile_hgei <= 0)
> +               return;
> +
> +       /*
> +        * We don't use the imsic_mrif_atomic_xyz() helpers to read
> +        * values from the MRIF in this function because it is always
> +        * called with a pointer to a temporary MRIF on the stack.
> +        */
> +
> +       old_vsiselect = csr_read(CSR_VSISELECT);
> +       old_hstatus = csr_read(CSR_HSTATUS);
> +       new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
> +       new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
> +       csr_write(CSR_HSTATUS, new_hstatus);
> +
> +       for (i = 0; i < nr_eix; i++) {
> +               eix = &mrif->eix[i];
> +               imsic_eix_set(IMSIC_EIP0 + i * 2, eix->eip[0]);
> +               imsic_eix_set(IMSIC_EIE0 + i * 2, eix->eie[0]);
> +#ifdef CONFIG_32BIT
> +               imsic_eix_set(IMSIC_EIP0 + i * 2 + 1, eix->eip[1]);
> +               imsic_eix_set(IMSIC_EIE0 + i * 2 + 1, eix->eie[1]);
> +#endif
> +       }
> +       imsic_vs_csr_write(IMSIC_EITHRESHOLD, mrif->eithreshold);
> +       imsic_vs_csr_write(IMSIC_EIDELIVERY, mrif->eidelivery);
> +
> +       csr_write(CSR_HSTATUS, old_hstatus);
> +       csr_write(CSR_VSISELECT, old_vsiselect);
> +}
> +
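
One ordering detail in imsic_vsfile_local_update() is worth calling
out: the pending/enable bits are restored first and EIDELIVERY is
written last, so a freshly populated VS-file cannot start signalling
the guest while its state is still partial.
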
> +static void imsic_vsfile_cleanup(struct imsic *imsic)
> +{
> +       int old_vsfile_hgei, old_vsfile_cpu;
> +       unsigned long flags;
> +
> +       /*
> +        * We don't use imsic_mrif_atomic_xyz() functions to clear the
> +        * SW-file in this function because it is always called when the
> +        * VCPU is being destroyed.
> +        */
> +
> +       write_lock_irqsave(&imsic->vsfile_lock, flags);
> +       old_vsfile_hgei = imsic->vsfile_hgei;
> +       old_vsfile_cpu = imsic->vsfile_cpu;
> +       imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
> +       imsic->vsfile_va = NULL;
> +       imsic->vsfile_pa = 0;
> +       write_unlock_irqrestore(&imsic->vsfile_lock, flags);
> +
> +       memset(imsic->swfile, 0, sizeof(*imsic->swfile));
> +
> +       if (old_vsfile_cpu >= 0)
> +               kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
> +}
> +
> +static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
> +{
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +       struct imsic_mrif *mrif = imsic->swfile;
> +
> +       if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
> +           imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
> +               kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
> +       else
> +               kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
> +}
> +
> +static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
> +                             struct imsic_mrif *mrif)
> +{
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +
> +       /*
> +        * We don't use the imsic_mrif_atomic_xyz() helpers to read and
> +        * write the SW-file and MRIF in this function because it is
> +        * always called when the VCPU is not using the SW-file and the
> +        * MRIF argument points to a temporary MRIF on the stack.
> +        */
> +
> +       memcpy(mrif, imsic->swfile, sizeof(*mrif));
> +       if (clear) {
> +               memset(imsic->swfile, 0, sizeof(*imsic->swfile));
> +               kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
> +       }
> +}
> +
> +static void imsic_swfile_update(struct kvm_vcpu *vcpu,
> +                               struct imsic_mrif *mrif)
> +{
> +       u32 i;
> +       struct imsic_mrif_eix *seix, *eix;
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +       struct imsic_mrif *smrif = imsic->swfile;
> +
> +       imsic_mrif_atomic_write(smrif, &smrif->eidelivery, mrif->eidelivery);
> +       imsic_mrif_atomic_write(smrif, &smrif->eithreshold, mrif->eithreshold);
> +       for (i = 0; i < imsic->nr_eix; i++) {
> +               seix = &smrif->eix[i];
> +               eix = &mrif->eix[i];
> +               imsic_mrif_atomic_or(smrif, &seix->eip[0], eix->eip[0]);
> +               imsic_mrif_atomic_or(smrif, &seix->eie[0], eix->eie[0]);
> +#ifdef CONFIG_32BIT
> +               imsic_mrif_atomic_or(smrif, &seix->eip[1], eix->eip[1]);
> +               imsic_mrif_atomic_or(smrif, &seix->eie[1], eix->eie[1]);
> +#endif
> +       }
> +
> +       imsic_swfile_extirq_update(vcpu);
> +}
> +
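
Note that the pending/enable bits are OR-ed into the SW-file rather
than assigned, presumably so that an MSI landing in the SW-file
concurrently (the set_bit() path in kvm_riscv_vcpu_aia_imsic_inject()
below) is never lost while VS-file state is being folded back in. A
one-function sketch of the merge semantics:

	#include <stdatomic.h>

	/* merge, don't overwrite: bits set by a racing writer survive */
	static void merge_bits(_Atomic unsigned long *dst, unsigned long bits)
	{
		atomic_fetch_or(dst, bits);
	}
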
> +void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
> +{
> +       unsigned long flags;
> +       struct imsic_mrif tmrif;
> +       int old_vsfile_hgei, old_vsfile_cpu;
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +
> +       /* Read and clear IMSIC VS-file details */
> +       write_lock_irqsave(&imsic->vsfile_lock, flags);
> +       old_vsfile_hgei = imsic->vsfile_hgei;
> +       old_vsfile_cpu = imsic->vsfile_cpu;
> +       imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
> +       imsic->vsfile_va = NULL;
> +       imsic->vsfile_pa = 0;
> +       write_unlock_irqrestore(&imsic->vsfile_lock, flags);
> +
> +       /* Do nothing if there is no IMSIC VS-file to release */
> +       if (old_vsfile_cpu < 0)
> +               return;
> +
> +       /*
> +        * At this point, all interrupt producers are still using
> +        * the old IMSIC VS-file so we first re-direct all interrupt
> +        * producers.
> +        */
> +
> +       /* Purge the G-stage mapping */
> +       kvm_riscv_gstage_iounmap(vcpu->kvm,
> +                                vcpu->arch.aia_context.imsic_addr,
> +                                IMSIC_MMIO_PAGE_SZ);
> +
> +       /* TODO: Purge the IOMMU mapping ??? */
> +
> +       /*
> +        * At this point, all interrupt producers have been re-directed
> +        * to somewhere else so we move register state from the old IMSIC
> +        * VS-file to the IMSIC SW-file.
> +        */
> +
> +       /* Read and clear register state from old IMSIC VS-file */
> +       memset(&tmrif, 0, sizeof(tmrif));
> +       imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu, imsic->nr_hw_eix,
> +                         true, &tmrif);
> +
> +       /* Update register state in IMSIC SW-file */
> +       imsic_swfile_update(vcpu, &tmrif);
> +
> +       /* Free-up old IMSIC VS-file */
> +       kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
> +}
> +
> +int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
> +{
> +       unsigned long flags;
> +       phys_addr_t new_vsfile_pa;
> +       struct imsic_mrif tmrif;
> +       void __iomem *new_vsfile_va;
> +       struct kvm *kvm = vcpu->kvm;
> +       struct kvm_run *run = vcpu->run;
> +       struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
> +       struct imsic *imsic = vaia->imsic_state;
> +       int ret = 0, new_vsfile_hgei = -1, old_vsfile_hgei, old_vsfile_cpu;
> +
> +       /* Do nothing for emulation mode */
> +       if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
> +               return 1;
> +
> +       /* Read old IMSIC VS-file details */
> +       read_lock_irqsave(&imsic->vsfile_lock, flags);
> +       old_vsfile_hgei = imsic->vsfile_hgei;
> +       old_vsfile_cpu = imsic->vsfile_cpu;
> +       read_unlock_irqrestore(&imsic->vsfile_lock, flags);
> +
> +       /* Do nothing if we are continuing on the same CPU */
> +       if (old_vsfile_cpu == vcpu->cpu)
> +               return 1;
> +
> +       /* Allocate new IMSIC VS-file */
> +       ret = kvm_riscv_aia_alloc_hgei(vcpu->cpu, vcpu,
> +                                      &new_vsfile_va, &new_vsfile_pa);
> +       if (ret <= 0) {
> +               /* For HW acceleration mode, we can't continue */
> +               if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_HWACCEL) {
> +                       run->fail_entry.hardware_entry_failure_reason =
> +                                                               CSR_HSTATUS;
> +                       run->fail_entry.cpu = vcpu->cpu;
> +                       run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> +                       return 0;
> +               }
> +
> +               /* Release old IMSIC VS-file */
> +               if (old_vsfile_cpu >= 0)
> +                       kvm_riscv_vcpu_aia_imsic_release(vcpu);
> +
> +               /* For automatic mode, we continue */
> +               goto done;
> +       }
> +       new_vsfile_hgei = ret;
> +
> +       /*
> +        * At this point, all interrupt producers are still using
> +        * the old IMSIC VS-file so we first move all interrupt
> +        * producers to the new IMSIC VS-file.
> +        */
> +
> +       /* Zero-out new IMSIC VS-file */
> +       imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix);
> +
> +       /* Update G-stage mapping for the new IMSIC VS-file */
> +       ret = kvm_riscv_gstage_ioremap(kvm, vcpu->arch.aia_context.imsic_addr,
> +                                      new_vsfile_pa, IMSIC_MMIO_PAGE_SZ,
> +                                      true, true);
> +       if (ret)
> +               goto fail_free_vsfile_hgei;
> +
> +       /* TODO: Update the IOMMU mapping ??? */
> +
> +       /* Update new IMSIC VS-file details in IMSIC context */
> +       write_lock_irqsave(&imsic->vsfile_lock, flags);
> +       imsic->vsfile_hgei = new_vsfile_hgei;
> +       imsic->vsfile_cpu = vcpu->cpu;
> +       imsic->vsfile_va = new_vsfile_va;
> +       imsic->vsfile_pa = new_vsfile_pa;
> +       write_unlock_irqrestore(&imsic->vsfile_lock, flags);
> +
> +       /*
> +        * At this point, all interrupt producers have been moved
> +        * to the new IMSIC VS-file so we move register state from
> +        * the old IMSIC VS/SW-file to the new IMSIC VS-file.
> +        */
> +
> +       memset(&tmrif, 0, sizeof(tmrif));
> +       if (old_vsfile_cpu >= 0) {
> +               /* Read and clear register state from old IMSIC VS-file */
> +               imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu,
> +                                 imsic->nr_hw_eix, true, &tmrif);
> +
> +               /* Free-up old IMSIC VS-file */
> +               kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
> +       } else {
> +               /* Read and clear register state from IMSIC SW-file */
> +               imsic_swfile_read(vcpu, true, &tmrif);
> +       }
> +
> +       /* Restore register state in the new IMSIC VS-file */
> +       imsic_vsfile_local_update(new_vsfile_hgei, imsic->nr_hw_eix, &tmrif);
> +
> +done:
> +       /* Set VCPU HSTATUS.VGEIN to new IMSIC VS-file */
> +       vcpu->arch.guest_context.hstatus &= ~HSTATUS_VGEIN;
> +       if (new_vsfile_hgei > 0)
> +               vcpu->arch.guest_context.hstatus |=
> +                       ((unsigned long)new_vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
> +
> +       /* Continue run-loop */
> +       return 1;
> +
> +fail_free_vsfile_hgei:
> +       kvm_riscv_aia_free_hgei(vcpu->cpu, new_vsfile_hgei);
> +       return ret;
> +}
> +
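
For completeness, a sketch of what a VMM sees when HWACCEL mode runs
out of VS-files (the field names come from the generic struct kvm_run;
the recovery policy is the VMM's business and purely illustrative
here):

	/* after ioctl(vcpu_fd, KVM_RUN, 0) returns */
	if (run->exit_reason == KVM_EXIT_FAIL_ENTRY) {
		fprintf(stderr, "vcpu entry failed: reason=0x%llx cpu=%u\n",
			(unsigned long long)run->fail_entry.hardware_entry_failure_reason,
			run->fail_entry.cpu);
		/* e.g. retry on another pCPU or restart in EMUL mode */
	}
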
> +int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
> +                                unsigned long *val, unsigned long new_val,
> +                                unsigned long wr_mask)
> +{
> +       u32 topei;
> +       struct imsic_mrif_eix *eix;
> +       int r, rc = KVM_INSN_CONTINUE_NEXT_SEPC;
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +
> +       if (isel == KVM_RISCV_AIA_IMSIC_TOPEI) {
> +               /* Read pending and enabled interrupt with highest priority */
> +               topei = imsic_mrif_topei(imsic->swfile, imsic->nr_eix,
> +                                        imsic->nr_msis);
> +               if (val)
> +                       *val = topei;
> +
> +               /* Writes ignore the value and clear the top pending interrupt */
> +               if (topei && wr_mask) {
> +                       topei >>= TOPEI_ID_SHIFT;
> +                       if (topei) {
> +                               eix = &imsic->swfile->eix[topei /
> +                                                         BITS_PER_TYPE(u64)];
> +                               clear_bit(topei & (BITS_PER_TYPE(u64) - 1),
> +                                         eix->eip);
> +                       }
> +               }
> +       } else {
> +               r = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix, isel,
> +                                  val, new_val, wr_mask);
> +               /* Forward unknown IMSIC register to user-space */
> +               if (r)
> +                       rc = (r == -ENOENT) ? 0 : KVM_INSN_ILLEGAL_TRAP;
> +       }
> +
> +       if (wr_mask)
> +               imsic_swfile_extirq_update(vcpu);
> +
> +       return rc;
> +}
> +
> +void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
> +{
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +
> +       if (!imsic)
> +               return;
> +
> +       kvm_riscv_vcpu_aia_imsic_release(vcpu);
> +
> +       memset(imsic->swfile, 0, sizeof(*imsic->swfile));
> +}
> +
> +int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
> +                                   u32 guest_index, u32 offset, u32 iid)
> +{
> +       unsigned long flags;
> +       struct imsic_mrif_eix *eix;
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +
> +       /* We only emulate one IMSIC MMIO page for each Guest VCPU */
> +       if (!imsic || !iid || guest_index ||
> +           (offset != IMSIC_MMIO_SETIPNUM_LE &&
> +            offset != IMSIC_MMIO_SETIPNUM_BE))
> +               return -ENODEV;
> +
> +       iid = (offset == IMSIC_MMIO_SETIPNUM_BE) ? __swab32(iid) : iid;
> +       if (imsic->nr_msis <= iid)
> +               return -EINVAL;
> +
> +       read_lock_irqsave(&imsic->vsfile_lock, flags);
> +
> +       if (imsic->vsfile_cpu >= 0) {
> +               writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE);
> +               kvm_vcpu_kick(vcpu);
> +       } else {
> +               eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)];
> +               set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip);
> +               imsic_swfile_extirq_update(vcpu);
> +       }
> +
> +       read_unlock_irqrestore(&imsic->vsfile_lock, flags);
> +
> +       return 0;
> +}
> +
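
The __swab32() above services big-endian guests: a BE guest storing
identity 5 to the setipnum_be alias lands on the little-endian host
as 0x05000000, and the byte swap recovers 5 before the range check.
A standalone sketch of that swap:

	#include <stdint.h>

	static uint32_t swab32_sketch(uint32_t v)
	{
		return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) <<  8) |
		       ((v & 0x00ff0000u) >>  8) | ((v & 0xff000000u) >> 24);
	}

	/* swab32_sketch(0x05000000) == 5 */
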
> +static int imsic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> +                          gpa_t addr, int len, void *val)
> +{
> +       if (len != 4 || (addr & 0x3) != 0)
> +               return -EOPNOTSUPP;
> +
> +       *((u32 *)val) = 0;
> +
> +       return 0;
> +}
> +
> +static int imsic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> +                           gpa_t addr, int len, const void *val)
> +{
> +       struct kvm_msi msi = { 0 };
> +
> +       if (len != 4 || (addr & 0x3) != 0)
> +               return -EOPNOTSUPP;
> +
> +       msi.address_hi = addr >> 32;
> +       msi.address_lo = (u32)addr;
> +       msi.data = *((const u32 *)val);
> +       kvm_riscv_aia_inject_msi(vcpu->kvm, &msi);
> +
> +       return 0;
> +}
> +
> +static struct kvm_io_device_ops imsic_iodoev_ops = {
> +       .read = imsic_mmio_read,
> +       .write = imsic_mmio_write,
> +};
> +
> +int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
> +{
> +       int ret = 0;
> +       struct imsic *imsic;
> +       struct page *swfile_page;
> +       struct kvm *kvm = vcpu->kvm;
> +
> +       /* Fail if we have zero IDs */
> +       if (!kvm->arch.aia.nr_ids)
> +               return -EINVAL;
> +
> +       /* Allocate IMSIC context */
> +       imsic = kzalloc(sizeof(*imsic), GFP_KERNEL);
> +       if (!imsic)
> +               return -ENOMEM;
> +       vcpu->arch.aia_context.imsic_state = imsic;
> +
> +       /* Setup IMSIC context */
> +       imsic->nr_msis = kvm->arch.aia.nr_ids + 1;
> +       rwlock_init(&imsic->vsfile_lock);
> +       imsic->nr_eix = BITS_TO_U64(imsic->nr_msis);
> +       imsic->nr_hw_eix = BITS_TO_U64(kvm_riscv_aia_max_ids);
> +       imsic->vsfile_hgei = imsic->vsfile_cpu = -1;
> +
> +       /* Setup IMSIC SW-file */
> +       swfile_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> +                                 get_order(sizeof(*imsic->swfile)));
> +       if (!swfile_page) {
> +               ret = -ENOMEM;
> +               goto fail_free_imsic;
> +       }
> +       imsic->swfile = page_to_virt(swfile_page);
> +       imsic->swfile_pa = page_to_phys(swfile_page);
> +
> +       /* Setup IO device */
> +       kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
> +       mutex_lock(&kvm->slots_lock);
> +       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
> +                                     vcpu->arch.aia_context.imsic_addr,
> +                                     KVM_DEV_RISCV_IMSIC_SIZE,
> +                                     &imsic->iodev);
> +       mutex_unlock(&kvm->slots_lock);
> +       if (ret)
> +               goto fail_free_swfile;
> +
> +       return 0;
> +
> +fail_free_swfile:
> +       free_pages((unsigned long)imsic->swfile,
> +                  get_order(sizeof(*imsic->swfile)));
> +fail_free_imsic:
> +       vcpu->arch.aia_context.imsic_state = NULL;
> +       kfree(imsic);
> +       return ret;
> +}
> +
> +void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm *kvm = vcpu->kvm;
> +       struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> +
> +       if (!imsic)
> +               return;
> +
> +       imsic_vsfile_cleanup(imsic);
> +
> +       mutex_lock(&kvm->slots_lock);
> +       kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &imsic->iodev);
> +       mutex_unlock(&kvm->slots_lock);
> +
> +       free_pages((unsigned long)imsic->swfile,
> +                  get_order(sizeof(*imsic->swfile)));
> +
> +       vcpu->arch.aia_context.imsic_state = NULL;
> +       kfree(imsic);
> +}
> --
> 2.34.1
>


Reviewed-by: Atish Patra <atishp@rivosinc.com>
-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 10/10] RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
  2023-05-17 10:51 ` [PATCH 10/10] RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip Anup Patel
@ 2023-06-07 23:17   ` Atish Patra
  0 siblings, 0 replies; 20+ messages in thread
From: Atish Patra @ 2023-06-07 23:17 UTC (permalink / raw)
  To: Anup Patel
  Cc: Paolo Bonzini, Palmer Dabbelt, Paul Walmsley, Andrew Jones, kvm,
	kvm-riscv, linux-riscv, linux-kernel

On Wed, May 17, 2023 at 3:52 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> We expose IMSIC registers as KVM device attributes of the in-kernel
> AIA irqchip device. This will allow KVM user-space to save/restore
> IMSIC state of each VCPU using KVM device ioctls().
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  arch/riscv/include/asm/kvm_aia.h  |   3 +
>  arch/riscv/include/uapi/asm/kvm.h |  12 +++
>  arch/riscv/kvm/aia_device.c       |  29 ++++-
>  arch/riscv/kvm/aia_imsic.c        | 170 ++++++++++++++++++++++++++++++
>  4 files changed, 212 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
> index a4f6ebf90e31..1f37b600ca47 100644
> --- a/arch/riscv/include/asm/kvm_aia.h
> +++ b/arch/riscv/include/asm/kvm_aia.h
> @@ -97,6 +97,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);
>  int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
>                                  unsigned long *val, unsigned long new_val,
>                                  unsigned long wr_mask);
> +int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
> +                               bool write, unsigned long *val);
> +int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type);
>  void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu);
>  int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
>                                     u32 guest_index, u32 offset, u32 iid);
> diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
> index e80210c2220b..624784bb21dd 100644
> --- a/arch/riscv/include/uapi/asm/kvm.h
> +++ b/arch/riscv/include/uapi/asm/kvm.h
> @@ -242,6 +242,18 @@ enum KVM_RISCV_SBI_EXT_ID {
>
>  #define KVM_DEV_RISCV_AIA_GRP_APLIC            3
>
> +#define KVM_DEV_RISCV_AIA_GRP_IMSIC            4
> +#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS      12
> +#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK      \
> +               ((1U << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) - 1)
> +#define KVM_DEV_RISCV_AIA_IMSIC_MKATTR(__vcpu, __isel) \
> +               (((__vcpu) << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) | \
> +                ((__isel) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK))
> +#define KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(__attr)       \
> +               ((__attr) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK)
> +#define KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(__attr)       \
> +               ((__attr) >> KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS)
> +
>  /* One single KVM irqchip, ie. the AIA */
>  #define KVM_NR_IRQCHIPS                        1
>
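
Putting the encoding to use, a VMM save/restore path would look
roughly like the sketch below (built against the uapi headers added
by this series). The aia_fd handle, obtained via KVM_CREATE_DEVICE,
and the 0x72 eithreshold iselect value are illustrative assumptions,
not part of this patch:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	#define DEMO_IMSIC_EITHRESHOLD	0x72	/* AIA iselect for eithreshold */

	static int imsic_get_reg(int aia_fd, unsigned int vcpu_id,
				 unsigned int isel, unsigned long *val)
	{
		struct kvm_device_attr attr = {
			.group = KVM_DEV_RISCV_AIA_GRP_IMSIC,
			.attr  = KVM_DEV_RISCV_AIA_IMSIC_MKATTR(vcpu_id, isel),
			.addr  = (unsigned long)val,
		};

		return ioctl(aia_fd, KVM_GET_DEVICE_ATTR, &attr);
	}
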
> diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
> index 17dba92a90e1..ac7bd98301a3 100644
> --- a/arch/riscv/kvm/aia_device.c
> +++ b/arch/riscv/kvm/aia_device.c
> @@ -326,7 +326,7 @@ static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
>         u32 nr;
>         u64 addr;
>         int nr_vcpus, r = -ENXIO;
> -       unsigned long type = (unsigned long)attr->attr;
> +       unsigned long v, type = (unsigned long)attr->attr;
>         void __user *uaddr = (void __user *)(long)attr->addr;
>
>         switch (attr->group) {
> @@ -373,6 +373,15 @@ static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
>                 r = kvm_riscv_aia_aplic_set_attr(dev->kvm, type, nr);
>                 mutex_unlock(&dev->kvm->lock);
>
> +               break;
> +       case KVM_DEV_RISCV_AIA_GRP_IMSIC:
> +               if (copy_from_user(&v, uaddr, sizeof(v)))
> +                       return -EFAULT;
> +
> +               mutex_lock(&dev->kvm->lock);
> +               r = kvm_riscv_aia_imsic_rw_attr(dev->kvm, type, true, &v);
> +               mutex_unlock(&dev->kvm->lock);
> +
>                 break;
>         }
>
> @@ -385,7 +394,7 @@ static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
>         u64 addr;
>         int nr_vcpus, r = -ENXIO;
>         void __user *uaddr = (void __user *)(long)attr->addr;
> -       unsigned long type = (unsigned long)attr->attr;
> +       unsigned long v, type = (unsigned long)attr->attr;
>
>         switch (attr->group) {
>         case KVM_DEV_RISCV_AIA_GRP_CONFIG:
> @@ -434,6 +443,20 @@ static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
>                 if (copy_to_user(uaddr, &nr, sizeof(nr)))
>                         return -EFAULT;
>
> +               break;
> +       case KVM_DEV_RISCV_AIA_GRP_IMSIC:
> +               if (copy_from_user(&v, uaddr, sizeof(v)))
> +                       return -EFAULT;
> +
> +               mutex_lock(&dev->kvm->lock);
> +               r = kvm_riscv_aia_imsic_rw_attr(dev->kvm, type, false, &v);
> +               mutex_unlock(&dev->kvm->lock);
> +               if (r)
> +                       return r;
> +
> +               if (copy_to_user(uaddr, &v, sizeof(v)))
> +                       return -EFAULT;
> +
>                 break;
>         }
>
> @@ -472,6 +495,8 @@ static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
>                 break;
>         case KVM_DEV_RISCV_AIA_GRP_APLIC:
>                 return kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr);
> +       case KVM_DEV_RISCV_AIA_GRP_IMSIC:
> +               return kvm_riscv_aia_imsic_has_attr(dev->kvm, attr->attr);
>         }
>
>         return -ENXIO;
> diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
> index 2dc09dcb8ab5..8f108cfa80e5 100644
> --- a/arch/riscv/kvm/aia_imsic.c
> +++ b/arch/riscv/kvm/aia_imsic.c
> @@ -277,6 +277,33 @@ static u32 imsic_mrif_topei(struct imsic_mrif *mrif, u32 nr_eix, u32 nr_msis)
>         return 0;
>  }
>
> +static int imsic_mrif_isel_check(u32 nr_eix, unsigned long isel)
> +{
> +       u32 num = 0;
> +
> +       switch (isel) {
> +       case IMSIC_EIDELIVERY:
> +       case IMSIC_EITHRESHOLD:
> +               break;
> +       case IMSIC_EIP0 ... IMSIC_EIP63:
> +               num = isel - IMSIC_EIP0;
> +               break;
> +       case IMSIC_EIE0 ... IMSIC_EIE63:
> +               num = isel - IMSIC_EIE0;
> +               break;
> +       default:
> +               return -ENOENT;
> +       }
> +#ifndef CONFIG_32BIT
> +       if (num & 0x1)
> +               return -EINVAL;
> +#endif
> +       if ((num / 2) >= nr_eix)
> +               return -EINVAL;
> +
> +       return 0;
> +}
> +
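
One consequence of the check above: on RV64 the odd EIPx/EIEx
register numbers are rejected because each 64-bit register already
spans two 32-bit register numbers. A sketch with concrete cases,
assuming nr_eix == 1 (64 identities):

	static int isel_num_ok_rv64(unsigned int num, unsigned int nr_eix)
	{
		if (num & 0x1)
			return 0;	/* odd: alias of a 64-bit register's high half */
		return (num / 2) < nr_eix;
	}

	/* nr_eix == 1: num 0 -> ok, num 1 -> rejected, num 2 -> rejected */
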
>  static int imsic_mrif_rmw(struct imsic_mrif *mrif, u32 nr_eix,
>                           unsigned long isel, unsigned long *val,
>                           unsigned long new_val, unsigned long wr_mask)
> @@ -407,6 +434,86 @@ static void imsic_vsfile_read(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
>                          imsic_vsfile_local_read, &idata, 1);
>  }
>
> +struct imsic_vsfile_rw_data {
> +       int hgei;
> +       int isel;
> +       bool write;
> +       unsigned long val;
> +};
> +
> +static void imsic_vsfile_local_rw(void *data)
> +{
> +       struct imsic_vsfile_rw_data *idata = data;
> +       unsigned long new_hstatus, old_hstatus, old_vsiselect;
> +
> +       old_vsiselect = csr_read(CSR_VSISELECT);
> +       old_hstatus = csr_read(CSR_HSTATUS);
> +       new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
> +       new_hstatus |= ((unsigned long)idata->hgei) << HSTATUS_VGEIN_SHIFT;
> +       csr_write(CSR_HSTATUS, new_hstatus);
> +
> +       switch (idata->isel) {
> +       case IMSIC_EIDELIVERY:
> +               if (idata->write)
> +                       imsic_vs_csr_write(IMSIC_EIDELIVERY, idata->val);
> +               else
> +                       idata->val = imsic_vs_csr_read(IMSIC_EIDELIVERY);
> +               break;
> +       case IMSIC_EITHRESHOLD:
> +               if (idata->write)
> +                       imsic_vs_csr_write(IMSIC_EITHRESHOLD, idata->val);
> +               else
> +                       idata->val = imsic_vs_csr_read(IMSIC_EITHRESHOLD);
> +               break;
> +       case IMSIC_EIP0 ... IMSIC_EIP63:
> +       case IMSIC_EIE0 ... IMSIC_EIE63:
> +#ifndef CONFIG_32BIT
> +               if (idata->isel & 0x1)
> +                       break;
> +#endif
> +               if (idata->write)
> +                       imsic_eix_write(idata->isel, idata->val);
> +               else
> +                       idata->val = imsic_eix_read(idata->isel);
> +               break;
> +       default:
> +               break;
> +       }
> +
> +       csr_write(CSR_HSTATUS, old_hstatus);
> +       csr_write(CSR_VSISELECT, old_vsiselect);
> +}
> +
> +static int imsic_vsfile_rw(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
> +                          unsigned long isel, bool write,
> +                          unsigned long *val)
> +{
> +       int rc;
> +       struct imsic_vsfile_rw_data rdata;
> +
> +       /* We can only access the register if we have an IMSIC VS-file */
> +       if (vsfile_cpu < 0 || vsfile_hgei <= 0)
> +               return -EINVAL;
> +
> +       /* Check IMSIC register iselect */
> +       rc = imsic_mrif_isel_check(nr_eix, isel);
> +       if (rc)
> +               return rc;
> +
> +       /* We can only access the register on the local CPU */
> +       rdata.hgei = vsfile_hgei;
> +       rdata.isel = isel;
> +       rdata.write = write;
> +       rdata.val = (write) ? *val : 0;
> +       on_each_cpu_mask(cpumask_of(vsfile_cpu),
> +                        imsic_vsfile_local_rw, &rdata, 1);
> +
> +       if (!write)
> +               *val = rdata.val;
> +
> +       return 0;
> +}
> +
>  static void imsic_vsfile_local_clear(int vsfile_hgei, u32 nr_eix)
>  {
>         u32 i;
> @@ -758,6 +865,69 @@ int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
>         return rc;
>  }
>
> +int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
> +                               bool write, unsigned long *val)
> +{
> +       u32 isel, vcpu_id;
> +       unsigned long flags;
> +       struct imsic *imsic;
> +       struct kvm_vcpu *vcpu;
> +       int rc, vsfile_hgei, vsfile_cpu;
> +
> +       if (!kvm_riscv_aia_initialized(kvm))
> +               return -ENODEV;
> +
> +       vcpu_id = KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(type);
> +       vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
> +       if (!vcpu)
> +               return -ENODEV;
> +
> +       isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
> +       imsic = vcpu->arch.aia_context.imsic_state;
> +
> +       read_lock_irqsave(&imsic->vsfile_lock, flags);
> +
> +       rc = 0;
> +       vsfile_hgei = imsic->vsfile_hgei;
> +       vsfile_cpu = imsic->vsfile_cpu;
> +       if (vsfile_cpu < 0) {
> +               if (write) {
> +                       rc = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix,
> +                                           isel, NULL, *val, -1UL);
> +                       imsic_swfile_extirq_update(vcpu);
> +               } else {
> +                       rc = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix,
> +                                           isel, val, 0, 0);
> +               }
> +       }
> +
> +       read_unlock_irqrestore(&imsic->vsfile_lock, flags);
> +
> +       if (!rc && vsfile_cpu >= 0)
> +               rc = imsic_vsfile_rw(vsfile_hgei, vsfile_cpu, imsic->nr_eix,
> +                                    isel, write, val);
> +
> +       return rc;
> +}
> +
> +int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type)
> +{
> +       u32 isel, vcpu_id;
> +       struct imsic *imsic;
> +       struct kvm_vcpu *vcpu;
> +
> +       if (!kvm_riscv_aia_initialized(kvm))
> +               return -ENODEV;
> +
> +       vcpu_id = KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(type);
> +       vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
> +       if (!vcpu)
> +               return -ENODEV;
> +
> +       isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
> +       imsic = vcpu->arch.aia_context.imsic_state;
> +       return imsic_mrif_isel_check(imsic->nr_eix, isel);
> +}
> +
>  void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
>  {
>         struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
> --
> 2.34.1
>


Reviewed-by: Atish Patra <atishp@rivosinc.com>
-- 
Regards,
Atish

^ permalink raw reply	[flat|nested] 20+ messages in thread


Thread overview: 20+ messages
2023-05-17 10:51 [PATCH 00/10] RISC-V KVM in-kernel AIA irqchip Anup Patel
2023-05-17 10:51 ` [PATCH 01/10] RISC-V: KVM: Implement guest external interrupt line management Anup Patel
2023-06-06 22:49   ` Atish Patra
2023-05-17 10:51 ` [PATCH 02/10] RISC-V: KVM: Add IMSIC related defines Anup Patel
2023-06-06 22:51   ` Atish Patra
2023-05-17 10:51 ` [PATCH 03/10] RISC-V: KVM: Add APLIC " Anup Patel
2023-06-06 22:51   ` Atish Patra
2023-05-17 10:51 ` [PATCH 04/10] RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero Anup Patel
2023-06-06 22:53   ` Atish Patra
2023-05-17 10:51 ` [PATCH 05/10] RISC-V: KVM: Skeletal in-kernel AIA irqchip support Anup Patel
2023-06-06 23:19   ` Atish Patra
2023-05-17 10:51 ` [PATCH 06/10] RISC-V: KVM: Implement device interface for AIA irqchip Anup Patel
2023-06-07  0:13   ` Atish Patra
2023-06-07 14:23     ` Anup Patel
2023-05-17 10:51 ` [PATCH 07/10] RISC-V: KVM: Add in-kernel emulation of AIA APLIC Anup Patel
2023-05-17 10:51 ` [PATCH 08/10] RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip Anup Patel
2023-05-17 10:51 ` [PATCH 09/10] RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC Anup Patel
2023-06-07 23:16   ` Atish Patra
2023-05-17 10:51 ` [PATCH 10/10] RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip Anup Patel
2023-06-07 23:17   ` Atish Patra
