On Mon, Jan 07, 2019 at 07:39:35PM +0100, Cédric Le Goater wrote: > This introduces a set of helpers when KVM is in use, which create the > KVM XIVE device, initialize the interrupt sources at a KVM level and > connect the interrupt presenters to the vCPU. > > They also handle the initialization of the TIMA and the source ESB > memory regions of the controller. These have a different type under > KVM. They are 'ram device' memory mappings, similarly to VFIO, exposed > to the guest and the associated VMAs on the host are populated > dynamically with the appropriate pages using a fault handler. > > Signed-off-by: Cédric Le Goater Looks fine apart from the details of how the KVM interface works. > --- > default-configs/ppc64-softmmu.mak | 1 + > include/hw/ppc/spapr_xive.h | 10 ++ > include/hw/ppc/xive.h | 22 +++ > target/ppc/kvm_ppc.h | 6 + > hw/intc/spapr_xive.c | 31 ++-- > hw/intc/spapr_xive_kvm.c | 254 ++++++++++++++++++++++++++++++ > hw/intc/xive.c | 22 ++- > hw/ppc/spapr_irq.c | 11 +- > target/ppc/kvm.c | 7 + > hw/intc/Makefile.objs | 1 + > 10 files changed, 349 insertions(+), 16 deletions(-) > create mode 100644 hw/intc/spapr_xive_kvm.c > > diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak > index 7f34ad0528ed..c1bf5cd951f5 100644 > --- a/default-configs/ppc64-softmmu.mak > +++ b/default-configs/ppc64-softmmu.mak > @@ -18,6 +18,7 @@ CONFIG_XICS_SPAPR=$(CONFIG_PSERIES) > CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM)) > CONFIG_XIVE=$(CONFIG_PSERIES) > CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES) > +CONFIG_XIVE_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM)) > CONFIG_MEM_DEVICE=y > CONFIG_DIMM=y > CONFIG_SPAPR_RNG=y > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h > index 7fdc25057420..24a0be478039 100644 > --- a/include/hw/ppc/spapr_xive.h > +++ b/include/hw/ppc/spapr_xive.h > @@ -35,6 +35,10 @@ typedef struct sPAPRXive { > /* TIMA mapping address */ > hwaddr tm_base; > MemoryRegion tm_mmio; > + > + /* KVM support */ > + int fd; > + void *tm_mmap; > } sPAPRXive; > > bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi); > @@ -48,5 +52,11 @@ void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, > uint32_t phandle); > void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx); > void spapr_xive_mmio_set_enabled(sPAPRXive *xive, bool enable); > +void spapr_xive_map_mmio(sPAPRXive *xive); > + > +/* > + * KVM XIVE device helpers > + */ > +void kvmppc_xive_connect(sPAPRXive *xive, Error **errp); > > #endif /* PPC_SPAPR_XIVE_H */ > diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h > index ec23253ba448..4bbba8d39a65 100644 > --- a/include/hw/ppc/xive.h > +++ b/include/hw/ppc/xive.h > @@ -140,6 +140,7 @@ > #ifndef PPC_XIVE_H > #define PPC_XIVE_H > > +#include "sysemu/kvm.h" > #include "hw/qdev-core.h" > #include "hw/sysbus.h" > #include "hw/ppc/xive_regs.h" > @@ -194,6 +195,9 @@ typedef struct XiveSource { > uint32_t esb_shift; > MemoryRegion esb_mmio; > > + /* KVM support */ > + void *esb_mmap; > + > XiveNotifier *xive; > } XiveSource; > > @@ -421,4 +425,22 @@ static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx) > return (nvt_blk << 19) | nvt_idx; > } > > +/* > + * KVM XIVE device helpers > + */ > + > +/* Keep inlined to discard compile of KVM code sections */ > +static inline bool kvmppc_xive_enabled(void) > +{ > + if (kvm_enabled()) { > + return machine_kernel_irqchip_allowed(MACHINE(qdev_get_machine())); > + } else { > + return false; > + } > +} > + > +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp); > +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); > +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); > + > #endif /* PPC_XIVE_H */ > diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h > index bdfaa4e70a83..d2159660f9f2 100644 > --- a/target/ppc/kvm_ppc.h > +++ b/target/ppc/kvm_ppc.h > @@ -59,6 +59,7 @@ bool kvmppc_has_cap_fixup_hcalls(void); > bool kvmppc_has_cap_htm(void); > bool kvmppc_has_cap_mmu_radix(void); > bool kvmppc_has_cap_mmu_hash_v3(void); > +bool kvmppc_has_cap_xive(void); > int kvmppc_get_cap_safe_cache(void); > int kvmppc_get_cap_safe_bounds_check(void); > int kvmppc_get_cap_safe_indirect_branch(void); > @@ -307,6 +308,11 @@ static inline bool kvmppc_has_cap_mmu_hash_v3(void) > return false; > } > > +static inline bool kvmppc_has_cap_xive(void) > +{ > + return false; > +} > + > static inline int kvmppc_get_cap_safe_cache(void) > { > return 0; > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > index d391177ab81f..cf6d3a5f12e1 100644 > --- a/hw/intc/spapr_xive.c > +++ b/hw/intc/spapr_xive.c > @@ -172,7 +172,7 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon) > } > } > > -static void spapr_xive_map_mmio(sPAPRXive *xive) > +void spapr_xive_map_mmio(sPAPRXive *xive) > { > sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base); > sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base); > @@ -250,6 +250,9 @@ static void spapr_xive_instance_init(Object *obj) > TYPE_XIVE_END_SOURCE); > object_property_add_child(obj, "end_source", OBJECT(&xive->end_source), > NULL); > + > + /* Not connected to the KVM XIVE device */ > + xive->fd = -1; > } > > static void spapr_xive_realize(DeviceState *dev, Error **errp) > @@ -304,17 +307,25 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) > xive->eat = g_new0(XiveEAS, xive->nr_irqs); > xive->endt = g_new0(XiveEND, xive->nr_ends); > > - /* TIMA initialization */ > - memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive, > - "xive.tima", 4ull << TM_SHIFT); > + if (kvmppc_xive_enabled()) { > + kvmppc_xive_connect(xive, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + } else { > + /* TIMA initialization */ > + memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive, > + "xive.tima", 4ull << TM_SHIFT); > > - /* Define all XIVE MMIO regions on SysBus */ > - sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio); > - sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio); > - sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio); > + /* Define all XIVE MMIO regions on SysBus */ > + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio); > + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio); > + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio); > > - /* Map all regions */ > - spapr_xive_map_mmio(xive); > + /* Map all regions */ > + spapr_xive_map_mmio(xive); > + } > > qemu_register_reset(spapr_xive_reset, dev); > } > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > new file mode 100644 > index 000000000000..f96c66fa419d > --- /dev/null > +++ b/hw/intc/spapr_xive_kvm.c > @@ -0,0 +1,254 @@ > +/* > + * QEMU PowerPC sPAPR XIVE interrupt controller model > + * > + * Copyright (c) 2017-2019, IBM Corporation. > + * > + * This code is licensed under the GPL version 2 or later. See the > + * COPYING file in the top-level directory. > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/log.h" > +#include "qemu/error-report.h" > +#include "qapi/error.h" > +#include "target/ppc/cpu.h" > +#include "sysemu/cpus.h" > +#include "sysemu/kvm.h" > +#include "hw/ppc/spapr.h" > +#include "hw/ppc/spapr_xive.h" > +#include "hw/ppc/xive.h" > +#include "kvm_ppc.h" > + > +#include > + > +/* > + * Helpers for CPU hotplug > + * > + * TODO: make a common KVMEnabledCPU layer for XICS and XIVE > + */ > +typedef struct KVMEnabledCPU { > + unsigned long vcpu_id; > + QLIST_ENTRY(KVMEnabledCPU) node; > +} KVMEnabledCPU; > + > +static QLIST_HEAD(, KVMEnabledCPU) > + kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus); > + > +static bool kvm_cpu_is_enabled(CPUState *cs) > +{ > + KVMEnabledCPU *enabled_cpu; > + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); > + > + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { > + if (enabled_cpu->vcpu_id == vcpu_id) { > + return true; > + } > + } > + return false; > +} > + > +static void kvm_cpu_enable(CPUState *cs) > +{ > + KVMEnabledCPU *enabled_cpu; > + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); > + > + enabled_cpu = g_malloc(sizeof(*enabled_cpu)); > + enabled_cpu->vcpu_id = vcpu_id; > + QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node); > +} > + > +/* > + * XIVE Thread Interrupt Management context (KVM) > + */ > + > +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) > +{ > + sPAPRXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive; > + unsigned long vcpu_id; > + int ret; > + > + /* Check if CPU was hot unplugged and replugged. */ > + if (kvm_cpu_is_enabled(tctx->cs)) { > + return; > + } > + > + vcpu_id = kvm_arch_vcpu_id(tctx->cs); > + > + ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd, > + vcpu_id, 0); > + if (ret < 0) { > + error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s", > + vcpu_id, strerror(errno)); > + return; > + } > + > + kvm_cpu_enable(tctx->cs); > +} > + > +/* > + * XIVE Interrupt Source (KVM) > + */ > + > +/* > + * At reset, the interrupt sources are simply created and MASKED. We > + * only need to inform the KVM XIVE device about their type: LSI or > + * MSI. > + */ > +void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) > +{ > + sPAPRXive *xive = SPAPR_XIVE(xsrc->xive); > + int i; > + > + for (i = 0; i < xsrc->nr_irqs; i++) { > + Error *local_err = NULL; > + uint64_t state = 0; > + > + if (xive_source_irq_is_lsi(xsrc, i)) { > + state |= KVM_XIVE_LEVEL_SENSITIVE; > + if (xsrc->status[i] & XIVE_STATUS_ASSERTED) { > + state |= KVM_XIVE_LEVEL_ASSERTED; > + } > + } > + > + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCES, i, &state, > + true, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + } > +} > + > +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) > +{ > + XiveSource *xsrc = opaque; > + struct kvm_irq_level args; > + int rc; > + > + args.irq = srcno; > + if (!xive_source_irq_is_lsi(xsrc, srcno)) { > + if (!val) { > + return; > + } > + args.level = KVM_INTERRUPT_SET; > + } else { > + if (val) { > + xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; > + args.level = KVM_INTERRUPT_SET_LEVEL; > + } else { > + xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; > + args.level = KVM_INTERRUPT_UNSET; > + } > + } > + rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args); > + if (rc < 0) { > + error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno)); > + } > +} > + > +/* > + * sPAPR XIVE interrupt controller (KVM) > + */ > + > +static void *kvmppc_xive_mmap(sPAPRXive *xive, int ctrl, size_t len, > + Error **errp) > +{ > + Error *local_err = NULL; > + void *addr; > + int fd; > + > + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, ctrl, &fd, false, > + &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return NULL; > + } > + > + addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); > + close(fd); > + if (addr == MAP_FAILED) { > + error_setg_errno(errp, errno, "XIVE: unable to set memory mapping"); > + return NULL; > + } > + > + return addr; > +} > + > +/* > + * All the XIVE memory regions are now backed by mappings from the KVM > + * XIVE device. > + */ > +void kvmppc_xive_connect(sPAPRXive *xive, Error **errp) > +{ > + XiveSource *xsrc = &xive->source; > + XiveENDSource *end_xsrc = &xive->end_source; > + Error *local_err = NULL; > + size_t esb_len; > + size_t tima_len; > + > + if (!kvm_enabled() || !kvmppc_has_cap_xive()) { > + error_setg(errp, "IRQ_XIVE capability must be present for KVM"); > + return; > + } > + > + /* First, create the KVM XIVE device */ > + xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false); > + if (xive->fd < 0) { > + error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device"); > + return; > + } > + > + /* > + * Source ESBs KVM mapping > + * > + * Inform KVM where we will map the ESB pages. This is needed by > + * the H_INT_GET_SOURCE_INFO hcall which returns the source > + * characteristics, among which the ESB page address. > + */ > + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_VC_BASE, > + &xive->vc_base, true, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + > + esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs; > + xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_DEV_XIVE_GET_ESB_FD, > + esb_len, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + > + memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc), > + "xive.esb", esb_len, xsrc->esb_mmap); > + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio); > + > + /* END ESBs mapping (No KVM) */ > + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio); > + > + /* > + * TIMA KVM mapping > + * > + * We could also inform KVM where the TIMA will be mapped but as > + * this is a fixed MMIO address for the system it does not seem > + * necessary to provide a KVM ioctl to change it. > + */ > + tima_len = 4ull << TM_SHIFT; > + xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_DEV_XIVE_GET_TIMA_FD, > + tima_len, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive), > + "xive.tima", tima_len, xive->tm_mmap); > + sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio); > + > + kvm_kernel_irqchip = true; > + kvm_msi_via_irqfd_allowed = true; > + kvm_gsi_direct_mapping = true; > + > + /* Map all regions */ > + spapr_xive_map_mmio(xive); > +} > diff --git a/hw/intc/xive.c b/hw/intc/xive.c > index a3cb0cf0e348..9a2d7be283f8 100644 > --- a/hw/intc/xive.c > +++ b/hw/intc/xive.c > @@ -15,6 +15,7 @@ > #include "sysemu/dma.h" > #include "hw/qdev-properties.h" > #include "monitor/monitor.h" > +#include "hw/boards.h" > #include "hw/ppc/xive.h" > #include "hw/ppc/xive_regs.h" > > @@ -493,6 +494,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) > return; > } > > + /* Connect the presenter to the VCPU (required for CPU hotplug) */ > + if (kvmppc_xive_enabled()) { > + kvmppc_xive_cpu_connect(tctx, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + } > + > qemu_register_reset(xive_tctx_reset, dev); > } > > @@ -895,6 +905,10 @@ static void xive_source_reset(void *dev) > > /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */ > memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs); > + > + if (kvmppc_xive_enabled()) { > + kvmppc_xive_source_reset(xsrc, &error_fatal); > + } > } > > static void xive_source_realize(DeviceState *dev, Error **errp) > @@ -928,9 +942,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp) > xsrc->status = g_malloc0(xsrc->nr_irqs); > xsrc->lsi_map = bitmap_new(xsrc->nr_irqs); > > - memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), > - &xive_source_esb_ops, xsrc, "xive.esb", > - (1ull << xsrc->esb_shift) * xsrc->nr_irqs); > + if (!kvmppc_xive_enabled()) { > + memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), > + &xive_source_esb_ops, xsrc, "xive.esb", > + (1ull << xsrc->esb_shift) * xsrc->nr_irqs); > + } > > qemu_register_reset(xive_source_reset, dev); > } > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c > index 5fce72fe0f6c..afbdabfa6543 100644 > --- a/hw/ppc/spapr_irq.c > +++ b/hw/ppc/spapr_irq.c > @@ -15,6 +15,7 @@ > #include "hw/ppc/spapr_xive.h" > #include "hw/ppc/xics.h" > #include "sysemu/kvm.h" > +#include "kvm_ppc.h" > > #include "trace.h" > > @@ -266,9 +267,9 @@ static void spapr_irq_init_xive(sPAPRMachineState *spapr, Error **errp) > DeviceState *dev; > int i; > > - /* KVM XIVE device not yet available */ > if (kvm_enabled()) { > - if (machine_kernel_irqchip_required(machine)) { > + if (machine_kernel_irqchip_required(machine) && > + !kvmppc_has_cap_xive()) { > error_setg(errp, "kernel_irqchip requested. no KVM XIVE support"); > return; > } > @@ -384,7 +385,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val) > { > sPAPRMachineState *spapr = opaque; > > - xive_source_set_irq(&spapr->xive->source, srcno, val); > + if (kvmppc_xive_enabled()) { > + kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val); > + } else { > + xive_source_set_irq(&spapr->xive->source, srcno, val); > + } > } > > /* > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > index ebbb48c42f25..88a470a73e7c 100644 > --- a/target/ppc/kvm.c > +++ b/target/ppc/kvm.c > @@ -86,6 +86,7 @@ static int cap_fixup_hcalls; > static int cap_htm; /* Hardware transactional memory support */ > static int cap_mmu_radix; > static int cap_mmu_hash_v3; > +static int cap_xive; > static int cap_resize_hpt; > static int cap_ppc_pvr_compat; > static int cap_ppc_safe_cache; > @@ -149,6 +150,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) > cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); > cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); > cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); > + cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE); > cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); > kvmppc_get_cpu_characteristics(s); > cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); > @@ -2389,6 +2391,11 @@ static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c) > return 0; > } > > +bool kvmppc_has_cap_xive(void) > +{ > + return cap_xive; > +} > + > static void kvmppc_get_cpu_characteristics(KVMState *s) > { > struct kvm_ppc_cpu_char c; > diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs > index 301a8e972d91..23126c199178 100644 > --- a/hw/intc/Makefile.objs > +++ b/hw/intc/Makefile.objs > @@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o > obj-$(CONFIG_XICS_KVM) += xics_kvm.o > obj-$(CONFIG_XIVE) += xive.o > obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o > +obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o > obj-$(CONFIG_POWERNV) += xics_pnv.o > obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o > obj-$(CONFIG_S390_FLIC) += s390_flic.o -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson