From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59230) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UwGWL-0004w3-Sg for qemu-devel@nongnu.org; Mon, 08 Jul 2013 14:50:49 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1UwGWH-0006z6-AM for qemu-devel@nongnu.org; Mon, 08 Jul 2013 14:50:45 -0400 Received: from e39.co.us.ibm.com ([32.97.110.160]:44571) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UwGWG-0006wS-HO for qemu-devel@nongnu.org; Mon, 08 Jul 2013 14:50:41 -0400 Received: from /spool/local by e39.co.us.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Mon, 8 Jul 2013 12:50:37 -0600 From: Anthony Liguori In-Reply-To: <1372315560-5478-15-git-send-email-aik@ozlabs.ru> References: <1372315560-5478-1-git-send-email-aik@ozlabs.ru> <1372315560-5478-15-git-send-email-aik@ozlabs.ru> Date: Mon, 08 Jul 2013 13:50:10 -0500 Message-ID: <87hag4zy1p.fsf@codemonkey.ws> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Subject: Re: [Qemu-devel] [PATCH 14/17] pseries: Support for in-kernel XICS interrupt controller List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Alexey Kardashevskiy , qemu-devel@nongnu.org Cc: Alexander Graf , qemu-ppc@nongnu.org, Paolo Bonzini , Paul Mackerras , David Gibson Alexey Kardashevskiy writes: > From: David Gibson > > Recent (host) kernels support emulating the PAPR defined "XICS" interrupt > controller system within KVM. This patch allows qemu to initialize and > configure the in-kernel XICS, and keep its state in sync with qemu's XICS > state as necessary. > > This should give considerable performance improvements. e.g. on a simple > IPI ping-pong test between hardware threads, using qemu XICS gives us > around 5,000 irqs/second, whereas the in-kernel XICS gives us around > 70,000 irqs/s on the same hardware configuration. 
> > [Mike Qiu : fixed mistype which caused ics_set_kvm_state() to fail] > Signed-off-by: David Gibson > [aik: moved to a separate device] > Signed-off-by: Alexey Kardashevskiy > --- > default-configs/ppc64-softmmu.mak | 1 + > hw/intc/Makefile.objs | 1 + > hw/intc/xics_kvm.c | 445 +++++++++++++++++++++++++++++++++++++ > hw/ppc/spapr.c | 32 ++- > include/hw/ppc/xics.h | 13 ++ > 5 files changed, 489 insertions(+), 3 deletions(-) > create mode 100644 hw/intc/xics_kvm.c > > diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak > index 69a9f8d..5b995f9 100644 > --- a/default-configs/ppc64-softmmu.mak > +++ b/default-configs/ppc64-softmmu.mak > @@ -48,5 +48,6 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM)) > # For pSeries > CONFIG_PCI_HOTPLUG=y > CONFIG_XICS=$(CONFIG_PSERIES) > +CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM)) > # For PReP > CONFIG_MC146818RTC=y > diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs > index abe8f80..9e77afe 100644 > --- a/hw/intc/Makefile.objs > +++ b/hw/intc/Makefile.objs > @@ -23,3 +23,4 @@ obj-$(CONFIG_OPENPIC) += openpic.o > obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o > obj-$(CONFIG_SH4) += sh_intc.o > obj-$(CONFIG_XICS) += xics.o > +obj-$(CONFIG_XICS_KVM) += xics_kvm.o > diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c > new file mode 100644 > index 0000000..d5604a7 > --- /dev/null > +++ b/hw/intc/xics_kvm.c > @@ -0,0 +1,445 @@ > +/* > + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator > + * > + * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics, in-kernel emulation > + * > + * Copyright (c) 2013 David Gibson, IBM Corporation. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. 
> + * > + */ > + > +#include "hw/hw.h" > +#include "trace.h" > +#include "hw/ppc/spapr.h" > +#include "hw/ppc/xics.h" > +#include "kvm_ppc.h" > +#include "qemu/config-file.h" > + > +#include > + > +struct icp_state_kvm { CodingStyle Regards, Anthony Liguori > + struct icp_state parent; > + > + uint32_t set_xive_token; > + uint32_t get_xive_token; > + uint32_t int_off_token; > + uint32_t int_on_token; > + int kernel_xics_fd; > +}; > + > +static void icp_get_kvm_state(struct icp_server_state *ss) > +{ > + uint64_t state; > + struct kvm_one_reg reg = { > + .id = KVM_REG_PPC_ICP_STATE, > + .addr = (uintptr_t)&state, > + }; > + int ret; > + > + if (!ss->cs) { > + return; /* kernel irqchip not in use */ > + } > + > + ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg); > + if (ret != 0) { > + fprintf(stderr, "Unable to retrieve KVM interrupt controller state" > + " for CPU %d: %s\n", ss->cs->cpu_index, strerror(errno)); > + exit(1); > + } > + > + ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT; > + ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT) > + & KVM_REG_PPC_ICP_MFRR_MASK; > + ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT) > + & KVM_REG_PPC_ICP_PPRI_MASK; > +} > + > +static int icp_set_kvm_state(struct icp_server_state *ss) > +{ > + uint64_t state; > + struct kvm_one_reg reg = { > + .id = KVM_REG_PPC_ICP_STATE, > + .addr = (uintptr_t)&state, > + }; > + int ret; > + > + if (!ss->cs) { > + return 0; /* kernel irqchip not in use */ > + } > + > + state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT) > + | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) > + | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT); > + > + ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg); > + if (ret != 0) { > + fprintf(stderr, "Unable to restore KVM interrupt controller state (0x%" > + PRIx64 ") for CPU %d: %s\n", state, ss->cs->cpu_index, > + strerror(errno)); > + exit(1); > + return ret; > + } > + > + return 0; > +} > + > +static void ics_get_kvm_state(struct
ics_state *ics) > +{ > + struct icp_state_kvm *icpkvm = XICS_KVM(ics->icp); > + uint64_t state; > + struct kvm_device_attr attr = { > + .flags = 0, > + .group = KVM_DEV_XICS_GRP_SOURCES, > + .addr = (uint64_t)(uintptr_t)&state, > + }; > + int i; > + > + for (i = 0; i < ics->nr_irqs; i++) { > + struct ics_irq_state *irq = &ics->irqs[i]; > + int ret; > + > + attr.attr = i + ics->offset; > + > + ret = ioctl(icpkvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr); > + if (ret != 0) { > + fprintf(stderr, "Unable to retrieve KVM interrupt controller state" > + " for IRQ %d: %s\n", i + ics->offset, strerror(errno)); > + exit(1); > + } > + > + irq->server = state & KVM_XICS_DESTINATION_MASK; > + irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT) > + & KVM_XICS_PRIORITY_MASK; > + /* > + * To be consistent with the software emulation in xics.c, we > + * split out the masked state + priority that we get from the > + * kernel into 'current priority' (0xff if masked) and > + * 'saved priority' (if masked, this is the priority the > + * interrupt had before it was masked). Masking and unmasking > + * are done with the ibm,int-off and ibm,int-on RTAS calls. > + */ > + if (state & KVM_XICS_MASKED) { > + irq->priority = 0xff; > + } else { > + irq->priority = irq->saved_priority; > + } > + > + if (state & KVM_XICS_PENDING) { > + if (state & KVM_XICS_LEVEL_SENSITIVE) { > + irq->status |= XICS_STATUS_ASSERTED; > + } else { > + /* > + * A pending edge-triggered interrupt (or MSI) > + * must have been rejected previously when we > + * first detected it and tried to deliver it, > + * so mark it as pending and previously rejected > + * for consistency with how xics.c works. 
> + */ > + irq->status |= XICS_STATUS_MASKED_PENDING > + | XICS_STATUS_REJECTED; > + } > + } > + } > +} > + > +static int ics_set_kvm_state(struct ics_state *ics) > +{ > + struct icp_state_kvm *icpkvm = XICS_KVM(ics->icp); > + uint64_t state; > + struct kvm_device_attr attr = { > + .flags = 0, > + .group = KVM_DEV_XICS_GRP_SOURCES, > + .addr = (uint64_t)(uintptr_t)&state, > + }; > + int i; > + > + for (i = 0; i < ics->nr_irqs; i++) { > + struct ics_irq_state *irq = &ics->irqs[i]; > + int ret; > + > + attr.attr = i + ics->offset; > + > + state = irq->server; > + state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK) > + << KVM_XICS_PRIORITY_SHIFT; > + if (irq->priority != irq->saved_priority) { > + assert(irq->priority == 0xff); > + state |= KVM_XICS_MASKED; > + } > + > + if (ics->islsi[i]) { > + state |= KVM_XICS_LEVEL_SENSITIVE; > + if (irq->status & XICS_STATUS_ASSERTED) { > + state |= KVM_XICS_PENDING; > + } > + } else { > + if (irq->status & XICS_STATUS_MASKED_PENDING) { > + state |= KVM_XICS_PENDING; > + } > + } > + > + ret = ioctl(icpkvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr); > + if (ret != 0) { > + fprintf(stderr, "Unable to restore KVM interrupt controller state" > + " for IRQs %d: %s\n", i + ics->offset, strerror(errno)); > + return ret; > + } > + } > + > + return 0; > +} > + > +static void icp_pre_save(void *opaque) > +{ > + struct icp_server_state *ss = opaque; > + > + icp_get_kvm_state(ss); > +} > + > +static int icp_post_load(void *opaque, int version_id) > +{ > + struct icp_server_state *ss = opaque; > + > + return icp_set_kvm_state(ss); > +} > + > +static void ics_pre_save(void *opaque) > +{ > + struct ics_state *ics = opaque; > + > + ics_get_kvm_state(ics); > +} > + > +static int ics_post_load(void *opaque, int version_id) > +{ > + struct ics_state *ics = opaque; > + > + return ics_set_kvm_state(ics); > +} > + > +static VMStateDescription vmstate_icpkvm_server = { > + .name = "icpkvm/server", > + .version_id = 1, > + 
.minimum_version_id = 1, > + .minimum_version_id_old = 1, > + .pre_save = icp_pre_save, > + .post_load = icp_post_load, > +}; > + > +static VMStateDescription vmstate_icskvm = { > + .name = "icskvm", > + .version_id = 1, > + .minimum_version_id = 1, > + .minimum_version_id_old = 1, > + .pre_save = ics_pre_save, > + .post_load = ics_post_load, > +}; > + > +static void ics_set_irq_kvm(void *opaque, int srcno, int val) > +{ > + struct ics_state *ics = opaque; > + struct kvm_irq_level args; > + int rc; > + > + args.irq = srcno + ics->offset; > + if (!ics->islsi[srcno]) { > + if (!val) { > + return; > + } > + args.level = KVM_INTERRUPT_SET; > + } else { > + args.level = val ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; > + } > + rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args); > + if (rc < 0) { > + perror("kvm_irq_line"); > + } > +} > + > +int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu) > +{ > + CPUState *cs; > + struct icp_server_state *ss; > + struct icp_state_kvm *icpkvm = (struct icp_state_kvm *) object_dynamic_cast( > + OBJECT(icp), TYPE_XICS_KVM); > + > + if (!icpkvm) { > + return -1; > + } > + > + cs = CPU(cpu); > + ss = &icp->ss[cs->cpu_index]; > + > + assert(cs->cpu_index < icp->nr_servers); > + if (icpkvm->kernel_xics_fd == -1) { > + abort(); > + } > + > + if (icpkvm->kernel_xics_fd != -1) { > + int ret; > + struct kvm_enable_cap xics_enable_cap = { > + .cap = KVM_CAP_IRQ_XICS, > + .flags = 0, > + .args = {icpkvm->kernel_xics_fd, cs->cpu_index, 0, 0}, > + }; > + > + ss->cs = cs; > + > + ret = kvm_vcpu_ioctl(ss->cs, KVM_ENABLE_CAP, &xics_enable_cap); > + if (ret < 0) { > + fprintf(stderr, "Unable to connect CPU%d to kernel XICS: %s\n", > + cs->cpu_index, strerror(errno)); > + exit(1); > + } > + } > + xics_common_cpu_setup(icp, cpu); > + > + vmstate_icpkvm_server.fields = vmstate_icp_server.fields; > + vmstate_register(NULL, cs->cpu_index, &vmstate_icpkvm_server, ss); > + > + return 0; > +} > + > +static void rtas_dummy(PowerPCCPU *cpu, 
sPAPREnvironment *spapr, > + uint32_t token, > + uint32_t nargs, target_ulong args, > + uint32_t nret, target_ulong rets) > +{ > + fprintf(stderr, "pseries: %s() should never be called for in-kernel XICS\n", __func__); > +} > + > +static void xics_kvm_realize(DeviceState *dev, Error **errp) > +{ > + struct icp_state_kvm *icpkvm = XICS_KVM(dev); > + QemuOptsList *list = qemu_find_opts("machine"); > + int rc; > + struct kvm_create_device xics_create_device = { > + .type = KVM_DEV_TYPE_XICS, > + .flags = 0, > + }; > + > + if (!kvm_enabled()) { > + error_setg(errp, "KVM must be enabled for in-kernel XICS"); > + goto fail; > + } > + > + if (QTAILQ_EMPTY(&list->head) || > + !qemu_opt_get_bool(QTAILQ_FIRST(&list->head), > + "kernel_irqchip", true) || > + !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) { > + error_setg(errp, "KVM must be enabled for in-kernel XICS"); > + return; > + } > + > + icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy); > + icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy); > + icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy); > + icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy); > + > + rc = kvmppc_define_rtas_token(icpkvm->set_xive_token, "ibm,set-xive"); > + if (rc < 0) { > + error_setg(errp, "kvmppc_define_rtas_token: ibm,set-xive"); > + goto fail; > + } > + > + rc = kvmppc_define_rtas_token(icpkvm->get_xive_token, "ibm,get-xive"); > + if (rc < 0) { > + error_setg(errp, "kvmppc_define_rtas_token: ibm,get-xive"); > + goto fail; > + } > + > + rc = kvmppc_define_rtas_token(icpkvm->int_on_token, "ibm,int-on"); > + if (rc < 0) { > + error_setg(errp, "kvmppc_define_rtas_token: ibm,int-on"); > + goto fail; > + } > + > + rc = kvmppc_define_rtas_token(icpkvm->int_off_token, "ibm,int-off"); > + if (rc < 0) { > + error_setg(errp, "kvmppc_define_rtas_token: ibm,int-off"); > + goto fail; > + } > + > + /* Create the kernel ICP */ > + rc = kvm_vm_ioctl(kvm_state, 
KVM_CREATE_DEVICE, &xics_create_device); > + if (rc < 0) { > + error_setg_errno(errp, -rc, "Error on KVM_CREATE_DEVICE for XICS"); > + goto fail; > + } > + > + icpkvm->kernel_xics_fd = xics_create_device.fd; > + > + xics_common_init(&icpkvm->parent, ics_set_irq_kvm); > + > + /* We use each the ICS's offset into the global irq number space > + * as an instance id. This means we can extend to multiple ICS > + * instances without needing to change the savevm format */ > + vmstate_icskvm.fields = vmstate_ics.fields; > + vmstate_register(NULL, icpkvm->parent.ics->offset, &vmstate_icskvm, > + icpkvm->parent.ics); > + > + return; > + > +fail: > + kvmppc_define_rtas_token(0, "ibm,set-xive"); > + kvmppc_define_rtas_token(0, "ibm,get-xive"); > + kvmppc_define_rtas_token(0, "ibm,int-on"); > + kvmppc_define_rtas_token(0, "ibm,int-off"); > + return; > +} > + > +static void xics_kvm_reset(DeviceState *d) > +{ > + struct icp_state_kvm *icpkvm = XICS_KVM(d); > + struct icp_state *icp = &icpkvm->parent; > + int i; > + > + xics_common_reset(icp); > + > + for (i = 0; i < icp->nr_servers; i++) { > + if (icp->ss[i].cs) { > + icp_set_kvm_state(&icp->ss[i]); > + } > + } > + > + ics_set_kvm_state(icp->ics); > +} > + > +static void xics_kvm_class_init(ObjectClass *oc, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(oc); > + > + dc->realize = xics_kvm_realize; > + dc->reset = xics_kvm_reset; > +} > + > +static const TypeInfo xics_kvm_info = { > + .name = TYPE_XICS_KVM, > + .parent = TYPE_XICS, > + .instance_size = sizeof(struct icp_state_kvm), > + .class_init = xics_kvm_class_init, > +}; > + > +static void xics_kvm_register_types(void) > +{ > + type_register_static(&xics_kvm_info); > +} > + > +type_init(xics_kvm_register_types) > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index f989a22..211f434 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -1001,7 +1001,31 @@ static struct icp_state *xics_system_init(int nr_servers, int nr_irqs) > { > struct icp_state *icp = NULL; > > - 
icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs); > + if (kvm_enabled()) { > + bool irqchip_allowed = true, irqchip_required = false; > + QemuOptsList *list = qemu_find_opts("machine"); > + > + if (!QTAILQ_EMPTY(&list->head)) { > + irqchip_allowed = qemu_opt_get_bool(QTAILQ_FIRST(&list->head), > + "kernel_irqchip", true); > + irqchip_required = qemu_opt_get_bool(QTAILQ_FIRST(&list->head), > + "kernel_irqchip", false); > + } > + > + if (irqchip_allowed) { > + icp = try_create_xics(TYPE_XICS_KVM, nr_servers, nr_irqs); > + } > + > + if (irqchip_required && !icp) { > + perror("iFailed to create in-kernel XICS\n"); > + abort(); > + } > + } > + > + if (!icp) { > + icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs); > + } > + > if (!icp) { > perror("Failed to create XICS\n"); > abort(); > @@ -1102,8 +1126,6 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) > } > env = &cpu->env; > > - xics_cpu_setup(spapr->icp, cpu); > - > /* Set time-base frequency to 512 MHz */ > cpu_ppc_tb_init(env, TIMEBASE_FREQ); > > @@ -1117,6 +1139,10 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) > kvmppc_set_papr(cpu); > } > > + if (xics_kvm_cpu_setup(spapr->icp, cpu)) { > + xics_cpu_setup(spapr->icp, cpu); > + } > + > qemu_register_reset(spapr_cpu_reset, cpu); > } > > diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h > index 3f72806..e474c01 100644 > --- a/include/hw/ppc/xics.h > +++ b/include/hw/ppc/xics.h > @@ -32,6 +32,9 @@ > #define TYPE_XICS "xics" > #define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS) > > +#define TYPE_XICS_KVM "xics-kvm" > +#define XICS_KVM(obj) OBJECT_CHECK(struct icp_state_kvm, (obj), TYPE_XICS_KVM) > + > #define XICS_IPI 0x2 > #define XICS_BUID 0x1 > #define XICS_IRQ_BASE (XICS_BUID << 12) > @@ -53,6 +56,7 @@ struct icp_state { > }; > > struct icp_server_state { > + CPUState *cs; > uint32_t xirr; > uint8_t pending_priority; > uint8_t mfrr; > @@ -88,6 +92,15 @@ void xics_common_reset(struct icp_state *icp); > > void 
xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu); > > +#ifdef CONFIG_KVM > +int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu); > +#else > +static inline int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu) > +{ > + return -1; > +} > +#endif > + > extern const VMStateDescription vmstate_icp_server; > extern const VMStateDescription vmstate_ics; > > -- > 1.7.10.4