On Fri, Feb 22, 2019 at 12:28:30PM +0100, Cédric Le Goater wrote: > These controls will be used by the H_INT_SET_QUEUE_CONFIG and > H_INT_GET_QUEUE_CONFIG hcalls from QEMU. They will also be used to > restore the configuration of the XIVE EQs in the KVM device and to > capture the internal runtime state of the EQs. Both 'get' and 'set' > rely on an OPAL call to access from the XIVE interrupt controller the > EQ toggle bit and EQ index which are updated by the HW when event > notifications are enqueued in the EQ. > > The value of the guest physical address of the event queue is saved in > the XIVE internal xive_q structure for later use. That is when > migration needs to mark the EQ pages dirty to capture a consistent > memory state of the VM. > > To be noted that H_INT_SET_QUEUE_CONFIG does not require the extra > OPAL call setting the EQ toggle bit and EQ index to configure the EQ, > but restoring the EQ state will. > > Signed-off-by: Cédric Le Goater > --- > arch/powerpc/include/asm/xive.h | 2 + > arch/powerpc/include/uapi/asm/kvm.h | 21 +++ > arch/powerpc/kvm/book3s_xive.h | 2 + > arch/powerpc/kvm/book3s_xive.c | 15 +- > arch/powerpc/kvm/book3s_xive_native.c | 207 +++++++++++++++++++++ > Documentation/virtual/kvm/devices/xive.txt | 29 +++ > 6 files changed, 270 insertions(+), 6 deletions(-) > > diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h > index b579a943407b..46891f321606 100644 > --- a/arch/powerpc/include/asm/xive.h > +++ b/arch/powerpc/include/asm/xive.h > @@ -73,6 +73,8 @@ struct xive_q { > u32 esc_irq; > atomic_t count; > atomic_t pending_count; > + u64 guest_qpage; > + u32 guest_qsize; > }; > > /* Global enable flags for the XIVE support */ > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > index 91899c7f9abd..177e43f3edaf 100644 > --- a/arch/powerpc/include/uapi/asm/kvm.h > +++ b/arch/powerpc/include/uapi/asm/kvm.h > @@ -679,6 +679,7 @@ struct kvm_ppc_cpu_char { > #define KVM_DEV_XIVE_GRP_CTRL 1 > #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source attributes */ > #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source attributes */ > +#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit eq attributes */ > > /* Layout of 64-bit XIVE source attribute values */ > #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) > @@ -694,4 +695,24 @@ struct kvm_ppc_cpu_char { > #define KVM_XIVE_SOURCE_EISN_SHIFT 33 > #define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL > > +/* Layout of 64-bit eq attribute */ > +#define KVM_XIVE_EQ_PRIORITY_SHIFT 0 > +#define KVM_XIVE_EQ_PRIORITY_MASK 0x7 > +#define KVM_XIVE_EQ_SERVER_SHIFT 3 > +#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL > + > +/* Layout of 64-bit eq attribute values */ > +struct kvm_ppc_xive_eq { > + __u32 flags; > + __u32 qsize; > + __u64 qpage; > + __u32 qtoggle; > + __u32 qindex; > + __u8 pad[40]; > +}; > + > +#define KVM_XIVE_EQ_FLAG_ENABLED 0x00000001 > +#define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY 0x00000002 > +#define KVM_XIVE_EQ_FLAG_ESCALATE 0x00000004 > + > #endif /* __LINUX_KVM_POWERPC_H */ > diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h > index ab3ac152980d..6660d138c6b7 100644 > --- a/arch/powerpc/kvm/book3s_xive.h > +++ b/arch/powerpc/kvm/book3s_xive.h > @@ -267,6 +267,8 @@ struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( > struct kvmppc_xive *xive, int irq); > void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); > int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); > +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, > + bool single_escalation); > > #endif /* CONFIG_KVM_XICS */ > #endif /* _KVM_PPC_BOOK3S_XICS_H */ > diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c > index 086da91d7c6e..7431e31bc541 100644 > --- a/arch/powerpc/kvm/book3s_xive.c > +++ b/arch/powerpc/kvm/book3s_xive.c > @@ -166,7 +166,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data) > return IRQ_HANDLED; > } > > -static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) > +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, > + bool single_escalation) > { > struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; > struct xive_q *q = &xc->queues[prio]; > @@ -185,7 +186,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) > return -EIO; > } > > - if (xc->xive->single_escalation) > + if (single_escalation) > name = kasprintf(GFP_KERNEL, "kvm-%d-%d", > vcpu->kvm->arch.lpid, xc->server_num); > else > @@ -217,7 +218,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) > * interrupt, thus leaving it effectively masked after > * it fires once. > */ > - if (xc->xive->single_escalation) { > + if (single_escalation) { > struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); > struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); > > @@ -291,7 +292,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) > continue; > rc = xive_provision_queue(vcpu, prio); > if (rc == 0 && !xive->single_escalation) > - xive_attach_escalation(vcpu, prio); > + kvmppc_xive_attach_escalation(vcpu, prio, > + xive->single_escalation); > if (rc) > return rc; > } > @@ -1219,7 +1221,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, > if (xive->qmap & (1 << i)) { > r = xive_provision_queue(vcpu, i); > if (r == 0 && !xive->single_escalation) > - xive_attach_escalation(vcpu, i); > + kvmppc_xive_attach_escalation( > + vcpu, i, xive->single_escalation); > if (r) > goto bail; > } else { > @@ -1234,7 +1237,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, > } > > /* If not done above, attach priority 0 escalation */ > - r = xive_attach_escalation(vcpu, 0); > + r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); > if (r) > goto bail; > > diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c > index cb5a5c6e05af..34a35bcf550c 100644 > --- a/arch/powerpc/kvm/book3s_xive_native.c > +++ b/arch/powerpc/kvm/book3s_xive_native.c > @@ -341,6 +341,201 @@ static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, > priority, eisn); > } > > +static int xive_native_validate_queue_size(u32 qsize) > +{ > + switch (qsize) { > + case 12: > + case 16: > + case 21: > + case 24: > + case 0: > + return 0; > + default: > + return -EINVAL; > + } > +} > + > +static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, > + long eq_idx, u64 addr) > +{ > + struct kvm *kvm = xive->kvm; > + struct kvm_vcpu *vcpu; > + struct kvmppc_xive_vcpu *xc; > + void __user *ubufp = (u64 __user *) addr; > + u32 server; > + u8 priority; > + struct kvm_ppc_xive_eq kvm_eq; > + int rc; > + __be32 *qaddr = 0; > + struct page *page; > + struct xive_q *q; > + > + /* > + * Demangle priority/server tuple from the EQ index > + */ > + priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> > + KVM_XIVE_EQ_PRIORITY_SHIFT; > + server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> > + KVM_XIVE_EQ_SERVER_SHIFT; > + > + if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq))) > + return -EFAULT; > + > + vcpu = kvmppc_xive_find_server(kvm, server); > + if (!vcpu) { > + pr_err("Can't find server %d\n", server); > + return -ENOENT; > + } > + xc = vcpu->arch.xive_vcpu; > + > + if (priority != xive_prio_from_guest(priority)) { > + pr_err("Trying to restore invalid queue %d for VCPU %d\n", > + priority, server); > + return -EINVAL; > + } > + q = &xc->queues[priority]; You need to validate the 'flags' field (AFAICT we don't actually have any flags yet, so it's only valid it if is 0. > + > + pr_devel("%s VCPU %d priority %d fl:%x sz:%d addr:%llx g:%d idx:%d\n", > + __func__, server, priority, kvm_eq.flags, > + kvm_eq.qsize, kvm_eq.qpage, kvm_eq.qtoggle, kvm_eq.qindex); > + > + rc = xive_native_validate_queue_size(kvm_eq.qsize); > + if (rc) { > + pr_err("invalid queue size %d\n", kvm_eq.qsize); > + return rc; > + } > + > + /* reset queue and disable queueing */ > + if (!kvm_eq.qsize) { > + q->guest_qpage = 0; > + q->guest_qsize = 0; > + > + rc = xive_native_configure_queue(xc->vp_id, q, priority, > + NULL, 0, true); > + if (rc) { > + pr_err("Failed to reset queue %d for VCPU %d: %d\n", > + priority, xc->server_num, rc); > + return rc; > + } > + > + if (q->qpage) { > + put_page(virt_to_page(q->qpage)); > + q->qpage = NULL; > + } > + > + return 0; > + } > + > + > + page = gfn_to_page(kvm, gpa_to_gfn(kvm_eq.qpage)); > + if (is_error_page(page)) { > + pr_warn("Couldn't get guest page for %llx!\n", kvm_eq.qpage); > + return -ENOMEM; Nit: I don't think ENOMEM is the right error here. ENOMEM indicates that the kernel couldn't allocate enough memory to complete whatever you asked. Here the problem is the user supplied a bad guest address, which is a rather different error. EFAULT is closer, but still not quite right, since it could be a valid user address but not a valid guest address. There are probably existing KVM calls that could hit this problem, I wonder what they use. > + } > + qaddr = page_to_virt(page) + (kvm_eq.qpage & ~PAGE_MASK); > + > + /* Backup queue page guest address for migration */ > + q->guest_qpage = kvm_eq.qpage; > + q->guest_qsize = kvm_eq.qsize; > + > + rc = xive_native_configure_queue(xc->vp_id, q, priority, > + (__be32 *) qaddr, kvm_eq.qsize, true); > + if (rc) { > + pr_err("Failed to configure queue %d for VCPU %d: %d\n", > + priority, xc->server_num, rc); > + put_page(page); > + return rc; > + } > + > + rc = xive_native_set_queue_state(xc->vp_id, priority, kvm_eq.qtoggle, > + kvm_eq.qindex); > + if (rc) > + goto error; > + > + rc = kvmppc_xive_attach_escalation(vcpu, priority, > + xive->single_escalation); > +error: > + if (rc) > + kvmppc_xive_native_cleanup_queue(vcpu, priority); > + return rc; > +} > + > +static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive, > + long eq_idx, u64 addr) > +{ > + struct kvm *kvm = xive->kvm; > + struct kvm_vcpu *vcpu; > + struct kvmppc_xive_vcpu *xc; > + struct xive_q *q; > + void __user *ubufp = (u64 __user *) addr; > + u32 server; > + u8 priority; > + struct kvm_ppc_xive_eq kvm_eq; > + u64 qpage; > + u64 qsize; > + u64 qeoi_page; > + u32 escalate_irq; > + u64 qflags; > + int rc; > + > + /* > + * Demangle priority/server tuple from the EQ index > + */ > + priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> > + KVM_XIVE_EQ_PRIORITY_SHIFT; > + server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> > + KVM_XIVE_EQ_SERVER_SHIFT; > + > + vcpu = kvmppc_xive_find_server(kvm, server); > + if (!vcpu) { > + pr_err("Can't find server %d\n", server); > + return -ENOENT; > + } > + xc = vcpu->arch.xive_vcpu; > + > + if (priority != xive_prio_from_guest(priority)) { > + pr_err("invalid priority for queue %d for VCPU %d\n", > + priority, server); > + return -EINVAL; > + } > + q = &xc->queues[priority]; > + > + memset(&kvm_eq, 0, sizeof(kvm_eq)); > + > + if (!q->qpage) > + return 0; > + > + rc = xive_native_get_queue_info(xc->vp_id, priority, &qpage, &qsize, > + &qeoi_page, &escalate_irq, &qflags); > + if (rc) > + return rc; > + > + kvm_eq.flags = 0; > + if (qflags & OPAL_XIVE_EQ_ENABLED) > + kvm_eq.flags |= KVM_XIVE_EQ_FLAG_ENABLED; > + if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY) > + kvm_eq.flags |= KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY; > + if (qflags & OPAL_XIVE_EQ_ESCALATE) > + kvm_eq.flags |= KVM_XIVE_EQ_FLAG_ESCALATE; > + > + kvm_eq.qsize = q->guest_qsize; > + kvm_eq.qpage = q->guest_qpage; > + > + rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle, > + &kvm_eq.qindex); > + if (rc) > + return rc; > + > + pr_devel("%s VCPU %d priority %d fl:%x sz:%d addr:%llx g:%d idx:%d\n", > + __func__, server, priority, kvm_eq.flags, > + kvm_eq.qsize, kvm_eq.qpage, kvm_eq.qtoggle, kvm_eq.qindex); > + > + if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq))) > + return -EFAULT; > + > + return 0; > +} > + > static int kvmppc_xive_native_set_attr(struct kvm_device *dev, > struct kvm_device_attr *attr) > { > @@ -355,6 +550,9 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, > case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: > return kvmppc_xive_native_set_source_config(xive, attr->attr, > attr->addr); > + case KVM_DEV_XIVE_GRP_EQ_CONFIG: > + return kvmppc_xive_native_set_queue_config(xive, attr->attr, > + attr->addr); > } > return -ENXIO; > } > @@ -362,6 +560,13 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, > static int kvmppc_xive_native_get_attr(struct kvm_device *dev, > struct kvm_device_attr *attr) > { > + struct kvmppc_xive *xive = dev->private; > + > + switch (attr->group) { > + case KVM_DEV_XIVE_GRP_EQ_CONFIG: > + return kvmppc_xive_native_get_queue_config(xive, attr->attr, > + attr->addr); > + } > return -ENXIO; > } > > @@ -377,6 +582,8 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, > attr->attr < KVMPPC_XIVE_NR_IRQS) > return 0; > break; > + case KVM_DEV_XIVE_GRP_EQ_CONFIG: > + return 0; > } > return -ENXIO; > } > diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt > index 4f513a1880c7..c0b5d9bd43fb 100644 > --- a/Documentation/virtual/kvm/devices/xive.txt > +++ b/Documentation/virtual/kvm/devices/xive.txt > @@ -52,3 +52,32 @@ the legacy interrupt mode, referred as XICS (POWER7/8). > -ENXIO: CPU event queues not configured or configuration of the > underlying HW interrupt failed > -EBUSY: No CPU available to serve interrupt > + > + 4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write) > + Configures an event queue of a CPU > + Attributes: > + EQ descriptor identifier (64-bit) > + The EQ descriptor identifier is a tuple (server, priority) : > + bits: | 63 .... 32 | 31 .. 3 | 2 .. 0 > + values: | unused | server | priority > + The kvm_device_attr.addr points to : > + struct kvm_ppc_xive_eq { > + __u32 flags; > + __u32 qsize; > + __u64 qpage; > + __u32 qtoggle; > + __u32 qindex; > + __u8 pad[40]; > + }; > + - flags: queue flags > + - qsize: queue size (power of 2) > + - qpage: real address of queue > + - qtoggle: current queue toggle bit > + - qindex: current queue index > + - pad: reserved for future use > + Errors: > + -ENOENT: Invalid CPU number > + -EINVAL: Invalid priority or invalid queue size > + -EFAULT: Invalid user pointer for attr->addr. > + -ENOMEM: Invalid queue address > + -EIO: Configuration of the underlying HW failed -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson