On Fri, Feb 22, 2019 at 12:28:36PM +0100, Cédric Le Goater wrote: > Each thread has an associated Thread Interrupt Management context > composed of a set of registers. These registers let the thread handle > priority management and interrupt acknowledgment. The most important > are: > > - Interrupt Pending Buffer (IPB) > - Current Processor Priority (CPPR) > - Notification Source Register (NSR) > > They are exposed to software in four different pages each offering a > view with a different privilege. The first page is for the physical > thread context and the second for the hypervisor. Only the third > (operating system) and the fourth (user level) are exposed to the guest. > > A custom VM fault handler will populate the VMA with the appropriate > pages, which should only be the OS page for now. > > Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Subject to possible modification depending on whether we go with the generic change to allow mmap() on kvm devices. > --- > arch/powerpc/include/asm/xive.h | 1 + > arch/powerpc/include/uapi/asm/kvm.h | 2 ++ > arch/powerpc/kvm/book3s_xive_native.c | 39 ++++++++++++++++++++++ > arch/powerpc/sysdev/xive/native.c | 11 ++++++ > Documentation/virtual/kvm/devices/xive.txt | 23 +++++++++++++ > 5 files changed, 76 insertions(+) > > diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h > index 46891f321606..eb6d302082da 100644 > --- a/arch/powerpc/include/asm/xive.h > +++ b/arch/powerpc/include/asm/xive.h > @@ -23,6 +23,7 @@ > * same offset regardless of where the code is executing > */ > extern void __iomem *xive_tima; > +extern unsigned long xive_tima_os; > > /* > * Offset in the TM area of our current execution level (provided by > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > index 42d4ef93ec2d..be9b255e061d 100644 > --- a/arch/powerpc/include/uapi/asm/kvm.h > +++ b/arch/powerpc/include/uapi/asm/kvm.h > @@ -720,4 +720,6 @@ struct kvm_ppc_xive_eq { > 
#define KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY 0x00000002 > #define KVM_XIVE_EQ_FLAG_ESCALATE 0x00000004 > > +#define KVM_XIVE_TIMA_PAGE_OFFSET 0 > + > #endif /* __LINUX_KVM_POWERPC_H */ > diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c > index 132bff52d70a..c6ac818a13b2 100644 > --- a/arch/powerpc/kvm/book3s_xive_native.c > +++ b/arch/powerpc/kvm/book3s_xive_native.c > @@ -176,6 +176,44 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, > return rc; > } > > +static int xive_native_tima_fault(struct vm_fault *vmf) > +{ > + struct vm_area_struct *vma = vmf->vma; > + > + switch (vmf->pgoff - vma->vm_pgoff) { > + case 0: /* HW - forbid access */ > + case 1: /* HV - forbid access */ > + return VM_FAULT_SIGBUS; > + case 2: /* OS */ > + vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT); > + return VM_FAULT_NOPAGE; > + case 3: /* USER - TODO */ > + default: > + return VM_FAULT_SIGBUS; > + } > +} > + > +static const struct vm_operations_struct xive_native_tima_vmops = { > + .fault = xive_native_tima_fault, > +}; > + > +static int kvmppc_xive_native_mmap(struct kvm_device *dev, > + struct vm_area_struct *vma) > +{ > + /* We only allow mappings at fixed offset for now */ > + if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) { > + if (vma_pages(vma) > 4) > + return -EINVAL; > + vma->vm_ops = &xive_native_tima_vmops; > + } else { > + return -EINVAL; > + } > + > + vma->vm_flags |= VM_IO | VM_PFNMAP; > + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); > + return 0; > +} > + > static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, > u64 addr) > { > @@ -1005,6 +1043,7 @@ struct kvm_device_ops kvm_xive_native_ops = { > .set_attr = kvmppc_xive_native_set_attr, > .get_attr = kvmppc_xive_native_get_attr, > .has_attr = kvmppc_xive_native_has_attr, > + .mmap = kvmppc_xive_native_mmap, > }; > > void kvmppc_xive_native_init_module(void) > diff --git a/arch/powerpc/sysdev/xive/native.c 
b/arch/powerpc/sysdev/xive/native.c > index 0c037e933e55..7782201e5fe8 100644 > --- a/arch/powerpc/sysdev/xive/native.c > +++ b/arch/powerpc/sysdev/xive/native.c > @@ -521,6 +521,9 @@ u32 xive_native_default_eq_shift(void) > } > EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); > > +unsigned long xive_tima_os; > +EXPORT_SYMBOL_GPL(xive_tima_os); > + > bool __init xive_native_init(void) > { > struct device_node *np; > @@ -573,6 +576,14 @@ bool __init xive_native_init(void) > for_each_possible_cpu(cpu) > kvmppc_set_xive_tima(cpu, r.start, tima); > > + /* Resource 2 is OS window */ > + if (of_address_to_resource(np, 2, &r)) { > + pr_err("Failed to get thread mgmnt area resource\n"); > + return false; > + } > + > + xive_tima_os = r.start; > + > /* Grab size of provisionning pages */ > xive_parse_provisioning(np); > > diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt > index 1b8957c50c53..4d6b41609fd9 100644 > --- a/Documentation/virtual/kvm/devices/xive.txt > +++ b/Documentation/virtual/kvm/devices/xive.txt > @@ -13,6 +13,29 @@ requires a POWER9 host and the guest OS should have support for the > XIVE native exploitation interrupt mode. If not, it should run using > the legacy interrupt mode, referred as XICS (POWER7/8). > > +* Device Mappings > + > + The KVM device exposes different MMIO ranges of the XIVE HW which > + are required for interrupt management. These are exposed to the > + guest in VMAs populated with a custom VM fault handler. > + > + 1. Thread Interrupt Management Area (TIMA) > + > + Each thread has an associated Thread Interrupt Management context > + composed of a set of registers. These registers let the thread > + handle priority management and interrupt acknowledgment. 
The most > + important are: > + > + - Interrupt Pending Buffer (IPB) > + - Current Processor Priority (CPPR) > + - Notification Source Register (NSR) > + > + They are exposed to software in four different pages each offering > + a view with a different privilege. The first page is for the > + physical thread context and the second for the hypervisor. Only the > + third (operating system) and the fourth (user level) are exposed to the > + guest. > + > * Groups: > > 1. KVM_DEV_XIVE_GRP_CTRL -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson