Ed Swierk wrote:
> As we discussed a while back, support for Xen PV-on-HVM guests can be
> implemented almost entirely in userspace, except for handling one
> annoying MSR that maps a Xen hypercall blob into guest address space.
> 
> A generic mechanism to delegate MSR writes to userspace seems overkill
> and risks encouraging similar MSR abuse in the future.  Thus this patch
> adds special support for the Xen HVM MSR.
> 
> At Avi's suggestion[1] I implemented a new ioctl, KVM_XEN_HVM_CONFIG,
> that lets userspace tell KVM which MSR the guest will write to, as well
> as the starting address and size of the hypercall blobs (one each for
> 32-bit and 64-bit) that userspace has loaded from files.  When the guest
> writes to the MSR, KVM copies one page of the blob from userspace to the
> guest.
> 
> I've tested this patch against a hacked-up version of Gerd's userspace
> code[2]; I'm happy to share those hacks if anyone is interested.
> 
> [1] http://www.mail-archive.com/kvm@vger.kernel.org/msg16065.html
> [2]
> http://git.et.redhat.com/?p=qemu-kraxel.git;a=log;h=refs/heads/xenner.v5
> 
> Signed-off-by: Ed Swierk <eswierk@aristanetworks.com>
> 
> ---
> diff -BurN a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
> --- a/include/asm-x86/kvm.h	2009-10-13 20:40:55.000000000 -0700
> +++ b/include/asm-x86/kvm.h	2009-10-13 20:21:07.000000000 -0700
> @@ -59,6 +59,7 @@
>  #define __KVM_HAVE_MSIX
>  #define __KVM_HAVE_MCE
>  #define __KVM_HAVE_PIT_STATE2
> +#define __KVM_HAVE_XEN_HVM
>  
>  /* Architectural interrupt line count. */
>  #define KVM_NR_INTERRUPTS 256
> diff -BurN a/include/linux/kvm.h b/include/linux/kvm.h
> --- a/include/linux/kvm.h	2009-10-13 20:40:55.000000000 -0700
> +++ b/include/linux/kvm.h	2009-10-13 20:21:26.000000000 -0700
> @@ -476,6 +476,9 @@
>  #endif
>  #define KVM_CAP_IOEVENTFD 36
>  #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
> +#ifdef __KVM_HAVE_XEN_HVM
> +#define KVM_CAP_XEN_HVM 90
> +#endif

When submitting for merge, I would close this gap in the CAP number space.

>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -528,6 +531,14 @@
>  };
>  #endif
>  
> +#ifdef KVM_CAP_XEN_HVM
> +struct kvm_xen_hvm_config {
> +	__u32 msr;
> +	__u64 blob_addr[2];
> +	__u8 blob_size[2];

This needs padding to achieve a stable layout across 32 and 64 bit.

> +};
> +#endif
> +
>  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
>  
>  struct kvm_irqfd {
> @@ -586,6 +597,7 @@
>  #define KVM_CREATE_PIT2		   _IOW(KVMIO, 0x77, struct kvm_pit_config)
>  #define KVM_SET_BOOT_CPU_ID        _IO(KVMIO, 0x78)
>  #define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
> +#define KVM_XEN_HVM_CONFIG        _IOW(KVMIO, 0xa1, struct kvm_xen_hvm_config)

Also here: next is 0x7a.

>  
>  /*
>   * ioctls for vcpu fds
> diff -BurN a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> --- a/include/linux/kvm_host.h	2009-10-13 20:40:55.000000000 -0700
> +++ b/include/linux/kvm_host.h	2009-10-13 20:27:03.000000000 -0700
> @@ -236,6 +236,10 @@
>  	unsigned long mmu_notifier_seq;
>  	long mmu_notifier_count;
>  #endif
> +
> +#ifdef KVM_CAP_XEN_HVM
> +	struct kvm_xen_hvm_config xen_hvm_config;
> +#endif
>  };
>  
>  /* The guest did something we don't support. */
> diff -BurN a/x86/x86.c b/x86/x86.c
> --- a/x86/x86.c	2009-10-13 20:40:58.000000000 -0700
> +++ b/x86/x86.c	2009-10-13 20:33:49.000000000 -0700
> @@ -875,6 +875,33 @@
>  	return 0;
>  }
>  
> +#ifdef KVM_CAP_XEN_HVM
> +static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
> +{
> +	int blob = !!(vcpu->arch.shadow_efer & EFER_LME);
> +	u32 pnum = data & ~PAGE_MASK;
> +	u64 paddr = data & PAGE_MASK;
> +	u8 *page;
> +	int r = 1;
> +	printk(KERN_INFO "kvm: loading xen hvm blob %d page %d at %llx\n",
> +	       blob, pnum, paddr);

Debugging left-over? And please insert a blank line after the variable
block.

> +	if (pnum >= vcpu->kvm->xen_hvm_config.blob_size[blob])
> +		goto out;
> +	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +	if (!page)
> +		goto out;
> +	if (copy_from_user(page, (u8 *)vcpu->kvm->xen_hvm_config.blob_addr[blob]
> +			   + pnum * PAGE_SIZE, PAGE_SIZE))
> +		goto out_free;
> +	kvm_write_guest(vcpu->kvm, paddr, page, PAGE_SIZE);

This function returns an error code. Not interested in it?

> +	r = 0;
> +out_free:
> +	kfree(page);
> +out:
> +	return r;
> +}
> +#endif
> +
>  int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  {
>  	switch (msr) {
> @@ -990,6 +1017,10 @@
>  			"0x%x data 0x%llx\n", msr, data);
>  		break;
>  	default:
> +#ifdef KVM_CAP_XEN_HVM
> +		if (msr && (msr == vcpu->kvm->xen_hvm_config.msr))
> +			return xen_hvm_config(vcpu, data);
> +#endif
>  		if (!ignore_msrs) {
>  			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
>  				msr, data);
> @@ -2453,6 +2484,17 @@
>  		r = 0;
>  		break;
>  	}
> +#ifdef KVM_CAP_XEN_HVM
> +	case KVM_XEN_HVM_CONFIG: {
> +		r = -EFAULT;
> +		printk(KERN_INFO "kvm: configuring xen hvm\n");

Yet another noisy printk.

> +		if (copy_from_user(&kvm->xen_hvm_config, argp,
> +				   sizeof(struct kvm_xen_hvm_config)))
> +			goto out;
> +		r = 0;
> +		break;
> +	}
> +#endif
>  	default:
>  		;
>  	}
> 

Interesting stuff. How usable is your work at this point? I have no
immediate demand, but the question of whether one could integrate Xen
guests with KVM has already come up more than once at work.

Jan