All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <sean.j.christopherson@intel.com>
To: Makarand Sonare <makarandsonare@google.com>
Cc: kvm@vger.kernel.org, pshier@google.com, jmattson@google.com
Subject: Re: [kvm PATCH 1/2] KVM: nVMX - enable VMX preemption timer migration
Date: Tue, 21 Apr 2020 19:36:01 -0700	[thread overview]
Message-ID: <20200422023601.GG17836@linux.intel.com> (raw)
In-Reply-To: <20200417183452.115762-2-makarandsonare@google.com>

On Fri, Apr 17, 2020 at 11:34:51AM -0700, Makarand Sonare wrote:
> From: Peter Shier <pshier@google.com>
> 
> Add new field to hold preemption timer remaining until expiration
> appended to struct kvm_vmx_nested_state_data. KVM_SET_NESTED_STATE restarts
> timer using migrated state regardless of whether L1 sets
> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER.

The changelog should call out that simply stuffing vmcs12 will cause
incorrect behavior because the second (and later) VM-Enter after migration
will restart the timer with the wrong value.  It took me a bit to piece
together why the extra field was needed.

 
> -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
> +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
> +					u64 preemption_timeout)
>  {
> -	u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  
>  	/*
> @@ -3293,8 +3294,15 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
>  	 * the timer.
>  	 */
>  	vmx->nested.preemption_timer_expired = false;
> -	if (nested_cpu_has_preemption_timer(vmcs12))
> -		vmx_start_preemption_timer(vcpu);
> +	if (nested_cpu_has_preemption_timer(vmcs12)) {
> +		u64 preemption_timeout;

timer_value would be shorter, and IMO, a better name as well.  I'd even go
so far as to say throw on a follow-up patch to rename the variable in
vmx_start_preemption_timer.

> +		if (from_vmentry)
> +			preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;

vmcs12 is already available.

> +		else
> +			preemption_timeout = vmx->nested.preemption_timer_remaining;
> +		vmx_start_preemption_timer(vcpu, preemption_timeout);
> +	}
>  
>  	/*
>  	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
> @@ -3888,10 +3896,13 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	else
>  		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
>  
> -	if (nested_cpu_has_preemption_timer(vmcs12) &&
> -	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
> +	if (nested_cpu_has_preemption_timer(vmcs12)) {
> +		vmx->nested.preemption_timer_remaining =
> +			vmx_get_preemption_timer_value(vcpu);
> +		if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
>  			vmcs12->vmx_preemption_timer_value =
> -				vmx_get_preemption_timer_value(vcpu);
> +				vmx->nested.preemption_timer_remaining;
> +	}
>  
>  	/*
>  	 * In some cases (usually, nested EPT), L2 is allowed to change its
> @@ -5759,6 +5770,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>  
>  			if (vmx->nested.mtf_pending)
>  				kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
> +
> +			if (nested_cpu_has_preemption_timer(vmcs12)) {
> +				kvm_state.flags |=
> +					KVM_STATE_NESTED_PREEMPTION_TIMER;
> +				kvm_state.size +=
> +					sizeof(user_vmx_nested_state->preemption_timer_remaining);
> +			}
>  		}
>  	}
>  
> @@ -5790,6 +5808,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>  
>  	BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
>  	BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
> +	BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_remaining)
> +		    != sizeof(vmx->nested.preemption_timer_remaining));
> +
>  
>  	/*
>  	 * Copy over the full allocated size of vmcs12 rather than just the size
> @@ -5805,6 +5826,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>  			return -EFAULT;
>  	}
>  
> +	if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +		if (copy_to_user(&user_vmx_nested_state->preemption_timer_remaining,
> +				 &vmx->nested.preemption_timer_remaining,
> +				 sizeof(vmx->nested.preemption_timer_remaining)))
> +			return -EFAULT;

Isn't this suitable for put_user()?  That would allow dropping the
long-winded BUILD_BUG_ON.

> +	}
> +
>  out:
>  	return kvm_state.size;
>  }
> @@ -5876,7 +5904,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>  	 */
>  	if (is_smm(vcpu) ?
>  		(kvm_state->flags &
> -		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
> +		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING |
> +		  KVM_STATE_NESTED_PREEMPTION_TIMER))
>  		: kvm_state->hdr.vmx.smm.flags)
>  		return -EINVAL;
>  
> @@ -5966,6 +5995,23 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>  			goto error_guest_mode;
>  	}
>  
> +	if (kvm_state->flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +
> +		if (kvm_state->size <
> +		    sizeof(*kvm_state) +
> +		    sizeof(user_vmx_nested_state->vmcs12) +
> +		    sizeof(user_vmx_nested_state->shadow_vmcs12) +
> +		    sizeof(user_vmx_nested_state->preemption_timer_remaining))

		if (kvm_state->size <
		    offsetof(struct kvm_nested_state, vmx) +
		    offsetofend(struct kvm_nested_state_data,
				preemption_timer_remaining))


> +			goto error_guest_mode;
> +
> +		if (copy_from_user(&vmx->nested.preemption_timer_remaining,
> +				   &user_vmx_nested_state->preemption_timer_remaining,
> +				   sizeof(user_vmx_nested_state->preemption_timer_remaining))) {
> +			ret = -EFAULT;
> +			goto error_guest_mode;
> +		}
> +	}
> +
>  	if (nested_vmx_check_controls(vcpu, vmcs12) ||
>  	    nested_vmx_check_host_state(vcpu, vmcs12) ||
>  	    nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index aab9df55336ef..0098c7dc2e254 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -167,6 +167,7 @@ struct nested_vmx {
>  	u16 posted_intr_nv;
>  
>  	struct hrtimer preemption_timer;
> +	u32 preemption_timer_remaining;
>  	bool preemption_timer_expired;
>  
>  	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 027dfd278a973..ddad6305a0d23 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3374,6 +3374,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_GET_MSR_FEATURES:
>  	case KVM_CAP_MSR_PLATFORM_INFO:
>  	case KVM_CAP_EXCEPTION_PAYLOAD:
> +	case KVM_CAP_NESTED_PREEMPTION_TIMER:

Maybe KVM_CAP_NESTED_STATE_PREEMPTION_TIMER?

>  		r = 1;
>  		break;
>  	case KVM_CAP_SYNC_REGS:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 428c7dde6b4b3..489c3df0b49c8 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1017,6 +1017,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_S390_VCPU_RESETS 179
>  #define KVM_CAP_S390_PROTECTED 180
>  #define KVM_CAP_PPC_SECURE_GUEST 181
> +#define KVM_CAP_NESTED_PREEMPTION_TIMER 182
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c650..ac8e5d391356c 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -391,6 +391,8 @@ struct kvm_sync_regs {
>  #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
>  #define KVM_STATE_NESTED_EVMCS		0x00000004
>  #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
> +/* Available with KVM_CAP_NESTED_PREEMPTION_TIMER */
> +#define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
>  
>  #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
>  #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
> -- 
> 2.26.1.301.g55bc3eb7cb9-goog
> 

  reply	other threads:[~2020-04-22  2:36 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-17 18:34 [kvm PATCH 0/2] *** Enable VMX preemption timer migration *** Makarand Sonare
2020-04-17 18:34 ` [kvm PATCH 1/2] KVM: nVMX - enable VMX preemption timer migration Makarand Sonare
2020-04-22  2:36   ` Sean Christopherson [this message]
2020-04-17 18:34 ` [kvm PATCH 2/2] KVM: nVMX: Don't clobber preemption timer in the VMCS12 before L2 ran Makarand Sonare
2020-04-22  1:57   ` Sean Christopherson
2020-04-22  2:02     ` Sean Christopherson
2020-04-22 17:05       ` Jim Mattson
2020-04-22 23:08         ` Sean Christopherson
2020-04-23 14:58         ` Paolo Bonzini
2020-04-23 21:11           ` Jim Mattson
2020-04-23 21:18             ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200422023601.GG17836@linux.intel.com \
    --to=sean.j.christopherson@intel.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=makarandsonare@google.com \
    --cc=pshier@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.