kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Ilias Stamatis <ilstam@amazon.com>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	pbonzini@redhat.com, mlevitsk@redhat.com, vkuznets@redhat.com,
	wanpengli@tencent.com, jmattson@google.com, joro@8bytes.org,
	zamsden@gmail.com, mtosatti@redhat.com, dwmw@amazon.co.uk
Subject: Re: [PATCH v2 07/10] KVM: X86: Move write_l1_tsc_offset() logic to common code and rename it
Date: Wed, 19 May 2021 00:05:09 +0000	[thread overview]
Message-ID: <YKRWNaqzo4GVDxHP@google.com> (raw)
In-Reply-To: <20210512150945.4591-8-ilstam@amazon.com>

On Wed, May 12, 2021, Ilias Stamatis wrote:
> The write_l1_tsc_offset() callback has a misleading name. It does not
> set L1's TSC offset, it rather updates the current TSC offset which
> might be different if a nested guest is executing. Additionally, both
> the vmx and svm implementations use the same logic for calculating the
> current TSC before writing it to hardware.

I don't disagree, but the current name as the advantage of clarifying (well,
hinting) that the offset is L1's offset.  That hint is lost in this refactoring.
Maybe rename "u64 offset" to "u64 l1_tsc_offset"?

> This patch renames the function and moves the common logic to the

Use imperative mood instead of "This patch.  From 
Documentation/process/submitting-patches.rst:

  Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
  instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
  to do frotz", as if you are giving orders to the codebase to change
  its behaviour.

> caller. The vmx/svm-specific code now merely sets the given offset to
> the corresponding hardware structure.
> 
> Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
> ---
>  arch/x86/include/asm/kvm-x86-ops.h |  2 +-
>  arch/x86/include/asm/kvm_host.h    |  3 +--
>  arch/x86/kvm/svm/svm.c             | 21 ++++-----------------
>  arch/x86/kvm/vmx/vmx.c             | 23 +++--------------------
>  arch/x86/kvm/x86.c                 | 17 ++++++++++++++++-
>  5 files changed, 25 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index 2063616fba1c..029c9615378f 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -89,7 +89,7 @@ KVM_X86_OP(load_mmu_pgd)
>  KVM_X86_OP_NULL(has_wbinvd_exit)
>  KVM_X86_OP(get_l2_tsc_offset)
>  KVM_X86_OP(get_l2_tsc_multiplier)
> -KVM_X86_OP(write_l1_tsc_offset)
> +KVM_X86_OP(write_tsc_offset)
>  KVM_X86_OP(get_exit_info)
>  KVM_X86_OP(check_intercept)
>  KVM_X86_OP(handle_exit_irqoff)
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 57a25d8e8b0f..61cf201c001a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1307,8 +1307,7 @@ struct kvm_x86_ops {
>  
>  	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
>  	u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
> -	/* Returns actual tsc_offset set in active VMCS */
> -	u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
> +	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
>  
>  	/*
>  	 * Retrieve somewhat arbitrary exit information.  Intended to be used
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 679b2fc1a3f9..b18f60463073 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1094,26 +1094,13 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
>  	return kvm_default_tsc_scaling_ratio;
>  }
>  
> -static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
> +static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> -	u64 g_tsc_offset = 0;
> -
> -	if (is_guest_mode(vcpu)) {
> -		/* Write L1's TSC offset.  */
> -		g_tsc_offset = svm->vmcb->control.tsc_offset -
> -			       svm->vmcb01.ptr->control.tsc_offset;
> -		svm->vmcb01.ptr->control.tsc_offset = offset;
> -	}
> -
> -	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
> -				   svm->vmcb->control.tsc_offset - g_tsc_offset,
> -				   offset);
> -
> -	svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
>  
> +	svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
> +	svm->vmcb->control.tsc_offset = offset;
>  	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
> -	return svm->vmcb->control.tsc_offset;
>  }
>  
>  /* Evaluate instruction intercepts that depend on guest CPUID features. */
> @@ -4540,7 +4527,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>  
>  	.get_l2_tsc_offset = svm_get_l2_tsc_offset,
>  	.get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
> -	.write_l1_tsc_offset = svm_write_l1_tsc_offset,
> +	.write_tsc_offset = svm_write_tsc_offset,
>  
>  	.load_mmu_pgd = svm_load_mmu_pgd,
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 575e13bddda8..3c4eb14a1e86 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1810,26 +1810,9 @@ static u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
>  	return multiplier;
>  }
>  
> -static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
> +static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>  {
> -	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> -	u64 g_tsc_offset = 0;
> -
> -	/*
> -	 * We're here if L1 chose not to trap WRMSR to TSC. According
> -	 * to the spec, this should set L1's TSC; The offset that L1
> -	 * set for L2 remains unchanged, and still needs to be added
> -	 * to the newly set TSC to get L2's TSC.
> -	 */
> -	if (is_guest_mode(vcpu) &&
> -	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
> -		g_tsc_offset = vmcs12->tsc_offset;
> -
> -	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
> -				   vcpu->arch.tsc_offset - g_tsc_offset,
> -				   offset);
> -	vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
> -	return offset + g_tsc_offset;
> +	vmcs_write64(TSC_OFFSET, offset);
>  }
>  
>  /*
> @@ -7725,7 +7708,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
>  
>  	.get_l2_tsc_offset = vmx_get_l2_tsc_offset,
>  	.get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
> -	.write_l1_tsc_offset = vmx_write_l1_tsc_offset,
> +	.write_tsc_offset = vmx_write_tsc_offset,
>  
>  	.load_mmu_pgd = vmx_load_mmu_pgd,
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 1db6cfc2079f..f3ba1be4d5b9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2377,8 +2377,23 @@ EXPORT_SYMBOL_GPL(kvm_set_02_tsc_multiplier);
>  
>  static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>  {
> +	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
> +				   vcpu->arch.l1_tsc_offset,
> +				   offset);
> +
>  	vcpu->arch.l1_tsc_offset = offset;
> -	vcpu->arch.tsc_offset = static_call(kvm_x86_write_l1_tsc_offset)(vcpu, offset);
> +	vcpu->arch.tsc_offset = offset;
> +
> +	if (is_guest_mode(vcpu)) {

Unnecessary curly braces.

> +		/*
> +		 * We're here if L1 chose not to trap WRMSR to TSC and
> +		 * according to the spec this should set L1's TSC (as opposed
> +		 * to setting L1's offset for L2).
> +		 */

While we're shuffling code, can we improve this comment?  It works for the WRMSR
case, but makes no sense in the context of host TSC adjustments.  It's not at all
clear to me that it's even correct or relevant in those cases.

> +		kvm_set_02_tsc_offset(vcpu);

I really dislike that kvm_set_02_tsc_offset() consumes a bunch of variables
_and_ sets arch.tsc_offset, e.g. it's not at all obvious that moving this call
around will break L2.  Even more bizarre is that arch.tsc_offset is conditionally
consumed.  Oh, and kvm_set_02_tsc_offset() is not idempotent since it can do a
RMW on arch.tsc_offset.

The below static_call() dependency doesn't appear to be easily solved, but we
can at least strongly hint that vcpu->arch.tsc_offset is set.  For everything
else, I think we can clean things up by doing this (with the vendor calls
providing the L2 variables directly):

void kvm_set_l2_tsc_offset(struct kvm_vcpu *vcpu, u64 l2_offset,
			   u64 l2_multiplier)
{
	u64 l1_offset = vcpu->arch.l1_tsc_offset;

	if (l2_multiplier != kvm_default_tsc_scaling_ratio)
		l2_offset += mul_s64_u64_shr((s64)l1_tsc_offset, l2_multiplier,
					     kvm_tsc_scaling_ratio_frac_bits);

	vcpu->arch.tsc_offset = l2_offset;
}
EXPORT_SYMBOL_GPL(kvm_get_l2_tsc_offset);

static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
				   vcpu->arch.l1_tsc_offset,
				   offset);

	vcpu->arch.l1_tsc_offset = offset;

	if (is_guest_mode(vcpu))
		kvm_set_l2_tsc_offset(vcpu,
				      static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
				      static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
	else
		vcpu->arch.tsc_offset = offset;

	static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
}


An alternative would be to explicitly track L1 and L2, and _never_ track the
current TSC values.  I.e. always compute the correct value on the fly.  I think
the only hot path is the TSC deadline timer, and AFAICT that always runs with
L1's timer.  Speaking of which, at the end of this series, vmx_set_hv_timer()
uses L1's TSC but the current scaling ratio; that can't be right.

> +	}
> +
> +	static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
>  }
>  
>  static inline bool kvm_check_tsc_unstable(void)
> -- 
> 2.17.1
> 

  reply	other threads:[~2021-05-19  0:05 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-12 15:09 [PATCH v2 00/10] KVM: Implement nested TSC scaling Ilias Stamatis
2021-05-12 15:09 ` [PATCH v2 01/10] math64.h: Add mul_s64_u64_shr() Ilias Stamatis
2021-05-18 22:36   ` Sean Christopherson
2021-05-12 15:09 ` [PATCH v2 02/10] KVM: X86: Store L1's TSC scaling ratio in 'struct kvm_vcpu_arch' Ilias Stamatis
2021-05-18 22:52   ` Sean Christopherson
2021-05-19  8:54     ` Stamatis, Ilias
2021-05-12 15:09 ` [PATCH v2 03/10] KVM: X86: Add kvm_scale_tsc_l1() and kvm_compute_tsc_offset_l1() Ilias Stamatis
2021-05-18 23:04   ` Sean Christopherson
2021-05-19  9:02     ` Stamatis, Ilias
2021-05-19 15:40       ` Sean Christopherson
2021-05-20 18:27         ` Stamatis, Ilias
2021-05-12 15:09 ` [PATCH v2 04/10] KVM: VMX: Add a TSC multiplier field in VMCS12 Ilias Stamatis
2021-05-12 15:09 ` [PATCH v2 05/10] KVM: X86: Add functions for retrieving L2 TSC fields from common code Ilias Stamatis
2021-05-12 15:09 ` [PATCH v2 06/10] KVM: X86: Add functions that calculate the 02 TSC offset and multiplier Ilias Stamatis
2021-05-18 23:21   ` Sean Christopherson
2021-05-19 10:15     ` Stamatis, Ilias
2021-05-12 15:09 ` [PATCH v2 07/10] KVM: X86: Move write_l1_tsc_offset() logic to common code and rename it Ilias Stamatis
2021-05-19  0:05   ` Sean Christopherson [this message]
2021-05-19 11:45     ` Stamatis, Ilias
2021-05-19 15:49       ` Sean Christopherson
2021-05-12 15:09 ` [PATCH v2 08/10] KVM: VMX: Set the TSC offset and multiplier on nested entry and exit Ilias Stamatis
2021-05-19  0:07   ` Sean Christopherson
2021-05-19 11:55     ` Stamatis, Ilias
2021-05-12 15:09 ` [PATCH v2 09/10] KVM: VMX: Expose TSC scaling to L2 Ilias Stamatis
2021-05-12 15:09 ` [PATCH v2 10/10] KVM: selftests: x86: Add vmx_nested_tsc_scaling_test Ilias Stamatis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YKRWNaqzo4GVDxHP@google.com \
    --to=seanjc@google.com \
    --cc=dwmw@amazon.co.uk \
    --cc=ilstam@amazon.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mlevitsk@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=zamsden@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).