qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Wanpeng Li <kernellwp@gmail.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: Sean Christopherson <seanjc@google.com>,
	Michael Tokarev <mjt@tls.msk.ru>,
	"qemu-devel@nongnu.org Developers" <qemu-devel@nongnu.org>,
	kvm <kvm@vger.kernel.org>, Paolo Bonzini <pbonzini@redhat.com>
Subject: Re: Commit "x86/kvm: Move context tracking where it belongs" broke guest time accounting
Date: Fri, 9 Apr 2021 16:13:03 +0800	[thread overview]
Message-ID: <CANRm+CwgvAPOvCxmuEDb+L5kvjBcpWE03Ps70qpqKntHuPxpaA@mail.gmail.com> (raw)
In-Reply-To: <874kgg29uo.ffs@nanos.tec.linutronix.de>

On Thu, 8 Apr 2021 at 21:19, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Tue, Apr 06 2021 at 21:47, Sean Christopherson wrote:
> > On Tue, Apr 06, 2021, Michael Tokarev wrote:
> >> broke kvm guest cpu time accounting - after this commit, when running
> >> qemu-system-x86_64 -enable-kvm, the guest time (in /proc/stat and
> >> elsewhere) is always 0.
> >>
> >> I dunno why it happened, but it happened, and all kernels after 5.9
> >> are affected by this.
> >>
> >> This commit is found in a (painful) git bisect between kernel 5.8 and 5.10.
> >
> > Yes :-(
> >
> > There's a bugzilla[1] and two proposed fixes[2][3].  I don't particularly like
> > either of the fixes, but an elegant solution hasn't presented itself.
> >
> > Thomas/Paolo, can you please weigh in?
> >
> > [1] https://bugzilla.kernel.org/show_bug.cgi?id=209831
> > [2] https://lkml.kernel.org/r/1617011036-11734-1-git-send-email-wanpengli@tencent.com
> > [3] https://lkml.kernel.org/r/20210206004218.312023-1-seanjc@google.com
>
> All of the solutions I looked at so far are ugly as hell. The problem is
> that the accounting is plumbed into the context tracking and moving
> context tracking around to a different place is just wrong.
>
> I think the right solution is to seperate the time accounting logic out
> from guest_enter/exit_irqoff() and have virt time specific helpers which
> can be placed at the proper spots in kvm.

For x86 part, how about something like below:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 48b396f3..7aeb724 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3730,6 +3730,7 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
     lockdep_hardirqs_on_prepare(CALLER_ADDR0);
     instrumentation_end();

+    account_guest_enter();
     guest_enter_irqoff();
     lockdep_hardirqs_on(CALLER_ADDR0);

@@ -3759,6 +3760,8 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
      */
     lockdep_hardirqs_off(CALLER_ADDR0);
     guest_exit_irqoff();
+    if (vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     instrumentation_begin();
     trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c05e6e2..5f6c30c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6617,6 +6617,7 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
     lockdep_hardirqs_on_prepare(CALLER_ADDR0);
     instrumentation_end();

+    account_guest_enter();
     guest_enter_irqoff();
     lockdep_hardirqs_on(CALLER_ADDR0);

@@ -6648,6 +6649,8 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
      */
     lockdep_hardirqs_off(CALLER_ADDR0);
     guest_exit_irqoff();
+    if (vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     instrumentation_begin();
     trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 16fb395..33422c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9229,6 +9229,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
     ++vcpu->stat.exits;
     local_irq_disable();
     kvm_after_interrupt(vcpu);
+    if (!vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     if (lapic_in_kernel(vcpu)) {
         s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index bceb064..ff70229 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -104,8 +104,7 @@ static inline void context_tracking_init(void) { }


 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
 {
     instrumentation_begin();
     if (vtime_accounting_enabled_this_cpu())
@@ -113,7 +112,11 @@ static __always_inline void guest_enter_irqoff(void)
     else
         current->flags |= PF_VCPU;
     instrumentation_end();
+}

+/* must be called with irqs disabled */
+static __always_inline void guest_enter_irqoff(void)
+{
     if (context_tracking_enabled())
         __context_tracking_enter(CONTEXT_GUEST);

@@ -131,11 +134,8 @@ static __always_inline void guest_enter_irqoff(void)
     }
 }

-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
 {
-    if (context_tracking_enabled())
-        __context_tracking_exit(CONTEXT_GUEST);
-
     instrumentation_begin();
     if (vtime_accounting_enabled_this_cpu())
         vtime_guest_exit(current);
@@ -144,8 +144,14 @@ static __always_inline void guest_exit_irqoff(void)
     instrumentation_end();
 }

+static __always_inline void guest_exit_irqoff(void)
+{
+    if (context_tracking_enabled())
+        __context_tracking_exit(CONTEXT_GUEST);
+}
+
 #else
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
 {
     /*
      * This is running in ioctl context so its safe
@@ -155,11 +161,17 @@ static __always_inline void guest_enter_irqoff(void)
     instrumentation_begin();
     vtime_account_kernel(current);
     current->flags |= PF_VCPU;
+    instrumentation_end();
+}
+
+static __always_inline void guest_enter_irqoff(void)
+{
+    instrumentation_begin();
     rcu_virt_note_context_switch(smp_processor_id());
     instrumentation_end();
 }

-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
 {
     instrumentation_begin();
     /* Flush the guest cputime we spent on the guest */
@@ -167,6 +179,11 @@ static __always_inline void guest_exit_irqoff(void)
     current->flags &= ~PF_VCPU;
     instrumentation_end();
 }
+
+static __always_inline void guest_exit_irqoff(void)
+{
+
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

 static inline void guest_exit(void)


  parent reply	other threads:[~2021-04-09  8:14 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-06 20:17 Michael Tokarev
2021-04-06 21:47 ` Sean Christopherson
2021-04-08 13:18   ` Thomas Gleixner
2021-04-09  2:14     ` Wanpeng Li
2021-04-09  8:13     ` Wanpeng Li [this message]
2021-04-09  9:19       ` Thomas Gleixner
2021-04-07 11:01 ` Wanpeng Li
2021-04-13 10:48 ` Wanpeng Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CANRm+CwgvAPOvCxmuEDb+L5kvjBcpWE03Ps70qpqKntHuPxpaA@mail.gmail.com \
    --to=kernellwp@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=mjt@tls.msk.ru \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --subject='Re: Commit "x86/kvm: Move context tracking where it belongs" broke guest time accounting' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).