All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wanpeng Li <kernellwp@gmail.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: Sean Christopherson <seanjc@google.com>,
	Michael Tokarev <mjt@tls.msk.ru>, kvm <kvm@vger.kernel.org>,
	"qemu-devel@nongnu.org Developers" <qemu-devel@nongnu.org>,
	Paolo Bonzini <pbonzini@redhat.com>
Subject: Re: Commit "x86/kvm: Move context tracking where it belongs" broke guest time accounting
Date: Fri, 9 Apr 2021 16:13:03 +0800	[thread overview]
Message-ID: <CANRm+CwgvAPOvCxmuEDb+L5kvjBcpWE03Ps70qpqKntHuPxpaA@mail.gmail.com> (raw)
In-Reply-To: <874kgg29uo.ffs@nanos.tec.linutronix.de>

On Thu, 8 Apr 2021 at 21:19, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Tue, Apr 06 2021 at 21:47, Sean Christopherson wrote:
> > On Tue, Apr 06, 2021, Michael Tokarev wrote:
> >> broke kvm guest cpu time accounting - after this commit, when running
> >> qemu-system-x86_64 -enable-kvm, the guest time (in /proc/stat and
> >> elsewhere) is always 0.
> >>
> >> I dunno why it happened, but it happened, and all kernels after 5.9
> >> are affected by this.
> >>
> >> This commit is found in a (painful) git bisect between kernel 5.8 and 5.10.
> >
> > Yes :-(
> >
> > There's a bugzilla[1] and two proposed fixes[2][3].  I don't particularly like
> > either of the fixes, but an elegant solution hasn't presented itself.
> >
> > Thomas/Paolo, can you please weigh in?
> >
> > [1] https://bugzilla.kernel.org/show_bug.cgi?id=209831
> > [2] https://lkml.kernel.org/r/1617011036-11734-1-git-send-email-wanpengli@tencent.com
> > [3] https://lkml.kernel.org/r/20210206004218.312023-1-seanjc@google.com
>
> All of the solutions I looked at so far are ugly as hell. The problem is
> that the accounting is plumbed into the context tracking and moving
> context tracking around to a different place is just wrong.
>
> I think the right solution is to seperate the time accounting logic out
> from guest_enter/exit_irqoff() and have virt time specific helpers which
> can be placed at the proper spots in kvm.

For x86 part, how about something like below:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 48b396f3..7aeb724 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3730,6 +3730,7 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
     lockdep_hardirqs_on_prepare(CALLER_ADDR0);
     instrumentation_end();

+    account_guest_enter();
     guest_enter_irqoff();
     lockdep_hardirqs_on(CALLER_ADDR0);

@@ -3759,6 +3760,8 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
      */
     lockdep_hardirqs_off(CALLER_ADDR0);
     guest_exit_irqoff();
+    if (vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     instrumentation_begin();
     trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c05e6e2..5f6c30c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6617,6 +6617,7 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
     lockdep_hardirqs_on_prepare(CALLER_ADDR0);
     instrumentation_end();

+    account_guest_enter();
     guest_enter_irqoff();
     lockdep_hardirqs_on(CALLER_ADDR0);

@@ -6648,6 +6649,8 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
      */
     lockdep_hardirqs_off(CALLER_ADDR0);
     guest_exit_irqoff();
+    if (vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     instrumentation_begin();
     trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 16fb395..33422c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9229,6 +9229,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
     ++vcpu->stat.exits;
     local_irq_disable();
     kvm_after_interrupt(vcpu);
+    if (!vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     if (lapic_in_kernel(vcpu)) {
         s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index bceb064..ff70229 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -104,8 +104,7 @@ static inline void context_tracking_init(void) { }


 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
 {
     instrumentation_begin();
     if (vtime_accounting_enabled_this_cpu())
@@ -113,7 +112,11 @@ static __always_inline void guest_enter_irqoff(void)
     else
         current->flags |= PF_VCPU;
     instrumentation_end();
+}

+/* must be called with irqs disabled */
+static __always_inline void guest_enter_irqoff(void)
+{
     if (context_tracking_enabled())
         __context_tracking_enter(CONTEXT_GUEST);

@@ -131,11 +134,8 @@ static __always_inline void guest_enter_irqoff(void)
     }
 }

-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
 {
-    if (context_tracking_enabled())
-        __context_tracking_exit(CONTEXT_GUEST);
-
     instrumentation_begin();
     if (vtime_accounting_enabled_this_cpu())
         vtime_guest_exit(current);
@@ -144,8 +144,14 @@ static __always_inline void guest_exit_irqoff(void)
     instrumentation_end();
 }

+static __always_inline void guest_exit_irqoff(void)
+{
+    if (context_tracking_enabled())
+        __context_tracking_exit(CONTEXT_GUEST);
+}
+
 #else
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
 {
     /*
      * This is running in ioctl context so its safe
@@ -155,11 +161,17 @@ static __always_inline void guest_enter_irqoff(void)
     instrumentation_begin();
     vtime_account_kernel(current);
     current->flags |= PF_VCPU;
+    instrumentation_end();
+}
+
+static __always_inline void guest_enter_irqoff(void)
+{
+    instrumentation_begin();
     rcu_virt_note_context_switch(smp_processor_id());
     instrumentation_end();
 }

-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
 {
     instrumentation_begin();
     /* Flush the guest cputime we spent on the guest */
@@ -167,6 +179,11 @@ static __always_inline void guest_exit_irqoff(void)
     current->flags &= ~PF_VCPU;
     instrumentation_end();
 }
+
+static __always_inline void guest_exit_irqoff(void)
+{
+
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

 static inline void guest_exit(void)

WARNING: multiple messages have this Message-ID (diff)
From: Wanpeng Li <kernellwp@gmail.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: Sean Christopherson <seanjc@google.com>,
	Michael Tokarev <mjt@tls.msk.ru>,
	"qemu-devel@nongnu.org Developers" <qemu-devel@nongnu.org>,
	kvm <kvm@vger.kernel.org>, Paolo Bonzini <pbonzini@redhat.com>
Subject: Re: Commit "x86/kvm: Move context tracking where it belongs" broke guest time accounting
Date: Fri, 9 Apr 2021 16:13:03 +0800	[thread overview]
Message-ID: <CANRm+CwgvAPOvCxmuEDb+L5kvjBcpWE03Ps70qpqKntHuPxpaA@mail.gmail.com> (raw)
In-Reply-To: <874kgg29uo.ffs@nanos.tec.linutronix.de>

On Thu, 8 Apr 2021 at 21:19, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Tue, Apr 06 2021 at 21:47, Sean Christopherson wrote:
> > On Tue, Apr 06, 2021, Michael Tokarev wrote:
> >> broke kvm guest cpu time accounting - after this commit, when running
> >> qemu-system-x86_64 -enable-kvm, the guest time (in /proc/stat and
> >> elsewhere) is always 0.
> >>
> >> I dunno why it happened, but it happened, and all kernels after 5.9
> >> are affected by this.
> >>
> >> This commit is found in a (painful) git bisect between kernel 5.8 and 5.10.
> >
> > Yes :-(
> >
> > There's a bugzilla[1] and two proposed fixes[2][3].  I don't particularly like
> > either of the fixes, but an elegant solution hasn't presented itself.
> >
> > Thomas/Paolo, can you please weigh in?
> >
> > [1] https://bugzilla.kernel.org/show_bug.cgi?id=209831
> > [2] https://lkml.kernel.org/r/1617011036-11734-1-git-send-email-wanpengli@tencent.com
> > [3] https://lkml.kernel.org/r/20210206004218.312023-1-seanjc@google.com
>
> All of the solutions I looked at so far are ugly as hell. The problem is
> that the accounting is plumbed into the context tracking and moving
> context tracking around to a different place is just wrong.
>
> I think the right solution is to seperate the time accounting logic out
> from guest_enter/exit_irqoff() and have virt time specific helpers which
> can be placed at the proper spots in kvm.

For x86 part, how about something like below:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 48b396f3..7aeb724 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3730,6 +3730,7 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
     lockdep_hardirqs_on_prepare(CALLER_ADDR0);
     instrumentation_end();

+    account_guest_enter();
     guest_enter_irqoff();
     lockdep_hardirqs_on(CALLER_ADDR0);

@@ -3759,6 +3760,8 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
      */
     lockdep_hardirqs_off(CALLER_ADDR0);
     guest_exit_irqoff();
+    if (vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     instrumentation_begin();
     trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c05e6e2..5f6c30c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6617,6 +6617,7 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
     lockdep_hardirqs_on_prepare(CALLER_ADDR0);
     instrumentation_end();

+    account_guest_enter();
     guest_enter_irqoff();
     lockdep_hardirqs_on(CALLER_ADDR0);

@@ -6648,6 +6649,8 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
      */
     lockdep_hardirqs_off(CALLER_ADDR0);
     guest_exit_irqoff();
+    if (vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     instrumentation_begin();
     trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 16fb395..33422c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9229,6 +9229,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
     ++vcpu->stat.exits;
     local_irq_disable();
     kvm_after_interrupt(vcpu);
+    if (!vtime_accounting_enabled_this_cpu())
+        account_guest_exit();

     if (lapic_in_kernel(vcpu)) {
         s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index bceb064..ff70229 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -104,8 +104,7 @@ static inline void context_tracking_init(void) { }


 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
 {
     instrumentation_begin();
     if (vtime_accounting_enabled_this_cpu())
@@ -113,7 +112,11 @@ static __always_inline void guest_enter_irqoff(void)
     else
         current->flags |= PF_VCPU;
     instrumentation_end();
+}

+/* must be called with irqs disabled */
+static __always_inline void guest_enter_irqoff(void)
+{
     if (context_tracking_enabled())
         __context_tracking_enter(CONTEXT_GUEST);

@@ -131,11 +134,8 @@ static __always_inline void guest_enter_irqoff(void)
     }
 }

-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
 {
-    if (context_tracking_enabled())
-        __context_tracking_exit(CONTEXT_GUEST);
-
     instrumentation_begin();
     if (vtime_accounting_enabled_this_cpu())
         vtime_guest_exit(current);
@@ -144,8 +144,14 @@ static __always_inline void guest_exit_irqoff(void)
     instrumentation_end();
 }

+static __always_inline void guest_exit_irqoff(void)
+{
+    if (context_tracking_enabled())
+        __context_tracking_exit(CONTEXT_GUEST);
+}
+
 #else
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
 {
     /*
      * This is running in ioctl context so its safe
@@ -155,11 +161,17 @@ static __always_inline void guest_enter_irqoff(void)
     instrumentation_begin();
     vtime_account_kernel(current);
     current->flags |= PF_VCPU;
+    instrumentation_end();
+}
+
+static __always_inline void guest_enter_irqoff(void)
+{
+    instrumentation_begin();
     rcu_virt_note_context_switch(smp_processor_id());
     instrumentation_end();
 }

-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
 {
     instrumentation_begin();
     /* Flush the guest cputime we spent on the guest */
@@ -167,6 +179,11 @@ static __always_inline void guest_exit_irqoff(void)
     current->flags &= ~PF_VCPU;
     instrumentation_end();
 }
+
+static __always_inline void guest_exit_irqoff(void)
+{
+
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

 static inline void guest_exit(void)


  parent reply	other threads:[~2021-04-09  8:13 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-06 20:17 Commit "x86/kvm: Move context tracking where it belongs" broke guest time accounting Michael Tokarev
2021-04-06 21:47 ` Sean Christopherson
2021-04-08 13:18   ` Thomas Gleixner
2021-04-08 13:18     ` Thomas Gleixner
2021-04-09  2:14     ` Wanpeng Li
2021-04-09  2:14       ` Wanpeng Li
2021-04-09  8:13     ` Wanpeng Li [this message]
2021-04-09  8:13       ` Wanpeng Li
2021-04-09  9:19       ` Thomas Gleixner
2021-04-09  9:19         ` Thomas Gleixner
2021-04-07 11:01 ` Wanpeng Li
2021-04-07 11:01   ` Wanpeng Li
2021-04-13 10:48 ` Wanpeng Li
2021-04-13 10:48   ` Wanpeng Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CANRm+CwgvAPOvCxmuEDb+L5kvjBcpWE03Ps70qpqKntHuPxpaA@mail.gmail.com \
    --to=kernellwp@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=mjt@tls.msk.ru \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.