All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrii Anisov <andrii.anisov@gmail.com>
To: xen-devel@lists.xenproject.org
Cc: Stefano Stabellini <sstabellini@kernel.org>,
	Andrii Anisov <andrii_anisov@epam.com>, Wei Liu <wl@xen.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	George Dunlap <george.dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>, Tim Deegan <tim@xen.org>,
	Julien Grall <julien.grall@arm.com>,
	Meng Xu <mengxu@cis.upenn.edu>, Jan Beulich <jbeulich@suse.com>,
	Dario Faggioli <dfaggioli@suse.com>,
	Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
Subject: [Xen-devel] [RFC 6/6] schedule: account all the hypervisor time to the idle vcpu
Date: Fri, 26 Jul 2019 13:37:40 +0300	[thread overview]
Message-ID: <1564137460-25629-8-git-send-email-andrii.anisov@gmail.com> (raw)
In-Reply-To: <1564137460-25629-1-git-send-email-andrii.anisov@gmail.com>

From: Andrii Anisov <andrii_anisov@epam.com>

Account to a guest (its vcpu):
 - guest running time
 - guest sync trap serving time (hypercalls, trapped emulated iomem accesses, etc.)
 - vcpu jobs in leave_hypervisor_tail

Account to the hypervisor (the idle vcpu):
 - IRQ processing
 - softirq processing

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/traps.c       | 49 ++++++++++++++++++++++++++----
 xen/common/sched_credit.c  |  2 +-
 xen/common/sched_credit2.c |  4 +--
 xen/common/sched_rt.c      |  2 +-
 xen/common/schedule.c      | 74 +++++++++++++++++++++++++++++++++++++++-------
 xen/include/xen/sched.h    |  5 ++++
 6 files changed, 116 insertions(+), 20 deletions(-)

diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 13726db..f978b94 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -2064,7 +2064,7 @@ void do_trap_guest_sync(struct cpu_user_regs *regs)
         if ( !check_conditional_instr(regs, hsr) )
         {
             advance_pc(regs, hsr);
-            return;
+            break;
         }
         if ( hsr.wfi_wfe.ti ) {
             /* Yield the VCPU for WFE */
@@ -2126,10 +2126,16 @@ void do_trap_guest_sync(struct cpu_user_regs *regs)
         perfc_incr(trap_hvc32);
 #ifndef NDEBUG
         if ( (hsr.iss & 0xff00) == 0xff00 )
-            return do_debug_trap(regs, hsr.iss & 0x00ff);
+        {
+            do_debug_trap(regs, hsr.iss & 0x00ff);
+            break;
+        }
 #endif
         if ( hsr.iss == 0 )
-            return do_trap_hvc_smccc(regs);
+        {
+            do_trap_hvc_smccc(regs);
+            break;
+        }
         nr = regs->r12;
         do_trap_hypercall(regs, &nr, hsr);
         regs->r12 = (uint32_t)nr;
@@ -2141,10 +2147,16 @@ void do_trap_guest_sync(struct cpu_user_regs *regs)
         perfc_incr(trap_hvc64);
 #ifndef NDEBUG
         if ( (hsr.iss & 0xff00) == 0xff00 )
-            return do_debug_trap(regs, hsr.iss & 0x00ff);
+        {
+            do_debug_trap(regs, hsr.iss & 0x00ff);
+            break;
+        }
 #endif
         if ( hsr.iss == 0 )
-            return do_trap_hvc_smccc(regs);
+        {
+            do_trap_hvc_smccc(regs);
+            break;
+        }
         do_trap_hypercall(regs, &regs->x16, hsr);
         break;
     case HSR_EC_SMC64:
@@ -2179,6 +2191,11 @@ void do_trap_guest_sync(struct cpu_user_regs *regs)
                 hsr.bits, hsr.ec, hsr.len, hsr.iss);
         inject_undef_exception(regs, hsr);
     }
+
+    local_irq_disable();
+    hyp_tacc_head(1);
+
+    /*we will call tacc tail from the leave_hypervisor_tail*/
 }
 
 void do_trap_hyp_sync(struct cpu_user_regs *regs)
@@ -2219,6 +2236,7 @@ void do_trap_hyp_sync(struct cpu_user_regs *regs)
                hsr.bits, hsr.ec, hsr.len, hsr.iss);
         do_unexpected_trap("Hypervisor", regs);
     }
+
 }
 
 void do_trap_hyp_serror(struct cpu_user_regs *regs)
@@ -2234,28 +2252,47 @@ void do_trap_guest_serror(struct cpu_user_regs *regs)
     local_irq_enable();
 
     __do_trap_serror(regs, true);
+
+    local_irq_disable();
+    hyp_tacc_head(2);
 }
 
 void do_trap_guest_irq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(3);
+
     enter_hypervisor_head();
     gic_interrupt(regs, 0);
+
+    /*we will call tacc tail from the leave_hypervisor_tail*/
 }
 
 void do_trap_guest_fiq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(4);
+
     enter_hypervisor_head();
     gic_interrupt(regs, 1);
+
+    /*we will call tacc tail from the leave_hypervisor_tail*/
 }
 
 void do_trap_hyp_irq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(5);
+
     gic_interrupt(regs, 0);
+
+    hyp_tacc_tail(5);
 }
 
 void do_trap_hyp_fiq(struct cpu_user_regs *regs)
 {
+    hyp_tacc_head(6);
+
     gic_interrupt(regs, 1);
+
+    hyp_tacc_tail(6);
 }
 
 static void check_for_pcpu_work(void)
@@ -2318,6 +2355,8 @@ void leave_hypervisor_tail(void)
      */
     SYNCHRONIZE_SERROR(SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT);
 
+    hyp_tacc_tail(1234);
+
     /*
      * The hypervisor runs with the workaround always present.
      * If the guest wants it disabled, so be it...
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 3c0d7c7..b8d866b 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1856,7 +1856,7 @@ csched_schedule(
                     (unsigned char *)&d);
     }
 
-    runtime = now - current->runstate.state_entry_time;
+    runtime = current->runtime;
     if ( runtime < 0 ) /* Does this ever happen? */
         runtime = 0;
 
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 8e4381d..2d11a5f 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -3285,7 +3285,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
      * no point forcing it to do so until rate limiting expires.
      */
     if ( !yield && prv->ratelimit_us && vcpu_runnable(scurr->vcpu) &&
-         (now - scurr->vcpu->runstate.state_entry_time) <
+          scurr->vcpu->runtime <
           MICROSECS(prv->ratelimit_us) )
     {
         if ( unlikely(tb_init_done) )
@@ -3296,7 +3296,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
             } d;
             d.dom = scurr->vcpu->domain->domain_id;
             d.vcpu = scurr->vcpu->vcpu_id;
-            d.runtime = now - scurr->vcpu->runstate.state_entry_time;
+            d.runtime = scurr->vcpu->runtime;
             __trace_var(TRC_CSCHED2_RATELIMIT, 1,
                         sizeof(d),
                         (unsigned char *)&d);
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 0acfc3d..f1de511 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -947,7 +947,7 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now)
         return;
 
     /* burn at nanoseconds level */
-    delta = now - svc->last_start;
+    delta = svc->vcpu->runtime;
     /*
      * delta < 0 only happens in nested virtualization;
      * TODO: how should we handle delta < 0 in a better way?
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 9e8805d..d3246f9 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1504,20 +1504,16 @@ static void schedule(void)
              (now - next->runstate.state_entry_time) : 0,
              next_slice.time);
 
-    ASSERT(prev->runstate.state == RUNSTATE_running);
-
     TRACE_4D(TRC_SCHED_SWITCH,
              prev->domain->domain_id, prev->vcpu_id,
              next->domain->domain_id, next->vcpu_id);
 
-    vcpu_runstate_change(
-        prev,
-        ((prev->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
-         (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
-        now);
-
-    ASSERT(next->runstate.state != RUNSTATE_running);
-    vcpu_runstate_change(next, RUNSTATE_running, now);
+    if ( !vcpu_runnable(prev) )
+        vcpu_runstate_change(
+            prev,
+            ((prev->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
+             RUNSTATE_offline),
+            now);
 
     /*
      * NB. Don't add any trace records from here until the actual context
@@ -1526,6 +1522,7 @@ static void schedule(void)
 
     ASSERT(!next->is_running);
     next->is_running = 1;
+    next->runtime = 0;
 
     pcpu_schedule_unlock_irq(lock, cpu);
 
@@ -1541,6 +1538,58 @@ static void schedule(void)
     context_switch(prev, next);
 }
 
+DEFINE_PER_CPU(int, hyp_tacc_cnt);
+
+void hyp_tacc_head(int place)
+{
+    //printk("\thead cpu %u, place %d, cnt %d\n", smp_processor_id(), place, this_cpu(hyp_tacc_cnt));
+
+    ASSERT(this_cpu(hyp_tacc_cnt) >= 0);
+
+    if ( this_cpu(hyp_tacc_cnt) == 0 )
+    {
+        s_time_t now = NOW();
+        spin_lock(per_cpu(schedule_data,smp_processor_id()).schedule_lock);
+        /*
+         * Stop time accounting for guest (guest vcpu)
+         */
+        ASSERT( (current->runstate.state_entry_time & XEN_RUNSTATE_UPDATE) == 0);
+        current->runtime += now - current->runstate.state_entry_time;
+        vcpu_runstate_change(current, RUNSTATE_runnable, now);
+        /*
+         * Start time accounting for hyp (idle vcpu)
+         */
+        vcpu_runstate_change(idle_vcpu[smp_processor_id()], RUNSTATE_running, now);
+        spin_unlock(per_cpu(schedule_data,smp_processor_id()).schedule_lock);
+    }
+
+    this_cpu(hyp_tacc_cnt)++;
+}
+
+void hyp_tacc_tail(int place)
+{
+    //printk("\t\t\t\ttail cpu %u, place %d, cnt %d\n", smp_processor_id(), place, this_cpu(hyp_tacc_cnt));
+
+    ASSERT(this_cpu(hyp_tacc_cnt) > 0);
+
+    if (this_cpu(hyp_tacc_cnt) == 1)
+    {
+        s_time_t now = NOW();
+        spin_lock(per_cpu(schedule_data,smp_processor_id()).schedule_lock);
+        /*
+         * Stop time accounting for hyp (idle vcpu)
+         */
+        vcpu_runstate_change(idle_vcpu[smp_processor_id()], RUNSTATE_runnable, now);
+        /*
+         * Start time accounting for guest (guest vcpu)
+         */
+        vcpu_runstate_change(current, RUNSTATE_running, now);
+        spin_unlock(per_cpu(schedule_data,smp_processor_id()).schedule_lock);
+    }
+
+    this_cpu(hyp_tacc_cnt)--;
+}
+
 void context_saved(struct vcpu *prev)
 {
     /* Clear running flag /after/ writing context to memory. */
@@ -1597,8 +1646,9 @@ static int cpu_schedule_up(unsigned int cpu)
     sd->curr = idle_vcpu[cpu];
     init_timer(&sd->s_timer, s_timer_fn, NULL, cpu);
     atomic_set(&sd->urgent_count, 0);
+    per_cpu(hyp_tacc_cnt, cpu) = 1;
 
-    /* Boot CPU is dealt with later in schedule_init(). */
+    /* Boot CPU is dealt with later in scheduler_init(). */
     if ( cpu == 0 )
         return 0;
 
@@ -1654,6 +1704,8 @@ static void cpu_schedule_down(unsigned int cpu)
     sd->sched_priv = NULL;
 
     kill_timer(&sd->s_timer);
+
+    per_cpu(hyp_tacc_cnt, cpu) = 0;
 }
 
 static int cpu_schedule_callback(
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 5e28797..9391318 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -174,6 +174,8 @@ struct vcpu
     } runstate_guest; /* guest address */
 #endif
 
+    s_time_t runtime;
+
     /* Has the FPU been initialised? */
     bool             fpu_initialised;
     /* Has the FPU been used since it was last saved? */
@@ -998,6 +1000,9 @@ extern void dump_runq(unsigned char key);
 
 void arch_do_physinfo(struct xen_sysctl_physinfo *pi);
 
+void hyp_tacc_head(int place);
+void hyp_tacc_tail(int place);
+
 #endif /* __SCHED_H__ */
 
 /*
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2019-07-26 10:38 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-26 10:37 [Xen-devel] [RFC 0/6] XEN scheduling hardening Andrii Anisov
2019-07-26 10:37 ` [Xen-devel] [RFC 1/6] xen/arm: Re-enable interrupt later in the trap path Andrii Anisov
2019-07-26 10:48   ` Julien Grall
2019-07-30 17:35     ` Andrii Anisov
2019-07-30 20:10       ` Julien Grall
2019-08-01  6:45         ` Andrii Anisov
2019-08-01  9:37           ` Julien Grall
2019-08-02  8:28             ` Andrii Anisov
2019-08-02  9:03               ` Julien Grall
2019-08-02 12:24                 ` Andrii Anisov
2019-08-02 13:22                   ` Julien Grall
2019-08-01 11:19           ` Dario Faggioli
2019-08-02  7:50             ` Andrii Anisov
2019-08-02  9:15               ` Julien Grall
2019-08-02 13:07                 ` Andrii Anisov
2019-08-02 13:49                   ` Julien Grall
2019-08-03  1:39                     ` Dario Faggioli
2019-08-03  0:55                   ` Dario Faggioli
2019-08-06 13:09                     ` Andrii Anisov
2019-08-08 14:07                       ` Andrii Anisov
2019-08-13 14:45                         ` Dario Faggioli
2019-08-15 18:25                           ` Andrii Anisov
2019-07-26 10:37 ` [Xen-devel] [RFC 2/6] schedule: account true system idle time Andrii Anisov
2019-07-26 12:00   ` Dario Faggioli
2019-07-26 12:42     ` Andrii Anisov
2019-07-29 11:40       ` Dario Faggioli
2019-08-01  8:23         ` Andrii Anisov
2019-07-26 10:37 ` [Xen-devel] [RFC 3/6] sysctl: extend XEN_SYSCTL_getcpuinfo interface Andrii Anisov
2019-07-26 12:15   ` Dario Faggioli
2019-07-26 13:06     ` Andrii Anisov
2019-07-26 10:37 ` [Xen-devel] [RFC 4/6] xentop: show CPU load information Andrii Anisov
2019-07-26 10:37 ` [Xen-devel] [RFC 5/6] arm64: сall enter_hypervisor_head only when it is needed Andrii Anisov
2019-07-26 10:44   ` Andrii Anisov
2019-07-26 10:37 ` [Xen-devel] [RFC 5/6] arm64: call " Andrii Anisov
2019-07-26 10:59   ` Julien Grall
2019-07-30 17:35     ` Andrii Anisov
2019-07-31 11:02       ` Julien Grall
2019-07-31 11:33         ` Andre Przywara
2019-08-01  7:33         ` Andrii Anisov
2019-08-01 10:17           ` Julien Grall
2019-08-02 13:50             ` Andrii Anisov
2019-07-26 10:37 ` Andrii Anisov [this message]
2019-07-26 11:56 ` [Xen-devel] [RFC 0/6] XEN scheduling hardening Dario Faggioli
2019-07-26 12:14   ` Juergen Gross
2019-07-29 11:53     ` Dario Faggioli
2019-07-29 12:13       ` Juergen Gross
2019-07-29 14:47     ` Andrii Anisov
2019-07-29 18:46       ` Dario Faggioli
2019-07-29 14:28   ` Andrii Anisov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1564137460-25629-8-git-send-email-andrii.anisov@gmail.com \
    --to=andrii.anisov@gmail.com \
    --cc=Volodymyr_Babchuk@epam.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=andrii_anisov@epam.com \
    --cc=dfaggioli@suse.com \
    --cc=george.dunlap@eu.citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=julien.grall@arm.com \
    --cc=konrad.wilk@oracle.com \
    --cc=mengxu@cis.upenn.edu \
    --cc=sstabellini@kernel.org \
    --cc=tim@xen.org \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.