All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH-for-4.17 v3] xen/sched: migrate timers to correct cpus after suspend
@ 2022-11-02 15:00 Juergen Gross
  2022-11-04  7:48 ` Dario Faggioli
  0 siblings, 1 reply; 3+ messages in thread
From: Juergen Gross @ 2022-11-02 15:00 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, George Dunlap, Dario Faggioli, Meng Xu,
	Henry Wang, Marek Marczykowski-Górecki

Today all timers are migrated to cpu 0 when the system is being
suspended. They are not migrated back after resuming the system again.

This results (at least) to visible problems with the credit scheduler,
as the timer isn't handled on the cpu it was expected to occur, which
will result in an ASSERT() triggering. Other more subtle problems, like
uninterrupted elongated time slices, are probable. The least effect
will be worse performance on cpu 0 resulting from most scheduling
related timer interrupts happening there after suspend/resume.

Add migrating the scheduling related timers of a specific cpu from cpu
0 back to its original cpu when that cpu has gone up when resuming the
system.

Fixes: 0763cd268789 ("xen/sched: don't disable scheduler on cpus during suspend")
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
---
V2:
- fix smt=0 case (Marek Marczykowski-Górecki)
V3:
- minor comment and commit message adjustments (Jan Beulich)
---
 xen/common/sched/core.c    | 29 +++++++++++++++++++
 xen/common/sched/cpupool.c |  2 ++
 xen/common/sched/credit.c  | 13 +++++++++
 xen/common/sched/private.h | 10 +++++++
 xen/common/sched/rt.c      | 58 ++++++++++++++++++++++++++------------
 5 files changed, 94 insertions(+), 18 deletions(-)

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 23fa6845a8..43132ff6e0 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -1284,6 +1284,35 @@ static int cpu_disable_scheduler_check(unsigned int cpu)
     return 0;
 }
 
+/*
+ * Called after a cpu has come up again in a suspend/resume cycle.
+ * Migrate all timers for this cpu (they have been migrated to cpu 0 when the
+ * cpu was going down).
+ * Note that only timers related to a physical cpu are migrated, not the ones
+ * related to a vcpu or domain.
+ */
+void sched_migrate_timers(unsigned int cpu)
+{
+    struct sched_resource *sr;
+
+    rcu_read_lock(&sched_res_rculock);
+
+    sr = get_sched_res(cpu);
+
+    /*
+     * Note that on a system with parked cpus (e.g. smt=0 on Intel cpus) this
+     * will be called for the parked cpus, too, so the case for no scheduling
+     * resource being available must be considered.
+     */
+    if ( sr && sr->master_cpu == cpu )
+    {
+        migrate_timer(&sr->s_timer, cpu);
+        sched_move_timers(sr->scheduler, sr);
+    }
+
+    rcu_read_unlock(&sched_res_rculock);
+}
+
 /*
  * In general, this must be called with the scheduler lock held, because the
  * adjust_affinity hook may want to modify the vCPU state. However, when the
diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index b2c6f520c3..bdf6030ab0 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -1035,6 +1035,8 @@ static int cf_check cpu_callback(
     case CPU_ONLINE:
         if ( system_state <= SYS_STATE_active )
             rc = cpupool_cpu_add(cpu);
+        else
+            sched_migrate_timers(cpu);
         break;
     case CPU_DOWN_PREPARE:
         /* Suspend/Resume don't change assignments of cpus to cpupools. */
diff --git a/xen/common/sched/credit.c b/xen/common/sched/credit.c
index 47945c2834..f2cd3d9da3 100644
--- a/xen/common/sched/credit.c
+++ b/xen/common/sched/credit.c
@@ -614,6 +614,18 @@ init_pdata(struct csched_private *prv, struct csched_pcpu *spc, int cpu)
     spc->nr_runnable = 0;
 }
 
+static void cf_check
+csched_move_timers(const struct scheduler *ops, struct sched_resource *sr)
+{
+    struct csched_private *prv = CSCHED_PRIV(ops);
+    struct csched_pcpu *spc = sr->sched_priv;
+
+    if ( sr->master_cpu == prv->master )
+        migrate_timer(&prv->master_ticker, prv->master);
+
+    migrate_timer(&spc->ticker, sr->master_cpu);
+}
+
 /* Change the scheduler of cpu to us (Credit). */
 static spinlock_t *cf_check
 csched_switch_sched(struct scheduler *new_ops, unsigned int cpu,
@@ -2264,6 +2276,7 @@ static const struct scheduler sched_credit_def = {
     .switch_sched   = csched_switch_sched,
     .alloc_domdata  = csched_alloc_domdata,
     .free_domdata   = csched_free_domdata,
+    .move_timers    = csched_move_timers,
 };
 
 REGISTER_SCHEDULER(sched_credit_def);
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index 0126a4bb9e..0527a8c70d 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -331,6 +331,8 @@ struct scheduler {
                                     struct xen_sysctl_scheduler_op *);
     void         (*dump_settings)  (const struct scheduler *);
     void         (*dump_cpu_state) (const struct scheduler *, int);
+    void         (*move_timers)    (const struct scheduler *,
+                                    struct sched_resource *);
 };
 
 static inline int sched_init(struct scheduler *s)
@@ -485,6 +487,13 @@ static inline int sched_adjust_cpupool(const struct scheduler *s,
     return s->adjust_global ? s->adjust_global(s, op) : 0;
 }
 
+static inline void sched_move_timers(const struct scheduler *s,
+                                     struct sched_resource *sr)
+{
+    if ( s->move_timers )
+        s->move_timers(s, sr);
+}
+
 static inline void sched_unit_pause_nosync(const struct sched_unit *unit)
 {
     struct vcpu *v;
@@ -622,6 +631,7 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc);
 void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu);
 int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *mem);
 int sched_move_domain(struct domain *d, struct cpupool *c);
+void sched_migrate_timers(unsigned int cpu);
 struct cpupool *cpupool_get_by_id(unsigned int poolid);
 void cpupool_put(struct cpupool *pool);
 int cpupool_add_domain(struct domain *d, unsigned int poolid);
diff --git a/xen/common/sched/rt.c b/xen/common/sched/rt.c
index 1f8d074884..d443cd5831 100644
--- a/xen/common/sched/rt.c
+++ b/xen/common/sched/rt.c
@@ -750,6 +750,27 @@ rt_switch_sched(struct scheduler *new_ops, unsigned int cpu,
     return &prv->lock;
 }
 
+static void move_repl_timer(struct rt_private *prv, unsigned int old_cpu)
+{
+    cpumask_t *online = get_sched_res(old_cpu)->cpupool->res_valid;
+    unsigned int new_cpu = cpumask_cycle(old_cpu, online);
+
+    /*
+     * Make sure the timer run on one of the cpus that are still available
+     * to this scheduler. If there aren't any left, it means it's the time
+     * to just kill it.
+     */
+    if ( new_cpu >= nr_cpu_ids )
+    {
+        kill_timer(&prv->repl_timer);
+        dprintk(XENLOG_DEBUG, "RTDS: timer killed on cpu %d\n", old_cpu);
+    }
+    else
+    {
+        migrate_timer(&prv->repl_timer, new_cpu);
+    }
+}
+
 static void cf_check
 rt_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
 {
@@ -759,25 +780,25 @@ rt_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     spin_lock_irqsave(&prv->lock, flags);
 
     if ( prv->repl_timer.cpu == cpu )
-    {
-        cpumask_t *online = get_sched_res(cpu)->cpupool->res_valid;
-        unsigned int new_cpu = cpumask_cycle(cpu, online);
+        move_repl_timer(prv, cpu);
 
-        /*
-         * Make sure the timer run on one of the cpus that are still available
-         * to this scheduler. If there aren't any left, it means it's the time
-         * to just kill it.
-         */
-        if ( new_cpu >= nr_cpu_ids )
-        {
-            kill_timer(&prv->repl_timer);
-            dprintk(XENLOG_DEBUG, "RTDS: timer killed on cpu %d\n", cpu);
-        }
-        else
-        {
-            migrate_timer(&prv->repl_timer, new_cpu);
-        }
-    }
+    spin_unlock_irqrestore(&prv->lock, flags);
+}
+
+static void cf_check
+rt_move_timers(const struct scheduler *ops, struct sched_resource *sr)
+{
+    unsigned long flags;
+    struct rt_private *prv = rt_priv(ops);
+    unsigned int old_cpu;
+
+    spin_lock_irqsave(&prv->lock, flags);
+
+    old_cpu = prv->repl_timer.cpu;
+    if ( prv->repl_timer.status != TIMER_STATUS_invalid &&
+         prv->repl_timer.status != TIMER_STATUS_killed &&
+         !cpumask_test_cpu(old_cpu, sr->cpupool->res_valid) )
+        move_repl_timer(prv, old_cpu);
 
     spin_unlock_irqrestore(&prv->lock, flags);
 }
@@ -1561,6 +1582,7 @@ static const struct scheduler sched_rtds_def = {
     .sleep          = rt_unit_sleep,
     .wake           = rt_unit_wake,
     .context_saved  = rt_context_saved,
+    .move_timers    = rt_move_timers,
 };
 
 REGISTER_SCHEDULER(sched_rtds_def);
-- 
2.35.3



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH-for-4.17 v3] xen/sched: migrate timers to correct cpus after suspend
  2022-11-02 15:00 [PATCH-for-4.17 v3] xen/sched: migrate timers to correct cpus after suspend Juergen Gross
@ 2022-11-04  7:48 ` Dario Faggioli
  2022-11-04  7:50   ` Henry Wang
  0 siblings, 1 reply; 3+ messages in thread
From: Dario Faggioli @ 2022-11-04  7:48 UTC (permalink / raw)
  To: Juergen Gross, xen-devel; +Cc: Henry.Wang, marmarek, george.dunlap, mengxu

[-- Attachment #1: Type: text/plain, Size: 1360 bytes --]

On Wed, 2022-11-02 at 16:00 +0100, Juergen Gross wrote:
> Today all timers are migrated to cpu 0 when the system is being
> suspended. They are not migrated back after resuming the system
> again.
> 
> This results (at least) to visible problems with the credit
> scheduler,
> as the timer isn't handled on the cpu it was expected to occur, which
> will result in an ASSERT() triggering. Other more subtle problems,
> like
> uninterrupted elongated time slices, are probable. The least effect
> will be worse performance on cpu 0 resulting from most scheduling
> related timer interrupts happening there after suspend/resume.
> 
> Add migrating the scheduling related timers of a specific cpu from
> cpu
> 0 back to its original cpu when that cpu has gone up when resuming
> the
> system.
> 
> Fixes: 0763cd268789 ("xen/sched: don't disable scheduler on cpus
> during suspend")
> Signed-off-by: Juergen Gross <jgross@suse.com>
> Tested-by: Marek Marczykowski-Górecki
> <marmarek@invisiblethingslab.com>
>
Acked-by: Dario Faggioli <dfaggioli@suse.com>

Regards
-- 
Dario Faggioli, Ph.D
http://about.me/dario.faggioli
Virtualization Software Engineer
SUSE Labs, SUSE https://www.suse.com/
-------------------------------------------------------------------
<<This happens because _I_ choose it to happen!>> (Raistlin Majere)

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH-for-4.17 v3] xen/sched: migrate timers to correct cpus after suspend
  2022-11-04  7:48 ` Dario Faggioli
@ 2022-11-04  7:50   ` Henry Wang
  0 siblings, 0 replies; 3+ messages in thread
From: Henry Wang @ 2022-11-04  7:50 UTC (permalink / raw)
  To: Dario Faggioli, Juergen Gross, xen-devel; +Cc: marmarek, george.dunlap, mengxu

Hi Juergen,

> On Wed, 2022-11-02 at 16:00 +0100, Juergen Gross wrote:
> > Today all timers are migrated to cpu 0 when the system is being
> > suspended. They are not migrated back after resuming the system
> > again.
> >
> > This results (at least) to visible problems with the credit
> > scheduler,
> > as the timer isn't handled on the cpu it was expected to occur, which
> > will result in an ASSERT() triggering. Other more subtle problems,
> > like
> > uninterrupted elongated time slices, are probable. The least effect
> > will be worse performance on cpu 0 resulting from most scheduling
> > related timer interrupts happening there after suspend/resume.
> >
> > Add migrating the scheduling related timers of a specific cpu from
> > cpu
> > 0 back to its original cpu when that cpu has gone up when resuming
> > the
> > system.
> >
> > Fixes: 0763cd268789 ("xen/sched: don't disable scheduler on cpus
> > during suspend")
> > Signed-off-by: Juergen Gross <jgross@suse.com>
> > Tested-by: Marek Marczykowski-Górecki
> > <marmarek@invisiblethingslab.com>
> >
> Acked-by: Dario Faggioli <dfaggioli@suse.com>

Release-acked-by: Henry Wang <Henry.Wang@arm.com>

Kind regards,
Henry

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-11-04  7:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-02 15:00 [PATCH-for-4.17 v3] xen/sched: migrate timers to correct cpus after suspend Juergen Gross
2022-11-04  7:48 ` Dario Faggioli
2022-11-04  7:50   ` Henry Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.