All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit
@ 2019-07-02 15:01 Jan Beulich
  2019-07-02 16:22 ` Andrew Cooper
  2019-07-03  7:13 ` Juergen Gross
  0 siblings, 2 replies; 5+ messages in thread
From: Jan Beulich @ 2019-07-02 15:01 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Wilk, George Dunlap,
	Andrew Cooper, Tim Deegan, Dario Faggioli, Julien Grall,
	Ian Jackson, xen-devel, Roger Pau Monné

>>> On 28.05.19 at 12:32, <jgross@suse.com> wrote:
> When running an idle vcpu in a non-idle scheduling unit use a specific
> guest idle loop not performing any tasklets, memory scrubbing and
> livepatching in order to avoid populating the cpu caches with memory
> used by other domains (as far as possible). Softirqs are considered to
> be safe (timers might want to be excluded, but this can be fine-tuned
> later).

How could timers be legitimately excluded? And how are softirqs
(which similarly can't be excluded here) any less risky than e.g.
tasklets?

As to scrubbing - what gets brought into cache is, except for a very
brief moment, the value the scrubbing routine actually stores. There's
no knowledge to be gained from that by a guest.

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -159,6 +159,23 @@ static void idle_loop(void)
>      }
>  }
>  
> +/*
> + * Idle loop for siblings of active schedule units.
> + * We don't do any standard idle work like tasklets, page scrubbing or
> + * livepatching.
> + */
> +static void guest_idle_loop(void)
> +{
> +    unsigned int cpu = smp_processor_id();
> +
> +    for ( ; ; )
> +    {
> +        if ( !softirq_pending(cpu) )
> +            sched_guest_idle(pm_idle, cpu);
> +        do_softirq();
> +    }
> +}

In the comment I think you mean "siblings of <whatever> in
active schedule units"?

Having had quite some fun with soft-offlining of CPUs recently,
may I ask that you ASSERT(!cpu_is_offline(cpu)) in the loop
body, such that the absence of a call to play_dead() is also
covered?

> @@ -172,6 +189,10 @@ void startup_cpu_idle_loop(void)
>  
>  static void noreturn continue_idle_domain(struct vcpu *v)
>  {
> +    /* Idle vcpus might be attached to non-idle units! */
> +    if ( !is_idle_domain(v->sched_unit->domain) )
> +        reset_stack_and_jump(guest_idle_loop);
> +
>      reset_stack_and_jump(idle_loop);
>  }

You're aware that there's a call to check_for_livepatch_work() hidden
in reset_stack_and_jump(), which you say you don't want to allow in
this context?

> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -82,7 +82,18 @@ static struct scheduler __read_mostly ops;
>  static inline struct vcpu *sched_unit2vcpu_cpu(struct sched_unit *unit,
>                                                 unsigned int cpu)
>  {
> -    return unit->domain->vcpu[unit->unit_id + per_cpu(sched_res_idx, cpu)];
> +    unsigned int idx = unit->unit_id + per_cpu(sched_res_idx, cpu);
> +    const struct domain *d = unit->domain;
> +    struct vcpu *v;
> +
> +    if ( idx < d->max_vcpus && d->vcpu[idx] )
> +    {
> +        v = d->vcpu[idx];
> +        if ( v->new_state == RUNSTATE_running )
> +            return v;

Isn't this enough of the cache fill half of a gadget to warrant use of
array_index_nospec() or alike?

> @@ -209,19 +223,11 @@ static inline void vcpu_runstate_change(
>      v->runstate.state = new_state;
>  }
>  
> -static inline void sched_unit_runstate_change(struct sched_unit *unit,
> -    bool running, s_time_t new_entry_time)
> +void sched_guest_idle(void (*idle) (void), unsigned int cpu)

Stray blank between closing and opening parenthesis.

>  {
> -    struct vcpu *v;
> -
> -    for_each_sched_unit_vcpu ( unit, v )
> -        if ( running )
> -            vcpu_runstate_change(v, v->new_state, new_entry_time);
> -        else
> -            vcpu_runstate_change(v,
> -                ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
> -                 (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
> -                new_entry_time);
> +    atomic_inc(&get_sched_res(cpu)->urgent_count);
> +    idle();
> +    atomic_dec(&get_sched_res(cpu)->urgent_count);
>  }

What is "urgent" about an idle vCPU filling an empty sched unit slot?
That is, why do you need to prevent the thread from sleeping as
power efficiently as possible (potentially allowing the sibling thread
to even use more resources)?

> @@ -1637,33 +1644,67 @@ static void sched_switch_units(struct sched_resource *sd,
>                                 struct sched_unit *next, struct sched_unit *prev,
>                                 s_time_t now)
>  {
> -    sd->curr = next;
> -
> -    TRACE_3D(TRC_SCHED_SWITCH_INFPREV, prev->domain->domain_id, prev->unit_id,
> -             now - prev->state_entry_time);
> -    TRACE_4D(TRC_SCHED_SWITCH_INFNEXT, next->domain->domain_id, next->unit_id,
> -             (next->vcpu->runstate.state == RUNSTATE_runnable) ?
> -             (now - next->state_entry_time) : 0, prev->next_time);
> +    int cpu;

unsigned int

> @@ -1719,25 +1760,25 @@ static struct sched_unit *do_schedule(struct sched_unit *prev, s_time_t now,
>      if ( prev->next_time >= 0 ) /* -ve means no limit */
>          set_timer(&sd->s_timer, now + prev->next_time);
>  
> -    if ( likely(prev != next) )
> -        sched_switch_units(sd, next, prev, now);
> +    sched_switch_units(sd, next, prev, now);
>  
>      return next;
>  }
>  
> -static void context_saved(struct vcpu *prev)
> +static void context_saved(struct sched_unit *unit)
>  {
> -    struct sched_unit *unit = prev->sched_unit;
> -
>      unit->is_running = 0;
>      unit->state_entry_time = NOW();
> +    get_sched_res(smp_processor_id())->prev = NULL;
>  
>      /* Check for migration request /after/ clearing running flag. */
>      smp_mb();
>  
> -    sched_context_saved(vcpu_scheduler(prev), unit);
> +    sched_context_saved(vcpu_scheduler(unit->vcpu), unit);

An example of it unlikely being just one of the vCPU-s in a unit that
you actually want to deal with.

> @@ -1870,7 +1921,8 @@ static void sched_slave(void)
>  
>      pcpu_schedule_unlock_irq(lock, cpu);
>  
> -    sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu), now);
> +    sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu),
> +                         is_idle_unit(next) && !is_idle_unit(prev), now);
>  }
>  
>  /*
> @@ -1930,7 +1982,8 @@ static void schedule(void)
>      pcpu_schedule_unlock_irq(lock, cpu);
>  
>      vnext = sched_unit2vcpu_cpu(next, cpu);
> -    sched_context_switch(vprev, vnext, now);
> +    sched_context_switch(vprev, vnext,
> +                         !is_idle_unit(prev) && is_idle_unit(next), now);
>  }

Seeing these two calls I'm not only slightly puzzled by the expressions
having operands in opposite order wrt one another, but also why the
callee can't work out the condition without the new parameter/argument.

> --- a/xen/include/asm-arm/current.h
> +++ b/xen/include/asm-arm/current.h
> @@ -18,6 +18,7 @@ DECLARE_PER_CPU(struct vcpu *, curr_vcpu);
>  
>  #define current            (this_cpu(curr_vcpu))
>  #define set_current(vcpu)  do { current = (vcpu); } while (0)
> +#define get_cpu_current(cpu)  (per_cpu(curr_vcpu, cpu))

Pointless outer pair of parentheses.

> --- a/xen/include/asm-x86/current.h
> +++ b/xen/include/asm-x86/current.h
> @@ -77,6 +77,11 @@ struct cpu_info {
>      /* get_stack_bottom() must be 16-byte aligned */
>  };
>  
> +static inline struct cpu_info *get_cpu_info_from_stack(unsigned long sp)
> +{
> +    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
> +}
> +
>  static inline struct cpu_info *get_cpu_info(void)
>  {
>  #ifdef __clang__
> @@ -87,7 +92,7 @@ static inline struct cpu_info *get_cpu_info(void)
>      register unsigned long sp asm("rsp");
>  #endif
>  
> -    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
> +    return get_cpu_info_from_stack(sp);
>  }

With these, why does ...

> --- a/xen/include/asm-x86/smp.h
> +++ b/xen/include/asm-x86/smp.h
> @@ -76,6 +76,9 @@ void set_nr_sockets(void);
>  /* Representing HT and core siblings in each socket. */
>  extern cpumask_t **socket_cpumask;
>  
> +#define get_cpu_current(cpu) \
> +    (get_cpu_info_from_stack((unsigned long)stack_base[cpu])->current_vcpu)

... this end up in a different header? (The outermost pair of parentheses
isn't strictly needed here.)

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit
  2019-07-02 15:01 [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit Jan Beulich
@ 2019-07-02 16:22 ` Andrew Cooper
  2019-07-03  7:13 ` Juergen Gross
  1 sibling, 0 replies; 5+ messages in thread
From: Andrew Cooper @ 2019-07-02 16:22 UTC (permalink / raw)
  To: Jan Beulich, Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Wilk, George Dunlap,
	Tim Deegan, Dario Faggioli, Julien Grall, xen-devel, Ian Jackson,
	Roger Pau Monné

On 02/07/2019 16:01, Jan Beulich wrote:
> As to scrubbing - what gets brought into cache is, except for a very
> brief moment, the value the scrubbing routine actually stores. There's
> no knowledge to be gained from that by a guest.

Unless we scrub with instructions which have Direct Write semantics (we
don't), the cacheline gets pulled into L1.

That means its leakable via L1TF for the entire duration between the
first speculative touch of the page (especially as prefetching is liable
to bring the content in in short order) to the retirement of the
instruction.

That said, there is zero difference (from an attackers point of view)
between scrubbing a single page as part of alloc_{dom,xen}heap_page(),
and scrubbing in the idle loop. 

The former is strictly required to stay, therefore, don't see
restricting the idle scrubbing as having any impact on security
(although it might very well have an impact on performance).

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit
  2019-07-02 15:01 [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit Jan Beulich
  2019-07-02 16:22 ` Andrew Cooper
@ 2019-07-03  7:13 ` Juergen Gross
  2019-07-03  7:54   ` Jan Beulich
  1 sibling, 1 reply; 5+ messages in thread
From: Juergen Gross @ 2019-07-03  7:13 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Wilk, George Dunlap,
	Andrew Cooper, Tim Deegan, Dario Faggioli, Julien Grall,
	Ian Jackson, xen-devel, Roger Pau Monné

On 02.07.19 17:01, Jan Beulich wrote:
>>>> On 28.05.19 at 12:32, <jgross@suse.com> wrote:
>> When running an idle vcpu in a non-idle scheduling unit use a specific
>> guest idle loop not performing any tasklets, memory scrubbing and
>> livepatching in order to avoid populating the cpu caches with memory
>> used by other domains (as far as possible). Softirqs are considered to
>> be safe (timers might want to be excluded, but this can be fine-tuned
>> later).
> 
> How could timers be legitimately excluded? And how are softirqs
> (which similarly can't be excluded here) any less risky than e.g.
> tasklets?

At least some timers are for other guests. I can drop mentioning
timers.

Tasklets are sometimes used for deferred processing of guest specific
actions, like continue_hypercall_on_cpu(). This is something we really
don't want here.

> As to scrubbing - what gets brought into cache is, except for a very
> brief moment, the value the scrubbing routine actually stores. There's
> no knowledge to be gained from that by a guest.

With Andrew's answer going in a similar direction I can add scrubbing
again.

> 
>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -159,6 +159,23 @@ static void idle_loop(void)
>>       }
>>   }
>>   
>> +/*
>> + * Idle loop for siblings of active schedule units.
>> + * We don't do any standard idle work like tasklets, page scrubbing or
>> + * livepatching.
>> + */
>> +static void guest_idle_loop(void)
>> +{
>> +    unsigned int cpu = smp_processor_id();
>> +
>> +    for ( ; ; )
>> +    {
>> +        if ( !softirq_pending(cpu) )
>> +            sched_guest_idle(pm_idle, cpu);
>> +        do_softirq();
>> +    }
>> +}
> 
> In the comment I think you mean "siblings of <whatever> in
> active schedule units"?

Is "siblings of cpus in guest mode" fine?

> 
> Having had quite some fun with soft-offlining of CPUs recently,
> may I ask that you ASSERT(!cpu_is_offline(cpu)) in the loop
> body, such that the absence of a call to play_dead() is also
> covered?

Sure.

> 
>> @@ -172,6 +189,10 @@ void startup_cpu_idle_loop(void)
>>   
>>   static void noreturn continue_idle_domain(struct vcpu *v)
>>   {
>> +    /* Idle vcpus might be attached to non-idle units! */
>> +    if ( !is_idle_domain(v->sched_unit->domain) )
>> +        reset_stack_and_jump(guest_idle_loop);
>> +
>>       reset_stack_and_jump(idle_loop);
>>   }
> 
> You're aware that there's a call to check_for_livepatch_work() hidden
> in reset_stack_and_jump(), which you say you don't want to allow in
> this context?

Good point.

IMO it would be best to have a "no-livepatch" variant of
reset_stack_and_jump().

> 
>> --- a/xen/common/schedule.c
>> +++ b/xen/common/schedule.c
>> @@ -82,7 +82,18 @@ static struct scheduler __read_mostly ops;
>>   static inline struct vcpu *sched_unit2vcpu_cpu(struct sched_unit *unit,
>>                                                  unsigned int cpu)
>>   {
>> -    return unit->domain->vcpu[unit->unit_id + per_cpu(sched_res_idx, cpu)];
>> +    unsigned int idx = unit->unit_id + per_cpu(sched_res_idx, cpu);
>> +    const struct domain *d = unit->domain;
>> +    struct vcpu *v;
>> +
>> +    if ( idx < d->max_vcpus && d->vcpu[idx] )
>> +    {
>> +        v = d->vcpu[idx];
>> +        if ( v->new_state == RUNSTATE_running )
>> +            return v;
> 
> Isn't this enough of the cache fill half of a gadget to warrant use of
> array_index_nospec() or alike?

The input data is in no way user controlled. Do we really want to add
barriers before each array access?

> 
>> @@ -209,19 +223,11 @@ static inline void vcpu_runstate_change(
>>       v->runstate.state = new_state;
>>   }
>>   
>> -static inline void sched_unit_runstate_change(struct sched_unit *unit,
>> -    bool running, s_time_t new_entry_time)
>> +void sched_guest_idle(void (*idle) (void), unsigned int cpu)
> 
> Stray blank between closing and opening parenthesis.

Oh, indeed.

> 
>>   {
>> -    struct vcpu *v;
>> -
>> -    for_each_sched_unit_vcpu ( unit, v )
>> -        if ( running )
>> -            vcpu_runstate_change(v, v->new_state, new_entry_time);
>> -        else
>> -            vcpu_runstate_change(v,
>> -                ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
>> -                 (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
>> -                new_entry_time);
>> +    atomic_inc(&get_sched_res(cpu)->urgent_count);
>> +    idle();
>> +    atomic_dec(&get_sched_res(cpu)->urgent_count);
>>   }
> 
> What is "urgent" about an idle vCPU filling an empty sched unit slot?
> That is, why do you need to prevent the thread from sleeping as
> power efficiently as possible (potentially allowing the sibling thread
> to even use more resources)?

The deeper the thread is sleeping the longer it will take to wake it up
for synchronized context switching. I'd like to avoid additional
latencies.

> 
>> @@ -1637,33 +1644,67 @@ static void sched_switch_units(struct sched_resource *sd,
>>                                  struct sched_unit *next, struct sched_unit *prev,
>>                                  s_time_t now)
>>   {
>> -    sd->curr = next;
>> -
>> -    TRACE_3D(TRC_SCHED_SWITCH_INFPREV, prev->domain->domain_id, prev->unit_id,
>> -             now - prev->state_entry_time);
>> -    TRACE_4D(TRC_SCHED_SWITCH_INFNEXT, next->domain->domain_id, next->unit_id,
>> -             (next->vcpu->runstate.state == RUNSTATE_runnable) ?
>> -             (now - next->state_entry_time) : 0, prev->next_time);
>> +    int cpu;
> 
> unsigned int

Okay.

> 
>> @@ -1719,25 +1760,25 @@ static struct sched_unit *do_schedule(struct sched_unit *prev, s_time_t now,
>>       if ( prev->next_time >= 0 ) /* -ve means no limit */
>>           set_timer(&sd->s_timer, now + prev->next_time);
>>   
>> -    if ( likely(prev != next) )
>> -        sched_switch_units(sd, next, prev, now);
>> +    sched_switch_units(sd, next, prev, now);
>>   
>>       return next;
>>   }
>>   
>> -static void context_saved(struct vcpu *prev)
>> +static void context_saved(struct sched_unit *unit)
>>   {
>> -    struct sched_unit *unit = prev->sched_unit;
>> -
>>       unit->is_running = 0;
>>       unit->state_entry_time = NOW();
>> +    get_sched_res(smp_processor_id())->prev = NULL;
>>   
>>       /* Check for migration request /after/ clearing running flag. */
>>       smp_mb();
>>   
>> -    sched_context_saved(vcpu_scheduler(prev), unit);
>> +    sched_context_saved(vcpu_scheduler(unit->vcpu), unit);
> 
> An example of it unlikely being just one of the vCPU-s in a unit that
> you actually want to deal with.

No. All vcpus of a unit share the same scheduler.

OTOH I think there is some room for tuning here: vcpu_scheduler is
doing quite some work to find the correct struct scheduler. Replacing
it by unit_scheduler() might be a good idea.

> 
>> @@ -1870,7 +1921,8 @@ static void sched_slave(void)
>>   
>>       pcpu_schedule_unlock_irq(lock, cpu);
>>   
>> -    sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu), now);
>> +    sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu),
>> +                         is_idle_unit(next) && !is_idle_unit(prev), now);
>>   }
>>   
>>   /*
>> @@ -1930,7 +1982,8 @@ static void schedule(void)
>>       pcpu_schedule_unlock_irq(lock, cpu);
>>   
>>       vnext = sched_unit2vcpu_cpu(next, cpu);
>> -    sched_context_switch(vprev, vnext, now);
>> +    sched_context_switch(vprev, vnext,
>> +                         !is_idle_unit(prev) && is_idle_unit(next), now);
>>   }
> 
> Seeing these two calls I'm not only slightly puzzled by the expressions
> having operands in opposite order wrt one another, but also why the

Ah, yes, will change one of the calls.

> callee can't work out the condition without the new parameter/argument.

The next patch adds other users of sched_context_switch().

> 
>> --- a/xen/include/asm-arm/current.h
>> +++ b/xen/include/asm-arm/current.h
>> @@ -18,6 +18,7 @@ DECLARE_PER_CPU(struct vcpu *, curr_vcpu);
>>   
>>   #define current            (this_cpu(curr_vcpu))
>>   #define set_current(vcpu)  do { current = (vcpu); } while (0)
>> +#define get_cpu_current(cpu)  (per_cpu(curr_vcpu, cpu))
> 
> Pointless outer pair of parentheses.

Okay.

> 
>> --- a/xen/include/asm-x86/current.h
>> +++ b/xen/include/asm-x86/current.h
>> @@ -77,6 +77,11 @@ struct cpu_info {
>>       /* get_stack_bottom() must be 16-byte aligned */
>>   };
>>   
>> +static inline struct cpu_info *get_cpu_info_from_stack(unsigned long sp)
>> +{
>> +    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
>> +}
>> +
>>   static inline struct cpu_info *get_cpu_info(void)
>>   {
>>   #ifdef __clang__
>> @@ -87,7 +92,7 @@ static inline struct cpu_info *get_cpu_info(void)
>>       register unsigned long sp asm("rsp");
>>   #endif
>>   
>> -    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
>> +    return get_cpu_info_from_stack(sp);
>>   }
> 
> With these, why does ...
> 
>> --- a/xen/include/asm-x86/smp.h
>> +++ b/xen/include/asm-x86/smp.h
>> @@ -76,6 +76,9 @@ void set_nr_sockets(void);
>>   /* Representing HT and core siblings in each socket. */
>>   extern cpumask_t **socket_cpumask;
>>   
>> +#define get_cpu_current(cpu) \
>> +    (get_cpu_info_from_stack((unsigned long)stack_base[cpu])->current_vcpu)
> 
> ... this end up in a different header? (The outermost pair of parentheses
> isn't strictly needed here.)

Hmm, must be a leftover from a previous version. Will move it.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit
  2019-07-03  7:13 ` Juergen Gross
@ 2019-07-03  7:54   ` Jan Beulich
  0 siblings, 0 replies; 5+ messages in thread
From: Jan Beulich @ 2019-07-03  7:54 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Wilk, George Dunlap,
	Andrew Cooper, Tim Deegan, Dario Faggioli, Julien Grall,
	Ian Jackson, xen-devel, Roger Pau Monné

On 03.07.2019 09:13, Juergen Gross wrote:
> On 02.07.19 17:01, Jan Beulich wrote:
>>>>> On 28.05.19 at 12:32, <jgross@suse.com> wrote:
>>> When running an idle vcpu in a non-idle scheduling unit use a specific
>>> guest idle loop not performing any tasklets, memory scrubbing and
>>> livepatching in order to avoid populating the cpu caches with memory
>>> used by other domains (as far as possible). Softirqs are considered to
>>> be safe (timers might want to be excluded, but this can be fine-tuned
>>> later).
>>
>> How could timers be legitimately excluded? And how are softirqs
>> (which similarly can't be excluded here) any less risky than e.g.
>> tasklets?
> 
> At least some timers are for other guests. I can drop mentioning
> timers.
> 
> Tasklets are sometimes used for deferred processing of guest specific
> actions, like continue_hypercall_on_cpu(). This is something we really
> don't want here.

Yet what softirqs act on you won't know either.

>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -159,6 +159,23 @@ static void idle_loop(void)
>>>       }
>>>   }
>>> +/*
>>> + * Idle loop for siblings of active schedule units.
>>> + * We don't do any standard idle work like tasklets, page scrubbing or
>>> + * livepatching.
>>> + */
>>> +static void guest_idle_loop(void)
>>> +{
>>> +    unsigned int cpu = smp_processor_id();
>>> +
>>> +    for ( ; ; )
>>> +    {
>>> +        if ( !softirq_pending(cpu) )
>>> +            sched_guest_idle(pm_idle, cpu);
>>> +        do_softirq();
>>> +    }
>>> +}
>>
>> In the comment I think you mean "siblings of <whatever> in
>> active schedule units"?
> 
> Is "siblings of cpus in guest mode" fine?

Sure. As would perhaps be "siblings in active schedule units".

>>> @@ -172,6 +189,10 @@ void startup_cpu_idle_loop(void)
>>>   static void noreturn continue_idle_domain(struct vcpu *v)
>>>   {
>>> +    /* Idle vcpus might be attached to non-idle units! */
>>> +    if ( !is_idle_domain(v->sched_unit->domain) )
>>> +        reset_stack_and_jump(guest_idle_loop);
>>> +
>>>       reset_stack_and_jump(idle_loop);
>>>   }
>>
>> You're aware that there's a call to check_for_livepatch_work() hidden
>> in reset_stack_and_jump(), which you say you don't want to allow in
>> this context?
> 
> Good point.
> 
> IMO it would be best to have a "no-livepatch" variant of
> reset_stack_and_jump().

That's what I too was thinking.

>>> --- a/xen/common/schedule.c
>>> +++ b/xen/common/schedule.c
>>> @@ -82,7 +82,18 @@ static struct scheduler __read_mostly ops;
>>>   static inline struct vcpu *sched_unit2vcpu_cpu(struct sched_unit *unit,
>>>                                                  unsigned int cpu)
>>>   {
>>> -    return unit->domain->vcpu[unit->unit_id + per_cpu(sched_res_idx, cpu)];
>>> +    unsigned int idx = unit->unit_id + per_cpu(sched_res_idx, cpu);
>>> +    const struct domain *d = unit->domain;
>>> +    struct vcpu *v;
>>> +
>>> +    if ( idx < d->max_vcpus && d->vcpu[idx] )
>>> +    {
>>> +        v = d->vcpu[idx];
>>> +        if ( v->new_state == RUNSTATE_running )
>>> +            return v;
>>
>> Isn't this enough of the cache fill half of a gadget to warrant use of
>> array_index_nospec() or alike?
> 
> The input data is in no way user controlled. Do we really want to add
> barriers before each array access?

array_index_nospec() does not involve any barriers. Indeed I'd have
been more hesitant to suggest a change here if that would involve
adding a barrier.

As to "in no way guest controlled" - did you perhaps see the
discussion regarding speculative NULL derefs that Norbert and I had
on his grant table changes? If NULL can plausibly be found in
d->vcpu[idx] (after all you check for it), then a PV guest could
control what a speculative deref would produce as data, and hence
what might be used for further indirection.

>>> @@ -209,19 +223,11 @@ static inline void vcpu_runstate_change(
>>>       v->runstate.state = new_state;
>>>   }
>>> -static inline void sched_unit_runstate_change(struct sched_unit *unit,
>>> -    bool running, s_time_t new_entry_time)
>>> +void sched_guest_idle(void (*idle) (void), unsigned int cpu)
>>>   {
>>> -    struct vcpu *v;
>>> -
>>> -    for_each_sched_unit_vcpu ( unit, v )
>>> -        if ( running )
>>> -            vcpu_runstate_change(v, v->new_state, new_entry_time);
>>> -        else
>>> -            vcpu_runstate_change(v,
>>> -                ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
>>> -                 (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
>>> -                new_entry_time);
>>> +    atomic_inc(&get_sched_res(cpu)->urgent_count);
>>> +    idle();
>>> +    atomic_dec(&get_sched_res(cpu)->urgent_count);
>>>   }
>>
>> What is "urgent" about an idle vCPU filling an empty sched unit slot?
>> That is, why do you need to prevent the thread from sleeping as
>> power efficiently as possible (potentially allowing the sibling thread
>> to even use more resources)?
> 
> The deeper the thread is sleeping the longer it will take to wake it up
> for synchronized context switching. I'd like to avoid additional
> latencies.

Whether to trade latencies for power savings imo is a decision to be made
by an admin, not us. Use of C-states incurs higher latencies elsewhere as
well, so if they're deemed a problem, use of deeper C-states could be
disallowed via already available command line options.

>>> @@ -1719,25 +1760,25 @@ static struct sched_unit *do_schedule(struct sched_unit *prev, s_time_t now,
>>>       if ( prev->next_time >= 0 ) /* -ve means no limit */
>>>           set_timer(&sd->s_timer, now + prev->next_time);
>>> -    if ( likely(prev != next) )
>>> -        sched_switch_units(sd, next, prev, now);
>>> +    sched_switch_units(sd, next, prev, now);
>>>       return next;
>>>   }
>>> -static void context_saved(struct vcpu *prev)
>>> +static void context_saved(struct sched_unit *unit)
>>>   {
>>> -    struct sched_unit *unit = prev->sched_unit;
>>> -
>>>       unit->is_running = 0;
>>>       unit->state_entry_time = NOW();
>>> +    get_sched_res(smp_processor_id())->prev = NULL;
>>>       /* Check for migration request /after/ clearing running flag. */
>>>       smp_mb();
>>> -    sched_context_saved(vcpu_scheduler(prev), unit);
>>> +    sched_context_saved(vcpu_scheduler(unit->vcpu), unit);
>>
>> An example of it unlikely being just one of the vCPU-s in a unit that
>> you actually want to deal with.
> 
> No. All vcpus of a unit share the same scheduler.

Oh, right.

> OTOH I think there is some room for tuning here: vcpu_scheduler is
> doing quite some work to find the correct struct scheduler. Replacing
> it by unit_scheduler() might be a good idea.

Yes please, if such is available; otherwise I'd have suggested to have
it available. If all vCPU-s of a unit share the same scheduler (which is
to be expected, i.e. I should have paid more attention to the further
context above), why would vcpu_scheduler() be any more expensive than
unit_scheduler() anyway? It ought to simply be unit_scheduler(v->unit).

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit
@ 2019-05-28 10:32   ` Juergen Gross
  0 siblings, 0 replies; 5+ messages in thread
From: Juergen Gross @ 2019-05-28 10:32 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Tim Deegan, Julien Grall, Jan Beulich, Dario Faggioli,
	Roger Pau Monné

When scheduling an unit with multiple vcpus there is no guarantee all
vcpus are available (e.g. above maxvcpus or vcpu offline). Fall back to
idle vcpu of the current cpu in that case. This requires storing the
correct schedule_unit pointer in the idle vcpu as long as it is used as
a fallback vcpu.

In order to modify the runstates of the correct vcpus when switching
schedule units merge sched_unit_runstate_change() into
sched_switch_units() and loop over the affected physical cpus instead
of the unit's vcpus. This in turn requires an access function to the
current variable of other cpus.

Today context_saved() is called in case previous and next vcpus differ
when doing a context switch. With an idle vcpu being capable to be a
substitute for an offline vcpu this is problematic when switching to
an idle scheduling unit. An idle previous vcpu leaves us in doubt which
schedule unit was active previously, so save the previous unit pointer
in the per-schedule resource area and use its value being non-NULL as
a hint whether context_saved() should be called.

When running an idle vcpu in a non-idle scheduling unit use a specific
guest idle loop not performing any tasklets, memory scrubbing and
livepatching in order to avoid populating the cpu caches with memory
used by other domains (as far as possible). Softirqs are considered to
be safe (timers might want to be excluded, but this can be fine-tuned
later).

Signed-off-by: Juergen Gross <jgross@suse.com>
---
RFC V2: new patch (Andrew Cooper)
V1: use urgent_count to select correct idle routine (Jan Beulich)
---
 xen/arch/x86/domain.c         |  21 ++++++
 xen/common/schedule.c         | 155 ++++++++++++++++++++++++++++--------------
 xen/include/asm-arm/current.h |   1 +
 xen/include/asm-x86/current.h |   7 +-
 xen/include/asm-x86/smp.h     |   3 +
 xen/include/xen/sched-if.h    |   4 +-
 xen/include/xen/sched.h       |   1 +
 7 files changed, 140 insertions(+), 52 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 98d2939daf..39a2c1a047 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -159,6 +159,23 @@ static void idle_loop(void)
     }
 }
 
+/*
+ * Idle loop for siblings of active schedule units.
+ * We don't do any standard idle work like tasklets, page scrubbing or
+ * livepatching.
+ */
+static void guest_idle_loop(void)
+{
+    unsigned int cpu = smp_processor_id();
+
+    for ( ; ; )
+    {
+        if ( !softirq_pending(cpu) )
+            sched_guest_idle(pm_idle, cpu);
+        do_softirq();
+    }
+}
+
 void startup_cpu_idle_loop(void)
 {
     struct vcpu *v = current;
@@ -172,6 +189,10 @@ void startup_cpu_idle_loop(void)
 
 static void noreturn continue_idle_domain(struct vcpu *v)
 {
+    /* Idle vcpus might be attached to non-idle units! */
+    if ( !is_idle_domain(v->sched_unit->domain) )
+        reset_stack_and_jump(guest_idle_loop);
+
     reset_stack_and_jump(idle_loop);
 }
 
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 58d3de340e..e0103fdb1d 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -82,7 +82,18 @@ static struct scheduler __read_mostly ops;
 static inline struct vcpu *sched_unit2vcpu_cpu(struct sched_unit *unit,
                                                unsigned int cpu)
 {
-    return unit->domain->vcpu[unit->unit_id + per_cpu(sched_res_idx, cpu)];
+    unsigned int idx = unit->unit_id + per_cpu(sched_res_idx, cpu);
+    const struct domain *d = unit->domain;
+    struct vcpu *v;
+
+    if ( idx < d->max_vcpus && d->vcpu[idx] )
+    {
+        v = d->vcpu[idx];
+        if ( v->new_state == RUNSTATE_running )
+            return v;
+    }
+
+    return idle_vcpu[cpu];
 }
 
 static inline struct scheduler *dom_scheduler(const struct domain *d)
@@ -196,8 +207,11 @@ static inline void vcpu_runstate_change(
 
     trace_runstate_change(v, new_state);
 
-    unit->runstate_cnt[v->runstate.state]--;
-    unit->runstate_cnt[new_state]++;
+    if ( !is_idle_vcpu(v) )
+    {
+        unit->runstate_cnt[v->runstate.state]--;
+        unit->runstate_cnt[new_state]++;
+    }
 
     delta = new_entry_time - v->runstate.state_entry_time;
     if ( delta > 0 )
@@ -209,19 +223,11 @@ static inline void vcpu_runstate_change(
     v->runstate.state = new_state;
 }
 
-static inline void sched_unit_runstate_change(struct sched_unit *unit,
-    bool running, s_time_t new_entry_time)
+void sched_guest_idle(void (*idle) (void), unsigned int cpu)
 {
-    struct vcpu *v;
-
-    for_each_sched_unit_vcpu ( unit, v )
-        if ( running )
-            vcpu_runstate_change(v, v->new_state, new_entry_time);
-        else
-            vcpu_runstate_change(v,
-                ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
-                 (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
-                new_entry_time);
+    atomic_inc(&get_sched_res(cpu)->urgent_count);
+    idle();
+    atomic_dec(&get_sched_res(cpu)->urgent_count);
 }
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
@@ -461,6 +467,7 @@ int sched_init_vcpu(struct vcpu *v)
     if ( is_idle_domain(d) )
     {
         get_sched_res(v->processor)->curr = unit;
+        get_sched_res(v->processor)->sched_unit_idle = unit;
         v->is_running = 1;
         unit->is_running = 1;
         unit->state_entry_time = NOW();
@@ -1637,33 +1644,67 @@ static void sched_switch_units(struct sched_resource *sd,
                                struct sched_unit *next, struct sched_unit *prev,
                                s_time_t now)
 {
-    sd->curr = next;
-
-    TRACE_3D(TRC_SCHED_SWITCH_INFPREV, prev->domain->domain_id, prev->unit_id,
-             now - prev->state_entry_time);
-    TRACE_4D(TRC_SCHED_SWITCH_INFNEXT, next->domain->domain_id, next->unit_id,
-             (next->vcpu->runstate.state == RUNSTATE_runnable) ?
-             (now - next->state_entry_time) : 0, prev->next_time);
+    int cpu;
 
     ASSERT(unit_running(prev));
 
-    TRACE_4D(TRC_SCHED_SWITCH, prev->domain->domain_id, prev->unit_id,
-             next->domain->domain_id, next->unit_id);
+    if ( prev != next )
+    {
+        sd->curr = next;
+        sd->prev = prev;
 
-    sched_unit_runstate_change(prev, false, now);
-    prev->last_run_time = now;
+        TRACE_3D(TRC_SCHED_SWITCH_INFPREV, prev->domain->domain_id,
+                 prev->unit_id, now - prev->state_entry_time);
+        TRACE_4D(TRC_SCHED_SWITCH_INFNEXT, next->domain->domain_id,
+                 next->unit_id,
+                 (next->vcpu->runstate.state == RUNSTATE_runnable) ?
+                 (now - next->state_entry_time) : 0, prev->next_time);
+        TRACE_4D(TRC_SCHED_SWITCH, prev->domain->domain_id, prev->unit_id,
+                 next->domain->domain_id, next->unit_id);
 
-    ASSERT(!unit_running(next));
-    sched_unit_runstate_change(next, true, now);
+        prev->last_run_time = now;
 
-    /*
-     * NB. Don't add any trace records from here until the actual context
-     * switch, else lost_records resume will not work properly.
-     */
+        ASSERT(!unit_running(next));
+
+        /*
+         * NB. Don't add any trace records from here until the actual context
+         * switch, else lost_records resume will not work properly.
+         */
 
-    ASSERT(!next->is_running);
-    next->vcpu->is_running = 1;
-    next->is_running = 1;
+        ASSERT(!next->is_running);
+        next->is_running = 1;
+
+        if ( is_idle_unit(prev) )
+        {
+            prev->runstate_cnt[RUNSTATE_running] = 0;
+            prev->runstate_cnt[RUNSTATE_runnable] = sched_granularity;
+        }
+        if ( is_idle_unit(next) )
+        {
+            next->runstate_cnt[RUNSTATE_running] = sched_granularity;
+            next->runstate_cnt[RUNSTATE_runnable] = 0;
+        }
+    }
+
+    for_each_cpu ( cpu, sd->cpus )
+    {
+        struct vcpu *vprev = get_cpu_current(cpu);
+        struct vcpu *vnext = sched_unit2vcpu_cpu(next, cpu);
+
+        if ( vprev != vnext || vprev->runstate.state != vnext->new_state )
+        {
+            vcpu_runstate_change(vprev,
+                ((vprev->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
+                 (vcpu_runnable(vprev) ? RUNSTATE_runnable : RUNSTATE_offline)),
+                now);
+            vcpu_runstate_change(vnext, vnext->new_state, now);
+        }
+
+        vnext->is_running = 1;
+
+        if ( is_idle_vcpu(vnext) )
+            vnext->sched_unit = next;
+    }
 }
 
 static bool sched_tasklet_check_cpu(unsigned int cpu)
@@ -1719,25 +1760,25 @@ static struct sched_unit *do_schedule(struct sched_unit *prev, s_time_t now,
     if ( prev->next_time >= 0 ) /* -ve means no limit */
         set_timer(&sd->s_timer, now + prev->next_time);
 
-    if ( likely(prev != next) )
-        sched_switch_units(sd, next, prev, now);
+    sched_switch_units(sd, next, prev, now);
 
     return next;
 }
 
-static void context_saved(struct vcpu *prev)
+static void context_saved(struct sched_unit *unit)
 {
-    struct sched_unit *unit = prev->sched_unit;
-
     unit->is_running = 0;
     unit->state_entry_time = NOW();
+    get_sched_res(smp_processor_id())->prev = NULL;
 
     /* Check for migration request /after/ clearing running flag. */
     smp_mb();
 
-    sched_context_saved(vcpu_scheduler(prev), unit);
+    sched_context_saved(vcpu_scheduler(unit->vcpu), unit);
 
-    sched_unit_migrate_finish(unit);
+    /* Idle never migrates and idle vcpus might belong to other units. */
+    if ( !is_idle_unit(unit) )
+        sched_unit_migrate_finish(unit);
 }
 
 /*
@@ -1754,11 +1795,13 @@ static void context_saved(struct vcpu *prev)
 void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext)
 {
     struct sched_unit *next = vnext->sched_unit;
+    struct sched_resource *sd = get_sched_res(smp_processor_id());
 
     /* Clear running flag /after/ writing context to memory. */
     smp_wmb();
 
-    vprev->is_running = 0;
+    if ( vprev != vnext )
+        vprev->is_running = 0;
 
     if ( atomic_read(&next->rendezvous_out_cnt) )
     {
@@ -1767,20 +1810,23 @@ void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext)
         /* Call context_saved() before releasing other waiters. */
         if ( cnt == 1 )
         {
-            if ( vprev != vnext )
-                context_saved(vprev);
+            if ( sd->prev )
+                context_saved(sd->prev);
             atomic_set(&next->rendezvous_out_cnt, 0);
         }
         else
             while ( atomic_read(&next->rendezvous_out_cnt) )
                 cpu_relax();
     }
-    else if ( vprev != vnext && sched_granularity == 1 )
-        context_saved(vprev);
+    else if ( sd->prev )
+        context_saved(sd->prev);
+
+    if ( is_idle_vcpu(vprev) && vprev != vnext )
+        vprev->sched_unit = sd->sched_unit_idle;
 }
 
 static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
-                                 s_time_t now)
+                                 bool reset_idle_unit, s_time_t now)
 {
     if ( unlikely(vprev == vnext) )
     {
@@ -1789,6 +1835,11 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
                  now - vprev->runstate.state_entry_time,
                  vprev->sched_unit->next_time);
         sched_context_switched(vprev, vnext);
+
+        if ( reset_idle_unit )
+            vnext->sched_unit =
+                get_sched_res(smp_processor_id())->sched_unit_idle;
+
         trace_continue_running(vnext);
         return continue_running(vprev);
     }
@@ -1870,7 +1921,8 @@ static void sched_slave(void)
 
     pcpu_schedule_unlock_irq(lock, cpu);
 
-    sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu), now);
+    sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu),
+                         is_idle_unit(next) && !is_idle_unit(prev), now);
 }
 
 /*
@@ -1930,7 +1982,8 @@ static void schedule(void)
     pcpu_schedule_unlock_irq(lock, cpu);
 
     vnext = sched_unit2vcpu_cpu(next, cpu);
-    sched_context_switch(vprev, vnext, now);
+    sched_context_switch(vprev, vnext,
+                         !is_idle_unit(prev) && is_idle_unit(next), now);
 }
 
 /* The scheduler timer: force a run through the scheduler */
@@ -2015,6 +2068,7 @@ static int cpu_schedule_up(unsigned int cpu)
         return -ENOMEM;
 
     sd->curr = idle_vcpu[cpu]->sched_unit;
+    sd->sched_unit_idle = idle_vcpu[cpu]->sched_unit;
 
     /*
      * We don't want to risk calling xfree() on an sd->sched_priv
@@ -2216,6 +2270,7 @@ void __init scheduler_init(void)
     if ( vcpu_create(idle_domain, 0) == NULL )
         BUG();
     get_sched_res(0)->curr = idle_vcpu[0]->sched_unit;
+    get_sched_res(0)->sched_unit_idle = idle_vcpu[0]->sched_unit;
     get_sched_res(0)->sched_priv = sched_alloc_pdata(&ops, 0);
     BUG_ON(IS_ERR(get_sched_res(0)->sched_priv));
     scheduler_percpu_init(0);
diff --git a/xen/include/asm-arm/current.h b/xen/include/asm-arm/current.h
index c4af66fbb9..a7602eef8c 100644
--- a/xen/include/asm-arm/current.h
+++ b/xen/include/asm-arm/current.h
@@ -18,6 +18,7 @@ DECLARE_PER_CPU(struct vcpu *, curr_vcpu);
 
 #define current            (this_cpu(curr_vcpu))
 #define set_current(vcpu)  do { current = (vcpu); } while (0)
+#define get_cpu_current(cpu)  (per_cpu(curr_vcpu, cpu))
 
 /* Per-VCPU state that lives at the top of the stack */
 struct cpu_info {
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index f3508c3c08..1e807d63cf 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -77,6 +77,11 @@ struct cpu_info {
     /* get_stack_bottom() must be 16-byte aligned */
 };
 
+static inline struct cpu_info *get_cpu_info_from_stack(unsigned long sp)
+{
+    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
+}
+
 static inline struct cpu_info *get_cpu_info(void)
 {
 #ifdef __clang__
@@ -87,7 +92,7 @@ static inline struct cpu_info *get_cpu_info(void)
     register unsigned long sp asm("rsp");
 #endif
 
-    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
+    return get_cpu_info_from_stack(sp);
 }
 
 #define get_current()         (get_cpu_info()->current_vcpu)
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index 9f533f9072..51a31ab00a 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -76,6 +76,9 @@ void set_nr_sockets(void);
 /* Representing HT and core siblings in each socket. */
 extern cpumask_t **socket_cpumask;
 
+#define get_cpu_current(cpu) \
+    (get_cpu_info_from_stack((unsigned long)stack_base[cpu])->current_vcpu)
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index e93fe9f3be..a1aefa2a25 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -39,6 +39,8 @@ struct sched_resource {
     spinlock_t         *schedule_lock,
                        _lock;
     struct sched_unit  *curr;           /* current task                    */
+    struct sched_unit  *sched_unit_idle;
+    struct sched_unit  *prev;           /* previous task                   */
     void               *sched_priv;
     struct timer        s_timer;        /* scheduling timer                */
     atomic_t            urgent_count;   /* how many urgent vcpus           */
@@ -168,7 +170,7 @@ static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit,
 
 static inline struct sched_unit *sched_idle_unit(unsigned int cpu)
 {
-    return idle_vcpu[cpu]->sched_unit;
+    return get_sched_res(cpu)->sched_unit_idle;
 }
 
 static inline unsigned int sched_get_resource_cpu(unsigned int cpu)
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index af4c934e0b..0581c7d44f 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -921,6 +921,7 @@ int vcpu_pin_override(struct vcpu *v, int cpu);
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
 int sched_has_urgent_vcpu(void);
+void sched_guest_idle(void (*idle) (void), unsigned int cpu);
 
 /*
  * Used by idle loop to decide whether there is work to do:
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-07-03  7:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-02 15:01 [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit Jan Beulich
2019-07-02 16:22 ` Andrew Cooper
2019-07-03  7:13 ` Juergen Gross
2019-07-03  7:54   ` Jan Beulich
  -- strict thread matches above, loose matches on Subject: below --
2019-05-28 10:32 [PATCH 00/60] xen: add core scheduling support Juergen Gross
2019-05-28 10:32 ` [Xen-devel] [PATCH 44/60] xen/sched: add fall back to idle vcpu when scheduling unit Juergen Gross
2019-05-28 10:32   ` Juergen Gross

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.