* Re: [Xen-devel] [PATCH 13/60] xen/sched: move some per-vcpu items to struct sched_unit
@ 2019-07-01 14:08 Jan Beulich
  2019-07-01 15:10 ` Juergen Gross
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2019-07-01 14:08 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Wilk, George Dunlap,
	Andrew Cooper, Tim Deegan, Dario Faggioli, Julien Grall, Meng Xu,
	Ian Jackson, xen-devel, Roger Pau Monne

>>> On 28.05.19 at 12:32, <jgross@suse.com> wrote:
> @@ -155,8 +156,8 @@ static void nmi_mce_softirq(void)
>       * Set the tmp value unconditionally, so that the check in the iret
>       * hypercall works.
>       */
> -    cpumask_copy(st->vcpu->cpu_hard_affinity_tmp,
> -                 st->vcpu->cpu_hard_affinity);
> +    cpumask_copy(st->vcpu->sched_unit->cpu_hard_affinity_tmp,
> +                 st->vcpu->sched_unit->cpu_hard_affinity);

Aiui this affects all vCPU-s in the unit, which is unlikely to be what we
want here: There's now only one cpu_hard_affinity_tmp for all vCPU-s
in the unit, yet every vCPU in there may want to make use of the
field in parallel.

I also wonder how the code further down in this function fits with
the scheduler unit concept. But perhaps that's going to be dealt with
by later patches...

> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -125,11 +125,6 @@ static void vcpu_info_reset(struct vcpu *v)
>  
>  static void vcpu_destroy(struct vcpu *v)
>  {
> -    free_cpumask_var(v->cpu_hard_affinity);
> -    free_cpumask_var(v->cpu_hard_affinity_tmp);
> -    free_cpumask_var(v->cpu_hard_affinity_saved);
> -    free_cpumask_var(v->cpu_soft_affinity);
> -
>      free_vcpu_struct(v);
>  }
>  
> @@ -153,12 +148,6 @@ struct vcpu *vcpu_create(
>  
>      grant_table_init_vcpu(v);
>  
> -    if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
> -         !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
> -         !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
> -         !zalloc_cpumask_var(&v->cpu_soft_affinity) )
> -        goto fail;

Seeing these, I'm actually having trouble understanding how you mean
to retain the user-visible interface behavior here: If you only store an
affinity per sched unit, then how are you going to honor the vCPU-granular
requests coming in?

> @@ -557,9 +545,10 @@ void domain_update_node_affinity(struct domain *d)
>           */
>          for_each_vcpu ( d, v )
>          {
> -            cpumask_or(dom_cpumask, dom_cpumask, v->cpu_hard_affinity);
> +            cpumask_or(dom_cpumask, dom_cpumask,
> +                       v->sched_unit->cpu_hard_affinity);
>              cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
> -                       v->cpu_soft_affinity);
> +                       v->sched_unit->cpu_soft_affinity);
>          }

There's not going to be a for_each_sched_unit(), is there? It
would mean fewer iterations here, and fewer redundant CPU mask
operations. Ah, that's the subject of patch 30.

> @@ -1226,7 +1215,7 @@ int vcpu_reset(struct vcpu *v)
>      v->async_exception_mask = 0;
>      memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
>  #endif
> -    cpumask_clear(v->cpu_hard_affinity_tmp);
> +    cpumask_clear(v->sched_unit->cpu_hard_affinity_tmp);

Same issue as above - you're affecting other vCPU-s here.

> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -614,6 +614,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>      case XEN_DOMCTL_getvcpuaffinity:
>      {
>          struct vcpu *v;
> +        struct sched_unit *unit;

const?

> --- a/xen/common/keyhandler.c
> +++ b/xen/common/keyhandler.c
> @@ -312,8 +312,8 @@ static void dump_domains(unsigned char key)
>                  printk("dirty_cpu=%u", v->dirty_cpu);
>              printk("\n");
>              printk("    cpu_hard_affinity={%*pbl} cpu_soft_affinity={%*pbl}\n",
> -                   nr_cpu_ids, cpumask_bits(v->cpu_hard_affinity),
> -                   nr_cpu_ids, cpumask_bits(v->cpu_soft_affinity));
> +                   nr_cpu_ids, cpumask_bits(v->sched_unit->cpu_hard_affinity),
> +                   nr_cpu_ids, cpumask_bits(v->sched_unit->cpu_soft_affinity));

I don't see the value of logging the same information multiple times
(for each vCPU in a sched unit). I think you want to split this up.

> --- a/xen/common/wait.c
> +++ b/xen/common/wait.c
> @@ -132,7 +132,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>  
>      /* Save current VCPU affinity; force wakeup on *this* CPU only. */
>      wqv->wakeup_cpu = smp_processor_id();
> -    cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
> +    cpumask_copy(&wqv->saved_affinity, curr->sched_unit->cpu_hard_affinity);
>      if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
>      {
>          gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
> @@ -199,7 +199,7 @@ void check_wakeup_from_wait(void)
>      {
>          /* Re-set VCPU affinity and re-enter the scheduler. */
>          struct vcpu *curr = current;
> -        cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
> +        cpumask_copy(&wqv->saved_affinity, curr->sched_unit->cpu_hard_affinity);
>          if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
>          {
>              gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");

Same problem as above - the consumer of ->saved_affinity will affect
vCPU-s other than the subject one.

> --- a/xen/include/xen/sched-if.h
> +++ b/xen/include/xen/sched-if.h
> @@ -438,11 +438,11 @@ static inline cpumask_t* cpupool_domain_cpumask(struct domain *d)
>   * * The hard affinity is not a subset of soft affinity
>   * * There is an overlap between the soft and hard affinity masks
>   */
> -static inline int has_soft_affinity(const struct vcpu *v)
> +static inline int has_soft_affinity(const struct sched_unit *unit)
>  {
> -    return v->soft_aff_effective &&
> -           !cpumask_subset(cpupool_domain_cpumask(v->domain),
> -                           v->cpu_soft_affinity);
> +    return unit->soft_aff_effective &&
> +           !cpumask_subset(cpupool_domain_cpumask(unit->vcpu->domain),
> +                           unit->cpu_soft_affinity);
>  }

Okay, at the moment there looks to be a 1:1 relationship between sched
units and vCPU-s. This would - at this point of the series - invalidate most
of my earlier comments. However, in patch 57 I don't see how this unit->vcpu
mapping would get broken, and I can't seem to identify any other patch
where this might be happening. Looking at the github branch I also get the
impression that the struct vcpu * pointer out of struct sched_unit survives
until the end of the series, which doesn't seem right to me.

In any event, for the purpose here, I think there should be a backlink to
struct domain in struct sched_unit right away, and it should get used here.

> @@ -283,6 +265,22 @@ struct sched_unit {
>      void                  *priv;      /* scheduler private data */
>      struct sched_unit     *next_in_list;
>      struct sched_resource *res;
> +
> +    /* Last time when unit has been scheduled out. */
> +    uint64_t               last_run_time;
> +
> +    /* Item needs affinity restored. */
> +    bool                   affinity_broken;
> +    /* Does soft affinity actually play a role (given hard affinity)? */
> +    bool                   soft_aff_effective;
> +    /* Bitmask of CPUs on which this VCPU may run. */
> +    cpumask_var_t          cpu_hard_affinity;
> +    /* Used to change affinity temporarily. */
> +    cpumask_var_t          cpu_hard_affinity_tmp;
> +    /* Used to restore affinity across S3. */
> +    cpumask_var_t          cpu_hard_affinity_saved;
> +    /* Bitmask of CPUs on which this VCPU prefers to run. */
> +    cpumask_var_t          cpu_soft_affinity;
>  };

The mentions of "VCPU" in the comments here also survive till the end
of the series, which I also don't think is quite right.

> @@ -980,7 +978,7 @@ static inline bool is_hvm_vcpu(const struct vcpu *v)
>  static inline bool is_hwdom_pinned_vcpu(const struct vcpu *v)
>  {
>      return (is_hardware_domain(v->domain) &&
> -            cpumask_weight(v->cpu_hard_affinity) == 1);
> +            cpumask_weight(v->sched_unit->cpu_hard_affinity) == 1);
>  }

Seeing this - how is pinning (by command line option or by Dom0
doing this on itself transiently) going to work with core scheduling?

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

* [PATCH 00/60] xen: add core scheduling support
@ 2019-05-28 10:32 Juergen Gross
  2019-05-28 10:32   ` Juergen Gross
  0 siblings, 1 reply; 21+ messages in thread
From: Juergen Gross @ 2019-05-28 10:32 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, Tim Deegan, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Robert VanVossen, Dario Faggioli, Julien Grall, Josh Whitehead,
	Meng Xu, Jan Beulich, Roger Pau Monné

Add support for core- and socket-scheduling in the Xen hypervisor.

Via boot parameter sched-gran=core (or sched-gran=socket)
it is possible to change the scheduling granularity from cpu (the
default) to either whole cores or even sockets.

All logical cpus (threads) of the core or socket are always scheduled
together. This means that only vcpus of the same domain will ever be
active on a core, and those vcpus will always be scheduled at the same
time.

This is achieved by switching the scheduler to no longer see vcpus as
the primary object to schedule, but "schedule units". Each schedule
unit consists of as many vcpus as each core has threads on the current
system. The vcpu->unit relation is fixed.

I have done some very basic performance testing: on a 4 cpu system
(2 cores with 2 threads each) I did a "make -j 4" build of the Xen
hypervisor. This test has been run in dom0, once with no other
guest active and once with another guest with 4 vcpus running the
same test. The results are (always elapsed time, system time, user time):

sched-gran=cpu,    no other guest: 116.10 177.65 207.84
sched-gran=core,   no other guest: 114.04 175.47 207.45
sched-gran=cpu,    other guest:    202.30 334.21 384.63
sched-gran=core,   other guest:    207.24 293.04 371.37

The performance tests have been performed with credit2; the other
schedulers have been tested only briefly, just far enough to be able
to create a domain in a cpupool with them.

Cpupools have been moderately tested (cpu add/remove, create, destroy,
move domain).

Cpu on-/offlining has been moderately tested, too.

The complete patch series is available under:

  git://github.com/jgross1/xen/ sched-v1

Changes in V1:
- cpupools are working now
- cpu on-/offlining working now
- all schedulers working now
- renamed "items" to "units"
- introduction of "idle scheduler"
- several new patches (see individual patches, mostly splits of
  former patches or cpupool and cpu on-/offlining support)
- all review comments addressed
- some minor changes (see individual patches)

Changes in RFC V2:
- ARM is building now
- HVM domains are working now
- idling will always be done with idle_vcpu active
- other small changes see individual patches

Juergen Gross (60):
  xen/sched: only allow schedulers with all mandatory functions
    available
  xen/sched: add inline wrappers for calling per-scheduler functions
  xen/sched: let sched_switch_sched() return new lock address
  xen/sched: use new sched_unit instead of vcpu in scheduler interfaces
  xen/sched: alloc struct sched_unit for each vcpu
  xen/sched: move per-vcpu scheduler private data pointer to sched_unit
  xen/sched: build a linked list of struct sched_unit
  xen/sched: introduce struct sched_resource
  xen/sched: let pick_cpu return a scheduler resource
  xen/sched: switch schedule_data.curr to point at sched_unit
  xen/sched: move per cpu scheduler private data into struct
    sched_resource
  xen/sched: switch vcpu_schedule_lock to unit_schedule_lock
  xen/sched: move some per-vcpu items to struct sched_unit
  xen/sched: add scheduler helpers hiding vcpu
  xen/sched: add domain pointer to struct sched_unit
  xen/sched: add id to struct sched_unit
  xen/sched: rename scheduler related perf counters
  xen/sched: switch struct task_slice from vcpu to sched_unit
  xen/sched: add is_running indicator to struct sched_unit
  xen/sched: make null scheduler vcpu agnostic.
  xen/sched: make rt scheduler vcpu agnostic.
  xen/sched: make credit scheduler vcpu agnostic.
  xen/sched: make credit2 scheduler vcpu agnostic.
  xen/sched: make arinc653 scheduler vcpu agnostic.
  xen: add sched_unit_pause_nosync() and sched_unit_unpause()
  xen: let vcpu_create() select processor
  xen/sched: use sched_resource cpu instead smp_processor_id in
    schedulers
  xen/sched: switch schedule() from vcpus to sched_units
  xen/sched: switch sched_move_irqs() to take sched_unit as parameter
  xen: switch from for_each_vcpu() to for_each_sched_unit()
  xen/sched: add runstate counters to struct sched_unit
  xen/sched: rework and rename vcpu_force_reschedule()
  xen/sched: Change vcpu_migrate_*() to operate on schedule unit
  xen/sched: move struct task_slice into struct sched_unit
  xen/sched: add code to sync scheduling of all vcpus of a sched unit
  xen/sched: introduce unit_runnable_state()
  xen/sched: add support for multiple vcpus per sched unit where missing
  x86: make loading of GDT at context switch more modular
  x86: optimize loading of GDT at context switch
  xen/sched: modify cpupool_domain_cpumask() to be an unit mask
  xen/sched: support allocating multiple vcpus into one sched unit
  xen/sched: add a scheduler_percpu_init() function
  xen/sched: add a percpu resource index
  xen/sched: add fall back to idle vcpu when scheduling unit
  xen/sched: make vcpu_wake() and vcpu_sleep() core scheduling aware
  xen/sched: carve out freeing sched_unit memory into dedicated function
  xen/sched: move per-cpu variable scheduler to struct sched_resource
  xen/sched: move per-cpu variable cpupool to struct sched_resource
  xen/sched: reject switching smt on/off with core scheduling active
  xen/sched: prepare per-cpupool scheduling granularity
  xen/sched: use one schedule lock for all free cpus
  xen/sched: populate cpupool0 only after all cpus are up
  xen/sched: remove cpu from pool0 before removing it
  xen/sched: add minimalistic idle scheduler for free cpus
  xen/sched: split schedule_cpu_switch()
  xen/sched: protect scheduling resource via rcu
  xen/sched: support multiple cpus per scheduling resource
  xen/sched: support differing granularity in schedule_cpu_[add/rm]()
  xen/sched: support core scheduling for moving cpus to/from cpupools
  xen/sched: add scheduling granularity enum

 xen/arch/arm/domain.c             |    2 +-
 xen/arch/arm/domain_build.c       |   13 +-
 xen/arch/x86/acpi/cpu_idle.c      |    1 -
 xen/arch/x86/cpu/common.c         |    3 +
 xen/arch/x86/cpu/mcheck/mce.c     |    1 -
 xen/arch/x86/cpu/mcheck/mctelem.c |    1 -
 xen/arch/x86/dom0_build.c         |   10 +-
 xen/arch/x86/domain.c             |   93 +-
 xen/arch/x86/hvm/dom0_build.c     |    9 +-
 xen/arch/x86/pv/dom0_build.c      |   10 +-
 xen/arch/x86/pv/emul-priv-op.c    |    1 +
 xen/arch/x86/pv/shim.c            |    4 +-
 xen/arch/x86/pv/traps.c           |    5 +-
 xen/arch/x86/setup.c              |    1 -
 xen/arch/x86/smpboot.c            |    1 -
 xen/arch/x86/sysctl.c             |    3 +-
 xen/arch/x86/traps.c              |    9 +-
 xen/common/cpupool.c              |  326 ++++---
 xen/common/domain.c               |   34 +-
 xen/common/domctl.c               |   23 +-
 xen/common/keyhandler.c           |    4 +-
 xen/common/sched_arinc653.c       |  270 +++---
 xen/common/sched_credit.c         |  783 ++++++++-------
 xen/common/sched_credit2.c        | 1134 +++++++++++-----------
 xen/common/sched_null.c           |  443 +++++----
 xen/common/sched_rt.c             |  555 +++++------
 xen/common/schedule.c             | 1923 +++++++++++++++++++++++++++++--------
 xen/common/softirq.c              |    6 +-
 xen/common/wait.c                 |    4 +-
 xen/include/asm-arm/current.h     |    1 +
 xen/include/asm-x86/cpuidle.h     |   11 -
 xen/include/asm-x86/current.h     |    7 +-
 xen/include/asm-x86/desc.h        |    1 +
 xen/include/asm-x86/dom0_build.h  |    3 +-
 xen/include/asm-x86/smp.h         |    3 +
 xen/include/xen/domain.h          |    3 +-
 xen/include/xen/perfc_defn.h      |   32 +-
 xen/include/xen/sched-if.h        |  444 +++++++--
 xen/include/xen/sched.h           |   99 +-
 xen/include/xen/softirq.h         |    1 +
 40 files changed, 3905 insertions(+), 2372 deletions(-)

-- 
2.16.4


