On Wed, 2016-03-16 at 11:47 -0500, Chong Li wrote: > --- a/xen/common/sched_credit.c > +++ b/xen/common/sched_credit.c > @@ -1054,15 +1054,16 @@ csched_dom_cntl( >       * lock. Runq lock not needed anywhere in here. */ >      spin_lock_irqsave(&prv->lock, flags); >   > -    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) > +    switch ( op->cmd ) >      { > +    case XEN_DOMCTL_SCHEDOP_putvcpuinfo: > +    case XEN_DOMCTL_SCHEDOP_getvcpuinfo: > +        return -EINVAL; > +    case XEN_DOMCTL_SCHEDOP_getinfo: >          op->u.credit.weight = sdom->weight; >          op->u.credit.cap = sdom->cap; > Not feeling too strongly about it, but I think     switch ( op->cmd )     {     case XEN_DOMCTL_SCHEDOP_getinfo:         op->u.credit.weight = sdom->weight;         op->u.credit.cap = sdom->cap;         break;     case XEN_DOMCTL_SCHEDOP_putinfo:         if ( op->u.credit.weight != 0 )         {             if ( !list_empty(&sdom->active_sdom_elem) )             {                 prv->weight -= sdom->weight * sdom->active_vcpu_count;                 prv->weight += op->u.credit.weight * sdom->active_vcpu_count;             }             sdom->weight = op->u.credit.weight;         }         if ( op->u.credit.cap != (uint16_t)~0U )             sdom->cap = op->u.credit.cap;         break;     case XEN_DOMCTL_SCHEDOP_putvcpuinfo:     case XEN_DOMCTL_SCHEDOP_getvcpuinfo:     default:         return -EINVAL;     } (i.e., grouping the cases that need only returning -EINVAL) is better, the final result, more than the patch itself. And the same for Credit2, of course. 
> --- a/xen/common/sched_rt.c > +++ b/xen/common/sched_rt.c > @@ -1129,24 +1145,22 @@ rt_dom_cntl( >      struct vcpu *v; >      unsigned long flags; >      int rc = 0; > +    xen_domctl_schedparam_vcpu_t local_sched; > +    s_time_t period, budget; > +    uint32_t index = 0; >   >      switch ( op->cmd ) >      { >      case XEN_DOMCTL_SCHEDOP_getinfo: > -        if ( d->max_vcpus > 0 ) > -        { > -            spin_lock_irqsave(&prv->lock, flags); > -            svc = rt_vcpu(d->vcpu[0]); > -            op->u.rtds.period = svc->period / MICROSECS(1); > -            op->u.rtds.budget = svc->budget / MICROSECS(1); > -            spin_unlock_irqrestore(&prv->lock, flags); > -        } > -        else > -        { > -            /* If we don't have vcpus yet, let's just return the > defaults. */ > -            op->u.rtds.period = RTDS_DEFAULT_PERIOD; > -            op->u.rtds.budget = RTDS_DEFAULT_BUDGET; > -        } > +        /* Return the default parameters. > +         * A previous bug fixed here: > +         * The default PERIOD or BUDGET should be divided by > MICROSECS(1), > +         * before returned to upper caller. > +         */ Comment style (the opening '/*' should be on a line of its own). Also, putting this kind of thing in comments is not at all ideal. If doing this in this patch, you should mention it in the changelog. Or you do it in a separate patch (that you put before this one in the series). I'd say that, in this specific case, it is not a big deal which one of the two approaches you take (mentioning or separate patch), but having a separate one is indeed almost always preferable (e.g., the fix can be backported). > +        spin_lock_irqsave(&prv->lock, flags); > +        op->u.rtds.period = RTDS_DEFAULT_PERIOD / MICROSECS(1); > +        op->u.rtds.budget = RTDS_DEFAULT_BUDGET / MICROSECS(1); > +        spin_unlock_irqrestore(&prv->lock, flags); > I don't think we need to take the lock here... RTDS_DEFAULT_PERIOD is not going to change under our feet. 
:-) > @@ -1163,6 +1177,78 @@ rt_dom_cntl( >          } >          spin_unlock_irqrestore(&prv->lock, flags); >          break; > +    case XEN_DOMCTL_SCHEDOP_getvcpuinfo: > +        while ( index < op->u.v.nr_vcpus ) > +        { > +            if ( copy_from_guest_offset(&local_sched, > +                                        op->u.v.vcpus, index, 1) ) > +            { > +                rc = -EFAULT; > +                break; > +            } > +            if ( local_sched.vcpuid >= d->max_vcpus || > +                 d->vcpu[local_sched.vcpuid] == NULL ) > +            { > +                rc = -EINVAL; > +                break; > +            } > + > +            spin_lock_irqsave(&prv->lock, flags); > +            svc = rt_vcpu(d->vcpu[local_sched.vcpuid]); > +            local_sched.u.rtds.budget = svc->budget / MICROSECS(1); > +            local_sched.u.rtds.period = svc->period / MICROSECS(1); > +            spin_unlock_irqrestore(&prv->lock, flags); > + > +            if ( copy_to_guest_offset(op->u.v.vcpus, index, > +                                        &local_sched, 1) ) > +            { > +                rc = -EFAULT; > +                break; > +            } > +            if ( (++index > 0x3f) && hypercall_preempt_check() ) > +                break; > So, I know it's 0x3f in XEN_SYSCTL_pcitopoinfo, but unless I'm missing something, I don't see why this can't be "63". I'd also put a short comment right above, something like:  /* Process at most 64 vCPUs without checking for preemptions. */ > +        } > +        if ( !rc ) /* notify upper caller how many vcpus have been > processed */ > Move the comment to the line above (and terminate it with a full stop). And by the way, there's a lot of code repetition here. 
What about handling get and set together, sort of like this:     case XEN_DOMCTL_SCHEDOP_getvcpuinfo:                                                                              |     case XEN_DOMCTL_SCHEDOP_putvcpuinfo:                                                                              |#ifdef CONFIG_HAS_PCI         while ( index < op->u.v.nr_vcpus )                                                                            |    case XEN_SYSCTL_pcitopoinfo:         {                                                                                                             |    {             if ( copy_from_guest_offset(&local_sched,                                                                 |        xen_sysctl_pcitopoinfo_t *ti = &op->u.pcitopoinfo;                                         op->u.v.vcpus, index, 1) )                                                    |        unsigned int i = 0;             {                                                                                                         |                 rc = -EFAULT;                                                                                         |        if ( guest_handle_is_null(ti->devs) ||                 break;                                                                                                |             guest_handle_is_null(ti->nodes) )             }                                                                                                         |        {                                                                                                                       |            ret = -EINVAL;             if ( local_sched.vcpuid >= d->max_vcpus ||                                                                |            break;                  d->vcpu[local_sched.vcpuid] == NULL )                                                                |        }             {                                                                     
                                    |                 rc = -EINVAL;                                                                                         |        while ( i < ti->num_devs )                 break;                                                                                                |        {             }                                                                                                         |            physdev_pci_device_t dev;                                                                                                                       |            uint32_t node;             if ( op->cmd == XEN_DOMCTL_SCHEDOP_getvcpuinfo )                                                          |            const struct pci_dev *pdev;             {                                                                                                         |                 spin_lock_irqsave(&prv->lock, flags);                                                                 |            if ( copy_from_guest_offset(&dev, ti->devs, i, 1) )                 svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);                                                           |            {                 local_sched.u.rtds.budget = svc->budget / MICROSECS(1);                                               |                ret = -EFAULT;                 local_sched.u.rtds.period = svc->period / MICROSECS(1);                                               |                break;                 spin_unlock_irqrestore(&prv->lock, flags);                                                            |            }                                                                                                                       |                 if ( copy_to_guest_offset(op->u.v.vcpus, index,                                                       |            pcidevs_lock();                                           &local_sched, 1) )                             
                             |            pdev = pci_get_pdev(dev.seg, dev.bus, dev.devfn);                 {                                                                                                     |            if ( !pdev )                     rc = -EFAULT;                                                                                     |                node = XEN_INVALID_DEV;                     break;                                                                                            |            else if ( pdev->node == NUMA_NO_NODE )                 }                                                                                                     |                node = XEN_INVALID_NODE_ID;             }                                                                                                         |            else             else                                                                                                      |                node = pdev->node;             {                                                                                                         |            pcidevs_unlock();                 period = MICROSECS(local_sched.u.rtds.period);                                                        |                 budget = MICROSECS(local_sched.u.rtds.budget);                                                        |            if ( copy_to_guest_offset(ti->nodes, i, &node, 1) )                 if ( period > RTDS_MAX_PERIOD || budget < RTDS_MIN_BUDGET ||                                          |            {                      budget > period || period < RTDS_MIN_PERIOD )                                                    |                ret = -EFAULT;                 {                                                                                                     |                break;                     rc = -EINVAL;                                                                        
             |            }                     break;                                                                                            |                 }                                                                                                     |            /* Process a most 64 vCPUs without checking for preemptions. */                                                                                                                       |            if ( (++i > 0x3f) && hypercall_preempt_check() )                 spin_lock_irqsave(&prv->lock, flags);                                                                 |                break;                 svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);                                                           |        }                 svc->period = period;                                                                                 |                 svc->budget = budget;                                                                                 |        if ( !ret && (ti->num_devs != i) )                 spin_unlock_irqrestore(&prv->lock, flags);                                                            |        {                                                                                                                       |            ti->num_devs = i;             }                                                                                                         |            if ( __copy_field_to_guest(u_sysctl, op, u.pcitopoinfo.num_devs) )             /* Process a most 64 vCPUs without checking for preemptions. 
*/                                           |                ret = -EFAULT;             if ( (++index > 63) && hypercall_preempt_check() )                                                        |        }                 break;                                                                                                |        break;         }                                                                                                             |    }                                                                                                                       |#endif         /* Notify upper caller how many vcpus have been processed. */                                                 |         if ( !rc )                                                                                                    |    case XEN_SYSCTL_tmem_op:             op->u.v.nr_vcpus = index;                                                                                 |        ret = tmem_control(&op->u.tmem_op);         break; I have only compile tested this, but it looks to me that it can fly... > --- a/xen/include/public/domctl.h > +++ b/xen/include/public/domctl.h > + * Set or get info? 
> + * For schedulers supporting per-vcpu settings (e.g., RTDS): > + *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus; > + *  XEN_DOMCTL_SCHEDOP_getinfo gets default params; > + *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo sets (gets) params of vcpus; > + * > + * For schedulers not supporting per-vcpu settings: > + *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus; > + *  XEN_DOMCTL_SCHEDOP_getinfo gets domain-wise params; > + *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo returns error; > + */ >  #define XEN_DOMCTL_SCHEDOP_putinfo 0 >  #define XEN_DOMCTL_SCHEDOP_getinfo 1 > +#define XEN_DOMCTL_SCHEDOP_putvcpuinfo 2 > +#define XEN_DOMCTL_SCHEDOP_getvcpuinfo 3 >  struct xen_domctl_scheduler_op { >      uint32_t sched_id;  /* XEN_SCHEDULER_* */ >      uint32_t cmd;       /* XEN_DOMCTL_SCHEDOP_* */        /* IN/OUT */ >      union { > -        struct xen_domctl_sched_credit { > -            uint16_t weight; > -            uint16_t cap; > -        } credit; > -        struct xen_domctl_sched_credit2 { > -            uint16_t weight; > -        } credit2; > -        struct xen_domctl_sched_rtds { > -            uint32_t period; > -            uint32_t budget; > -        } rtds; > +        xen_domctl_sched_credit_t credit; > +        xen_domctl_sched_credit2_t credit2; > +        xen_domctl_sched_rtds_t rtds; > +        struct { > +            XEN_GUEST_HANDLE_64(xen_domctl_schedparam_vcpu_t) vcpus; > +            /* > +             * IN: Number of elements in vcpus array. > +             * OUT: Number of processed elements of vcpus array. > +             */ > +            uint32_t nr_vcpus; > +            uint32_t padding; > +        } v; >      } u; >  }; > That is: make it even more clear that the whole union is used as IN/OUT. Then, indeed, inside v, what is the meaning of the nr_vcpus field in each direction, as you're doing already. 
Regards, Dario -- <> (Raistlin Majere) ----------------------------------------------------------------- Dario Faggioli, Ph.D, http://about.me/dario.faggioli Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)