From mboxrd@z Thu Jan  1 00:00:00 1970
From: George Dunlap <george.dunlap@eu.citrix.com>
Subject: Re: [PATCH v2 2/5] xen: rework locking for dump of
 scheduler info (debug-key r)
Date: Mon, 30 Mar 2015 14:34:20 +0100
Message-ID: <551950DC.4040309@eu.citrix.com>
References: <20150317152615.9867.48676.stgit@Solace.station>
	<20150317153301.9867.36514.stgit@Solace.station>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Return-path: <xen-devel-bounces@lists.xen.org>
In-Reply-To: <20150317153301.9867.36514.stgit@Solace.station>
List-Unsubscribe: <http://lists.xen.org/cgi-bin/mailman/options/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xen.org>
List-Help: <mailto:xen-devel-request@lists.xen.org?subject=help>
List-Subscribe: <http://lists.xen.org/cgi-bin/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=subscribe>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Dario Faggioli <dario.faggioli@citrix.com>, Xen-devel <xen-devel@lists.xen.org>
Cc: Keir Fraser <keir@xen.org>, Meng Xu <xumengpanda@gmail.com>, Meng Xu <mengxu@cis.upenn.edu>, Jan Beulich <JBeulich@suse.com>
List-Id: xen-devel@lists.xenproject.org

On 03/17/2015 03:33 PM, Dario Faggioli wrote:
> such as it is taken care of by the various schedulers, rather
> than happening in schedule.c. In fact, it is the schedulers
> that know better which locks are necessary for the specific
> dumping operations.
> 
> While there, fix a few style issues (indentation, trailing
> whitespace, parentheses and blank line after var declarations)
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Meng Xu <xumengpanda@gmail.com>
> Cc: Jan Beulich <JBeulich@suse.com>
> Cc: Keir Fraser <keir@xen.org>
> Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>
> ---
> Changes from v1:
>  * take care of SEDF too, as requested during review;
> ---
> As far as tags are concerned, I kept Meng's 'Reviewed-by', as I think this
> applies mostly to chenges to sched_rt.c. I, OTOH, dropped George's one, to
> give him the chance to look at changes to sched_sedf.c.
> ---
>  xen/common/sched_credit.c  |   42 ++++++++++++++++++++++++++++++++++++++++--
>  xen/common/sched_credit2.c |   40 ++++++++++++++++++++++++++++++++--------
>  xen/common/sched_rt.c      |    7 +++++--
>  xen/common/sched_sedf.c    |   16 ++++++++++++++++
>  xen/common/schedule.c      |    5 ++---
>  5 files changed, 95 insertions(+), 15 deletions(-)
> 
> diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
> index bec67ff..953ecb0 100644
> --- a/xen/common/sched_credit.c
> +++ b/xen/common/sched_credit.c
> @@ -26,6 +26,23 @@
>  
>  
>  /*
> + * Locking:
> + * - Scheduler-lock (a.k.a. runqueue lock):
> + *  + is per-runqueue, and there is one runqueue per-cpu;
> + *  + serializes all runqueue manipulation operations;
> + * - Private data lock (a.k.a. private scheduler lock):
> + *  + serializes accesses to the scheduler global state (weight,
> + *    credit, balance_credit, etc);
> + *  + serializes updates to the domains' scheduling parameters.
> + *
> + * Ordering is "private lock always comes first":
> + *  + if we need both locks, we must acquire the private
> + *    scheduler lock for first;
> + *  + if we already own a runqueue lock, we must never acquire
> + *    the private scheduler lock.
> + */
> +
> +/*
>   * Basic constants
>   */
>  #define CSCHED_DEFAULT_WEIGHT       256
> @@ -1750,11 +1767,24 @@ static void
>  csched_dump_pcpu(const struct scheduler *ops, int cpu)
>  {
>      struct list_head *runq, *iter;
> +    struct csched_private *prv = CSCHED_PRIV(ops);
>      struct csched_pcpu *spc;
>      struct csched_vcpu *svc;
> +    spinlock_t *lock = lock;
> +    unsigned long flags;
>      int loop;
>  #define cpustr keyhandler_scratch
>  
> +    /*
> +     * We need both locks:
> +     * - csched_dump_vcpu() wants to access domains' scheduling
> +     *   parameters, which are protected by the private scheduler lock;
> +     * - we scan through the runqueue, so we need the proper runqueue
> +     *   lock (the one of the runqueue of this cpu).
> +     */
> +    spin_lock_irqsave(&prv->lock, flags);
> +    lock = pcpu_schedule_lock(cpu);
> +
>      spc = CSCHED_PCPU(cpu);
>      runq = &spc->runq;
>  
> @@ -1781,6 +1811,9 @@ csched_dump_pcpu(const struct scheduler *ops, int cpu)
>              csched_dump_vcpu(svc);
>          }
>      }
> +
> +    pcpu_schedule_unlock(lock, cpu);
> +    spin_unlock_irqrestore(&prv->lock, flags);
>  #undef cpustr
>  }
>  
> @@ -1792,7 +1825,7 @@ csched_dump(const struct scheduler *ops)
>      int loop;
>      unsigned long flags;
>  
> -    spin_lock_irqsave(&(prv->lock), flags);
> +    spin_lock_irqsave(&prv->lock, flags);
>  
>  #define idlers_buf keyhandler_scratch
>  
> @@ -1835,15 +1868,20 @@ csched_dump(const struct scheduler *ops)
>          list_for_each( iter_svc, &sdom->active_vcpu )
>          {
>              struct csched_vcpu *svc;
> +            spinlock_t *lock;
> +
>              svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
> +            lock = vcpu_schedule_lock(svc->vcpu);
>  
>              printk("\t%3d: ", ++loop);
>              csched_dump_vcpu(svc);
> +
> +            vcpu_schedule_unlock(lock, svc->vcpu);
>          }
>      }
>  #undef idlers_buf
>  
> -    spin_unlock_irqrestore(&(prv->lock), flags);
> +    spin_unlock_irqrestore(&prv->lock, flags);
>  }
>  
>  static int
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index be6859a..ae9b359 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -51,8 +51,6 @@
>   * credit2 wiki page:
>   *  http://wiki.xen.org/wiki/Credit2_Scheduler_Development
>   * TODO:
> - * + Immediate bug-fixes
> - *  - Do per-runqueue, grab proper lock for dump debugkey
>   * + Multiple sockets
>   *  - Detect cpu layout and make runqueue map, one per L2 (make_runq_map())
>   *  - Simple load balancer / runqueue assignment
> @@ -1832,12 +1830,24 @@ csched2_dump_vcpu(struct csched2_vcpu *svc)
>  static void
>  csched2_dump_pcpu(const struct scheduler *ops, int cpu)
>  {
> +    struct csched2_private *prv = CSCHED2_PRIV(ops);
>      struct list_head *runq, *iter;
>      struct csched2_vcpu *svc;
> +    unsigned long flags;
> +    spinlock_t *lock;
>      int loop;
>      char cpustr[100];
>  
> -    /* FIXME: Do locking properly for access to runqueue structures */
> +    /*
> +     * We need both locks:
> +     * - csched2_dump_vcpu() wants to access domains' scheduling
> +     *   parameters, which are protected by the private scheduler lock;
> +     * - we scan through the runqueue, so we need the proper runqueue
> +     *   lock (the one of the runqueue this cpu is associated to).
> +     */
> +    spin_lock_irqsave(&prv->lock, flags);
> +    lock = per_cpu(schedule_data, cpu).schedule_lock;
> +    spin_lock(lock);
>  
>      runq = &RQD(ops, cpu)->runq;
>  
> @@ -1864,6 +1874,9 @@ csched2_dump_pcpu(const struct scheduler *ops, int cpu)
>              csched2_dump_vcpu(svc);
>          }
>      }
> +
> +    spin_unlock(lock);
> +    spin_unlock_irqrestore(&prv->lock, flags);
>  }
>  
>  static void
> @@ -1871,8 +1884,13 @@ csched2_dump(const struct scheduler *ops)
>  {
>      struct list_head *iter_sdom, *iter_svc;
>      struct csched2_private *prv = CSCHED2_PRIV(ops);
> +    unsigned long flags;
>      int i, loop;
>  
> +    /* We need the private lock as we access global scheduler data
> +     * and (below) the list of active domains. */
> +    spin_lock_irqsave(&prv->lock, flags);
> +
>      printk("Active queues: %d\n"
>             "\tdefault-weight     = %d\n",
>             cpumask_weight(&prv->active_queues),
> @@ -1895,7 +1913,6 @@ csched2_dump(const struct scheduler *ops)
>                 fraction);
>  
>      }
> -    /* FIXME: Locking! */
>  
>      printk("Domain info:\n");
>      loop = 0;
> @@ -1904,20 +1921,27 @@ csched2_dump(const struct scheduler *ops)
>          struct csched2_dom *sdom;
>          sdom = list_entry(iter_sdom, struct csched2_dom, sdom_elem);
>  
> -       printk("\tDomain: %d w %d v %d\n\t", 
> -              sdom->dom->domain_id, 
> -              sdom->weight, 
> -              sdom->nr_vcpus);
> +        printk("\tDomain: %d w %d v %d\n\t",
> +               sdom->dom->domain_id,
> +               sdom->weight,
> +               sdom->nr_vcpus);
>  
>          list_for_each( iter_svc, &sdom->vcpu )
>          {
>              struct csched2_vcpu *svc;
> +            spinlock_t *lock;
> +
>              svc = list_entry(iter_svc, struct csched2_vcpu, sdom_elem);
> +            lock = vcpu_schedule_lock(svc->vcpu);
>  
>              printk("\t%3d: ", ++loop);
>              csched2_dump_vcpu(svc);
> +
> +            vcpu_schedule_unlock(lock, svc->vcpu);
>          }
>      }
> +
> +    spin_unlock_irqrestore(&prv->lock, flags);
>  }
>  
>  static void activate_runqueue(struct csched2_private *prv, int rqi)
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 2b0b7c6..7c39a9e 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -253,9 +253,12 @@ rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc)
>  static void
>  rt_dump_pcpu(const struct scheduler *ops, int cpu)
>  {
> -    struct rt_vcpu *svc = rt_vcpu(curr_on_cpu(cpu));
> +    struct rt_private *prv = rt_priv(ops);
> +    unsigned long flags;
>  
> -    rt_dump_vcpu(ops, svc);
> +    spin_lock_irqsave(&prv->lock, flags);
> +    rt_dump_vcpu(ops, rt_vcpu(curr_on_cpu(cpu)));
> +    spin_unlock_irqrestore(&prv->lock, flags);
>  }
>  
>  static void
> diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
> index c4f4b60..a1a4cb7 100644
> --- a/xen/common/sched_sedf.c
> +++ b/xen/common/sched_sedf.c
> @@ -1206,12 +1206,25 @@ static void sedf_dump_domain(struct vcpu *d)
>  /* Dumps all domains on the specified cpu */
>  static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
>  {
> +    struct sedf_priv_info *prv = SEDF_PRIV(ops);
>      struct list_head      *list, *queue, *tmp;
>      struct sedf_vcpu_info *d_inf;
>      struct domain         *d;
>      struct vcpu    *ed;
> +    spinlock_t *lock;
> +    unsigned long flags;
>      int loop = 0;
>   
> +    /*
> +     * We need both locks, as:
> +     * - we access domains' parameters, which are protected by the
> +     *   private scheduler lock;
> +     * - we scan through the various queues, so we need the proper
> +     *   runqueue lock (i.e., the one for this pCPU).
> +     */
> +    spin_lock_irqsave(&prv->lock, flags);
> +    lock = pcpu_schedule_lock(i);
> +
>      printk("now=%"PRIu64"\n",NOW());
>      queue = RUNQ(i);
>      printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
> @@ -1275,6 +1288,9 @@ static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
>          }
>      }
>      rcu_read_unlock(&domlist_read_lock);
> +
> +    pcpu_schedule_unlock(lock, i);
> +    spin_unlock_irqrestore(&prv->lock, flags);
>  }

We're grabbing the private lock to protect reading the domain-related
data, but AFAICT the only other place the lock is taken is when the data
is being *written* (in sched_adjust()).

Well anyway; you're making it more correct than it was, and you're
fixing the regression in v1 of the patch, so I think this is fine. :-)

Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>

(Also, I have no idea how it got to be nearly 2 weeks without reviewing
this one...)