From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753150Ab0CCPgq (ORCPT ); Wed, 3 Mar 2010 10:36:46 -0500 Received: from mx1.redhat.com ([209.132.183.28]:7213 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752446Ab0CCPgp (ORCPT ); Wed, 3 Mar 2010 10:36:45 -0500 Date: Wed, 3 Mar 2010 10:36:35 -0500 From: Vivek Goyal To: Gui Jianfeng Cc: Jens Axboe , linux kernel mailing list Subject: Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Message-ID: <20100303153635.GB2866@redhat.com> References: <4B8DFFD7.2000908@cn.fujitsu.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <4B8DFFD7.2000908@cn.fujitsu.com> User-Agent: Mutt/1.5.19 (2009-01-05) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote: > Currently, IO Controller makes use of blkio.weight to assign weight for > all devices. Here a new user interface "blkio.policy" is introduced to > assign different weights for different devices. blkio.weight becomes the > default value for devices which are not configured by blkio.policy. > > You can use the following format to assigned specific weight for a given > device: > #echo "major:minor weight" > blkio.policy > > major:minor represents device number. > > And you can remove a policy as following: > #echo "major:minor 0" > blkio.policy > > Signed-off-by: Gui Jianfeng > --- > block/blk-cgroup.c | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++ > block/blk-cgroup.h | 9 ++ > block/cfq-iosched.c | 2 +- > 3 files changed, 258 insertions(+), 1 deletions(-) > > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c > index e7dbbaf..1fddb98 100644 > --- a/block/blk-cgroup.c > +++ b/block/blk-cgroup.c > @@ -16,6 +16,7 @@ > #include > #include > #include "blk-cgroup.h" > +#include > > static DEFINE_SPINLOCK(blkio_list_lock); > static LIST_HEAD(blkio_list); > @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list); > struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; > EXPORT_SYMBOL_GPL(blkio_root_cgroup); > > +static inline void policy_insert_node(struct blkio_cgroup *blkcg, > + struct io_policy_node *pn); ^^^^^^^^^^ How about blkio_policy_node? So this structure will represent per cgroup per device policy as specified by user. We can re-use the same structure if there are more per cgroup per device policies down the line. > +static inline void policy_delete_node(struct io_policy_node *pn); > +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg, > + dev_t dev); Not very sure about it but may be a good idea to prefix "blkio_" or "blkcg_" before all the functions you are defining. Just some uniformity in function names like cfq. > + > bool blkiocg_css_tryget(struct blkio_cgroup *blkcg) You might want to rebase it on Jen's for-linus branch as blkiocg_css_tryget() is gone in that branch Apart from above minor nits, this patch looks good to me. Will do some testing on it. Thanks Vivek > { > if (!css_tryget(&blkcg->css)) > @@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) > struct blkio_group *blkg; > struct hlist_node *n; > struct blkio_policy_type *blkiop; > + struct io_policy_node *pn; > > if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) > return -EINVAL; > @@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) > spin_lock(&blkio_list_lock); > spin_lock_irq(&blkcg->lock); > blkcg->weight = (unsigned int)val; > + > hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { > + pn = policy_search_node(blkcg, blkg->dev); > + > + if (pn) > + continue; Ok, so if we have specified a weight for a device, then blkio.weight update does not affect the weight on that device. Makes sense. > + > list_for_each_entry(blkiop, &blkio_list, list) > blkiop->ops.blkio_update_group_weight_fn(blkg, > blkcg->weight); > @@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg, > EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats); > #endif > > +static int check_dev_num(dev_t dev) > +{ > + int part = 0; > + struct gendisk *disk; > + > + disk = get_gendisk(dev, &part); > + > + if (!disk || part) > + return -ENODEV; > + > + return 0; > +} > + > +static int policy_parse_and_set(char *buf, struct io_policy_node *newpn) > +{ > + char *s[4], *p, *major_s = NULL, *minor_s = NULL; > + int ret; > + unsigned long major, minor, temp; > + int i = 0; > + dev_t dev; > + > + memset(s, 0, sizeof(s)); > + > + while ((p = strsep(&buf, " ")) != NULL) { > + if (!*p) > + continue; > + > + s[i++] = p; > + > + /* Prevent from inputing too many things */ > + if (i == 3) > + break; > + } > + > + if (i != 2) > + return -EINVAL; > + > + p = strsep(&s[0], ":"); > + > + if (p != NULL) > + major_s = p; > + else > + return -EINVAL; > + > + minor_s = s[0]; > + > + if (!minor_s) > + return -EINVAL; > + > + ret = strict_strtoul(major_s, 10, &major); > + if (ret) > + return -EINVAL; > + > + ret = strict_strtoul(minor_s, 10, &minor); > + if (ret) > + return -EINVAL; > + > + dev = MKDEV(major, minor); > + > + ret = check_dev_num(dev); > + if (ret) > + return ret; > + > + newpn->dev = dev; > + > + if (s[1] == NULL) > + return -EINVAL; > + > + ret = strict_strtoul(s[1], 10, &temp); > + if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || > + temp > BLKIO_WEIGHT_MAX) > + return -EINVAL; > + > + newpn->weight = temp; > + > + return 0; > +} > + > +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, > + dev_t dev) > +{ > + struct io_policy_node *pn; > + > + pn = policy_search_node(blkcg, dev); > + > + if (pn) > + return pn->weight; > + else > + return blkcg->weight; > +} > +EXPORT_SYMBOL_GPL(blkcg_get_weight); > + > +static inline void policy_insert_node(struct blkio_cgroup *blkcg, > + struct io_policy_node *pn) > +{ > + list_add(&pn->node, &blkcg->policy_list); > +} > + > +/* Must be called with blkcg->lock held */ > +static inline void policy_delete_node(struct io_policy_node *pn) > +{ > + list_del(&pn->node); > +} > + > +/* Must be called with blkcg->lock held */ > +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg, > + dev_t dev) > +{ > + struct io_policy_node *pn; > + > + if (list_empty(&blkcg->policy_list)) > + return NULL; > + > + list_for_each_entry(pn, &blkcg->policy_list, node) { > + if (pn->dev == dev) > + return pn; > + } > + > + return NULL; > +} > + > + > +static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft, > + const char *buffer) > +{ > + int ret = 0; > + char *buf; > + struct io_policy_node *newpn, *pn; > + struct blkio_cgroup *blkcg; > + struct blkio_group *blkg; > + int keep_newpn = 0; > + struct hlist_node *n; > + struct blkio_policy_type *blkiop; > + > + buf = kstrdup(buffer, GFP_KERNEL); > + if (!buf) > + return -ENOMEM; > + > + newpn = kzalloc(sizeof(*newpn), GFP_KERNEL); > + if (!newpn) { > + ret = -ENOMEM; > + goto free_buf; > + } > + > + ret = policy_parse_and_set(buf, newpn); > + if (ret) > + goto free_newpn; > + > + blkcg = cgroup_to_blkio_cgroup(cgrp); > + > + spin_lock_irq(&blkcg->lock); > + > + pn = policy_search_node(blkcg, newpn->dev); > + if (!pn) { > + if (newpn->weight != 0) { > + policy_insert_node(blkcg, newpn); > + keep_newpn = 1; > + } > + spin_unlock_irq(&blkcg->lock); > + goto update_io_group; > + } > + > + if (newpn->weight == 0) { > + /* weight == 0 means deleteing a policy */ > + policy_delete_node(pn); > + spin_unlock_irq(&blkcg->lock); > + goto update_io_group; > + } > + spin_unlock_irq(&blkcg->lock); > + > + pn->weight = newpn->weight; > + > +update_io_group: > + /* update weight for each cfqg */ > + spin_lock(&blkio_list_lock); > + spin_lock_irq(&blkcg->lock); > + > + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { > + if (newpn->dev == blkg->dev) { > + list_for_each_entry(blkiop, &blkio_list, list) > + blkiop->ops.blkio_update_group_weight_fn(blkg, > + newpn->weight ? > + newpn->weight : > + blkcg->weight); > + } > + } > + > + spin_unlock_irq(&blkcg->lock); > + spin_unlock(&blkio_list_lock); > + > +free_newpn: > + if (!keep_newpn) > + kfree(newpn); > +free_buf: > + kfree(buf); > + return ret; > +} > + > +static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft, > + struct seq_file *m) > +{ > + struct blkio_cgroup *blkcg; > + struct io_policy_node *pn; > + > + blkcg = cgroup_to_blkio_cgroup(cgrp); > + > + if (list_empty(&blkcg->policy_list)) > + goto out; > + > + seq_printf(m, "dev\tweight\n"); > + > + spin_lock_irq(&blkcg->lock); > + list_for_each_entry(pn, &blkcg->policy_list, node) { > + seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), > + MINOR(pn->dev), pn->weight); > + } > + spin_unlock_irq(&blkcg->lock); > +out: > + return 0; > +} > + > struct cftype blkio_files[] = { > { > + .name = "policy", > + .read_seq_string = blkiocg_policy_read, > + .write_string = blkiocg_policy_write, > + .max_write_len = 256, > + }, > + { > .name = "weight", > .read_u64 = blkiocg_weight_read, > .write_u64 = blkiocg_weight_write, > @@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) > struct blkio_group *blkg; > void *key; > struct blkio_policy_type *blkiop; > + struct io_policy_node *pn, *pntmp; > > rcu_read_lock(); > remove_entry: > @@ -264,7 +506,12 @@ remove_entry: > blkiop->ops.blkio_unlink_group_fn(key, blkg); > spin_unlock(&blkio_list_lock); > goto remove_entry; > + > done: > + list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) { > + policy_delete_node(pn); > + kfree(pn); > + } > free_css_id(&blkio_subsys, &blkcg->css); > rcu_read_unlock(); > kfree(blkcg); > @@ -294,6 +541,7 @@ done: > spin_lock_init(&blkcg->lock); > INIT_HLIST_HEAD(&blkcg->blkg_list); > > + INIT_LIST_HEAD(&blkcg->policy_list); > return &blkcg->css; > } > > diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h > index 4d316df..0551aa1 100644 > --- a/block/blk-cgroup.h > +++ b/block/blk-cgroup.h > @@ -22,6 +22,7 @@ struct blkio_cgroup { > unsigned int weight; > spinlock_t lock; > struct hlist_head blkg_list; > + struct list_head policy_list; /* list of io_policy_node */ > }; > > struct blkio_group { > @@ -43,8 +44,16 @@ struct blkio_group { > unsigned long sectors; > }; > > +struct io_policy_node { > + struct list_head node; > + dev_t dev; > + unsigned int weight; > +}; > + > extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg); > extern void blkiocg_css_put(struct blkio_cgroup *blkcg); > +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, > + dev_t dev); > > typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); > typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c > index 023f4e6..3d0dea1 100644 > --- a/block/cfq-iosched.c > +++ b/block/cfq-iosched.c > @@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) > if (!cfqg) > goto done; > > - cfqg->weight = blkcg->weight; > for_each_cfqg_st(cfqg, i, j, st) > *st = CFQ_RB_ROOT; > RB_CLEAR_NODE(&cfqg->rb_node); > @@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) > sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); > blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, > MKDEV(major, minor)); > + cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); > > /* Add group on cfqd list */ > hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); > -- > 1.5.4.rc3