All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
@ 2010-03-03  6:21 Gui Jianfeng
  2010-03-03  9:01 ` Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-03  6:21 UTC (permalink / raw)
  To: Jens Axboe, Vivek Goyal; +Cc: linux kernel mailing list

Currently, IO Controller makes use of blkio.weight to assign weight for
all devices. Here a new user interface "blkio.policy" is introduced to 
assign different weights for different devices. blkio.weight becomes the
default value for devices which are not configured by blkio.policy.

You can use the following format to assigned specific weight for a given
device:
#echo "major:minor weight" > blkio.policy

major:minor represents device number.

And you can remove a policy as following:
#echo "major:minor 0" > blkio.policy

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
---
 block/blk-cgroup.c  |  248 +++++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-cgroup.h  |    9 ++
 block/cfq-iosched.c |    2 +-
 3 files changed, 258 insertions(+), 1 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e7dbbaf..1fddb98 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include "blk-cgroup.h"
+#include <linux/genhd.h>
 
 static DEFINE_SPINLOCK(blkio_list_lock);
 static LIST_HEAD(blkio_list);
@@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
+static inline void policy_insert_node(struct blkio_cgroup *blkcg,
+				      struct io_policy_node *pn);
+static inline void policy_delete_node(struct io_policy_node *pn);
+static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
+						 dev_t dev);
+
 bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
 {
 	if (!css_tryget(&blkcg->css))
@@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	struct blkio_group *blkg;
 	struct hlist_node *n;
 	struct blkio_policy_type *blkiop;
+	struct io_policy_node *pn;
 
 	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
 		return -EINVAL;
@@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	spin_lock(&blkio_list_lock);
 	spin_lock_irq(&blkcg->lock);
 	blkcg->weight = (unsigned int)val;
+
 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		pn = policy_search_node(blkcg, blkg->dev);
+
+		if (pn)
+			continue;
+
 		list_for_each_entry(blkiop, &blkio_list, list)
 			blkiop->ops.blkio_update_group_weight_fn(blkg,
 					blkcg->weight);
@@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
 EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
 #endif
 
+static int check_dev_num(dev_t dev)
+{
+	int part = 0;
+	struct gendisk *disk;
+
+	disk = get_gendisk(dev, &part);
+
+	if (!disk || part)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
+{
+	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
+	int ret;
+	unsigned long major, minor, temp;
+	int i = 0;
+	dev_t dev;
+
+	memset(s, 0, sizeof(s));
+
+	while ((p = strsep(&buf, " ")) != NULL) {
+		if (!*p)
+			continue;
+
+		s[i++] = p;
+
+		/* Prevent from inputing too many things */
+		if (i == 3)
+			break;
+	}
+
+	if (i != 2)
+		return -EINVAL;
+
+	p = strsep(&s[0], ":");
+
+	if (p != NULL)
+		major_s = p;
+	else
+		return -EINVAL;
+
+	minor_s = s[0];
+
+	if (!minor_s)
+		return -EINVAL;
+
+	ret = strict_strtoul(major_s, 10, &major);
+	if (ret)
+		return -EINVAL;
+
+	ret = strict_strtoul(minor_s, 10, &minor);
+	if (ret)
+		return -EINVAL;
+
+	dev = MKDEV(major, minor);
+
+	ret = check_dev_num(dev);
+	if (ret)
+		return ret;
+
+	newpn->dev = dev;
+
+	if (s[1] == NULL)
+		return -EINVAL;
+
+	ret = strict_strtoul(s[1], 10, &temp);
+	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+	    temp > BLKIO_WEIGHT_MAX)
+		return -EINVAL;
+
+	newpn->weight =  temp;
+
+	return 0;
+}
+
+unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+				    dev_t dev)
+{
+	struct io_policy_node *pn;
+
+	pn = policy_search_node(blkcg, dev);
+
+	if (pn)
+		return pn->weight;
+	else
+		return blkcg->weight;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_weight);
+
+static inline void policy_insert_node(struct blkio_cgroup *blkcg,
+				      struct io_policy_node *pn)
+{
+	list_add(&pn->node, &blkcg->policy_list);
+}
+
+/* Must be called with blkcg->lock held */
+static inline void policy_delete_node(struct io_policy_node *pn)
+{
+	list_del(&pn->node);
+}
+
+/* Must be called with blkcg->lock held */
+static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
+						 dev_t dev)
+{
+	struct io_policy_node *pn;
+
+	if (list_empty(&blkcg->policy_list))
+		return NULL;
+
+	list_for_each_entry(pn, &blkcg->policy_list, node) {
+		if (pn->dev == dev)
+			return pn;
+	}
+
+	return NULL;
+}
+
+
+static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft,
+				const char *buffer)
+{
+	int ret = 0;
+	char *buf;
+	struct io_policy_node *newpn, *pn;
+	struct blkio_cgroup *blkcg;
+	struct blkio_group *blkg;
+	int keep_newpn = 0;
+	struct hlist_node *n;
+	struct blkio_policy_type *blkiop;
+
+	buf = kstrdup(buffer, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
+	if (!newpn) {
+		ret = -ENOMEM;
+		goto free_buf;
+	}
+
+	ret = policy_parse_and_set(buf, newpn);
+	if (ret)
+		goto free_newpn;
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	spin_lock_irq(&blkcg->lock);
+
+	pn = policy_search_node(blkcg, newpn->dev);
+	if (!pn) {
+		if (newpn->weight != 0) {
+			policy_insert_node(blkcg, newpn);
+			keep_newpn = 1;
+		}
+		spin_unlock_irq(&blkcg->lock);
+		goto update_io_group;
+	}
+
+	if (newpn->weight == 0) {
+		/* weight == 0 means deleteing a policy */
+		policy_delete_node(pn);
+		spin_unlock_irq(&blkcg->lock);
+		goto update_io_group;
+	}
+	spin_unlock_irq(&blkcg->lock);
+
+	pn->weight = newpn->weight;
+
+update_io_group:
+	/* update weight for each cfqg */
+	spin_lock(&blkio_list_lock);
+	spin_lock_irq(&blkcg->lock);
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		if (newpn->dev == blkg->dev) {
+			list_for_each_entry(blkiop, &blkio_list, list)
+				blkiop->ops.blkio_update_group_weight_fn(blkg,
+						newpn->weight ?
+						newpn->weight :
+						blkcg->weight);
+		}
+	}
+
+	spin_unlock_irq(&blkcg->lock);
+	spin_unlock(&blkio_list_lock);
+
+free_newpn:
+	if (!keep_newpn)
+		kfree(newpn);
+free_buf:
+	kfree(buf);
+	return ret;
+}
+
+static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft,
+			       struct seq_file *m)
+{
+	struct blkio_cgroup *blkcg;
+	struct io_policy_node *pn;
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	if (list_empty(&blkcg->policy_list))
+		goto out;
+
+	seq_printf(m, "dev\tweight\n");
+
+	spin_lock_irq(&blkcg->lock);
+	list_for_each_entry(pn, &blkcg->policy_list, node) {
+		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
+			   MINOR(pn->dev), pn->weight);
+	}
+	spin_unlock_irq(&blkcg->lock);
+out:
+	return 0;
+}
+
 struct cftype blkio_files[] = {
 	{
+		.name = "policy",
+		.read_seq_string = blkiocg_policy_read,
+		.write_string = blkiocg_policy_write,
+		.max_write_len = 256,
+	},
+	{
 		.name = "weight",
 		.read_u64 = blkiocg_weight_read,
 		.write_u64 = blkiocg_weight_write,
@@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 	struct blkio_group *blkg;
 	void *key;
 	struct blkio_policy_type *blkiop;
+	struct io_policy_node *pn, *pntmp;
 
 	rcu_read_lock();
 remove_entry:
@@ -264,7 +506,12 @@ remove_entry:
 		blkiop->ops.blkio_unlink_group_fn(key, blkg);
 	spin_unlock(&blkio_list_lock);
 	goto remove_entry;
+
 done:
+	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
+		policy_delete_node(pn);
+		kfree(pn);
+	}
 	free_css_id(&blkio_subsys, &blkcg->css);
 	rcu_read_unlock();
 	kfree(blkcg);
@@ -294,6 +541,7 @@ done:
 	spin_lock_init(&blkcg->lock);
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 
+	INIT_LIST_HEAD(&blkcg->policy_list);
 	return &blkcg->css;
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 4d316df..0551aa1 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -22,6 +22,7 @@ struct blkio_cgroup {
 	unsigned int weight;
 	spinlock_t lock;
 	struct hlist_head blkg_list;
+	struct list_head policy_list; /* list of io_policy_node */
 };
 
 struct blkio_group {
@@ -43,8 +44,16 @@ struct blkio_group {
 	unsigned long sectors;
 };
 
+struct io_policy_node {
+	struct list_head node;
+	dev_t dev;
+	unsigned int weight;
+};
+
 extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
 extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
+extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+				     dev_t dev);
 
 typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
 typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 023f4e6..3d0dea1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	if (!cfqg)
 		goto done;
 
-	cfqg->weight = blkcg->weight;
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
 	RB_CLEAR_NODE(&cfqg->rb_node);
@@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
 	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
 					MKDEV(major, minor));
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
 	/* Add group on cfqd list */
 	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-- 
1.5.4.rc3


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-03  6:21 [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Gui Jianfeng
@ 2010-03-03  9:01 ` Jens Axboe
  2010-03-03 14:33 ` Vivek Goyal
  2010-03-03 15:36 ` Vivek Goyal
  2 siblings, 0 replies; 40+ messages in thread
From: Jens Axboe @ 2010-03-03  9:01 UTC (permalink / raw)
  To: Gui Jianfeng; +Cc: Vivek Goyal, linux kernel mailing list

On Wed, Mar 03 2010, Gui Jianfeng wrote:
> Currently, IO Controller makes use of blkio.weight to assign weight for
> all devices. Here a new user interface "blkio.policy" is introduced to 
> assign different weights for different devices. blkio.weight becomes the
> default value for devices which are not configured by blkio.policy.
> 
> You can use the following format to assigned specific weight for a given
> device:
> #echo "major:minor weight" > blkio.policy
> 
> major:minor represents device number.
> 
> And you can remove a policy as following:
> #echo "major:minor 0" > blkio.policy

Looks good to me. Vivek, any objections?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-03  6:21 [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Gui Jianfeng
  2010-03-03  9:01 ` Jens Axboe
@ 2010-03-03 14:33 ` Vivek Goyal
  2010-03-03 15:47   ` Vivek Goyal
  2010-03-04  0:27   ` Gui Jianfeng
  2010-03-03 15:36 ` Vivek Goyal
  2 siblings, 2 replies; 40+ messages in thread
From: Vivek Goyal @ 2010-03-03 14:33 UTC (permalink / raw)
  To: Gui Jianfeng; +Cc: Jens Axboe, linux kernel mailing list

On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
> Currently, IO Controller makes use of blkio.weight to assign weight for
> all devices. Here a new user interface "blkio.policy" is introduced to 
> assign different weights for different devices. blkio.weight becomes the
> default value for devices which are not configured by blkio.policy.
> 
> You can use the following format to assigned specific weight for a given
> device:
> #echo "major:minor weight" > blkio.policy
> 

Hi Gui,

Can we use a different name for per device weight file name than "blkio.policy".
May be "blkio.weight_device" or "blkio.weight_per_device" etc.

The reason being that "blkio.policy" name might be more suitable to switch
between differnt kind of BW control policies (proportional, max bandwidth etc),
once we implement some kind of max BW controlling policies also. So it
might be a good idea to not use that file name for specifying per device
weights.

Secondly, where is it useful. I mean do you have a scenario where you will
use it. Why I am asking is that we never had ioprio per device kind of
interface. ioprio was associated with task and was global and same on
every device in the system.

I am wondering, what are the scenarios where an application is more important
on one device and less important on other device to warrant different weights
on different devices.

Thanks
Vivek
 
 
> major:minor represents device number.
> 
> And you can remove a policy as following:
> #echo "major:minor 0" > blkio.policy
> 
> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
> ---
>  block/blk-cgroup.c  |  248 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/blk-cgroup.h  |    9 ++
>  block/cfq-iosched.c |    2 +-
>  3 files changed, 258 insertions(+), 1 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index e7dbbaf..1fddb98 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -16,6 +16,7 @@
>  #include <linux/module.h>
>  #include <linux/err.h>
>  #include "blk-cgroup.h"
> +#include <linux/genhd.h>
>  
>  static DEFINE_SPINLOCK(blkio_list_lock);
>  static LIST_HEAD(blkio_list);
> @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>  
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct io_policy_node *pn);
> +static inline void policy_delete_node(struct io_policy_node *pn);
> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev);
> +
>  bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
>  {
>  	if (!css_tryget(&blkcg->css))
> @@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	struct blkio_group *blkg;
>  	struct hlist_node *n;
>  	struct blkio_policy_type *blkiop;
> +	struct io_policy_node *pn;
>  
>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>  		return -EINVAL;
> @@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	spin_lock(&blkio_list_lock);
>  	spin_lock_irq(&blkcg->lock);
>  	blkcg->weight = (unsigned int)val;
> +
>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		pn = policy_search_node(blkcg, blkg->dev);
> +
> +		if (pn)
> +			continue;
> +
>  		list_for_each_entry(blkiop, &blkio_list, list)
>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>  					blkcg->weight);
> @@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>  #endif
>  
> +static int check_dev_num(dev_t dev)
> +{
> +	int part = 0;
> +	struct gendisk *disk;
> +
> +	disk = get_gendisk(dev, &part);
> +
> +	if (!disk || part)
> +		return -ENODEV;
> +
> +	return 0;
> +}
> +
> +static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
> +{
> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> +	int ret;
> +	unsigned long major, minor, temp;
> +	int i = 0;
> +	dev_t dev;
> +
> +	memset(s, 0, sizeof(s));
> +
> +	while ((p = strsep(&buf, " ")) != NULL) {
> +		if (!*p)
> +			continue;
> +
> +		s[i++] = p;
> +
> +		/* Prevent from inputing too many things */
> +		if (i == 3)
> +			break;
> +	}
> +
> +	if (i != 2)
> +		return -EINVAL;
> +
> +	p = strsep(&s[0], ":");
> +
> +	if (p != NULL)
> +		major_s = p;
> +	else
> +		return -EINVAL;
> +
> +	minor_s = s[0];
> +
> +	if (!minor_s)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(major_s, 10, &major);
> +	if (ret)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(minor_s, 10, &minor);
> +	if (ret)
> +		return -EINVAL;
> +
> +	dev = MKDEV(major, minor);
> +
> +	ret = check_dev_num(dev);
> +	if (ret)
> +		return ret;
> +
> +	newpn->dev = dev;
> +
> +	if (s[1] == NULL)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(s[1], 10, &temp);
> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> +	    temp > BLKIO_WEIGHT_MAX)
> +		return -EINVAL;
> +
> +	newpn->weight =  temp;
> +
> +	return 0;
> +}
> +
> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				    dev_t dev)
> +{
> +	struct io_policy_node *pn;
> +
> +	pn = policy_search_node(blkcg, dev);
> +
> +	if (pn)
> +		return pn->weight;
> +	else
> +		return blkcg->weight;
> +}
> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
> +
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct io_policy_node *pn)
> +{
> +	list_add(&pn->node, &blkcg->policy_list);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static inline void policy_delete_node(struct io_policy_node *pn)
> +{
> +	list_del(&pn->node);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev)
> +{
> +	struct io_policy_node *pn;
> +
> +	if (list_empty(&blkcg->policy_list))
> +		return NULL;
> +
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		if (pn->dev == dev)
> +			return pn;
> +	}
> +
> +	return NULL;
> +}
> +
> +
> +static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft,
> +				const char *buffer)
> +{
> +	int ret = 0;
> +	char *buf;
> +	struct io_policy_node *newpn, *pn;
> +	struct blkio_cgroup *blkcg;
> +	struct blkio_group *blkg;
> +	int keep_newpn = 0;
> +	struct hlist_node *n;
> +	struct blkio_policy_type *blkiop;
> +
> +	buf = kstrdup(buffer, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
> +	if (!newpn) {
> +		ret = -ENOMEM;
> +		goto free_buf;
> +	}
> +
> +	ret = policy_parse_and_set(buf, newpn);
> +	if (ret)
> +		goto free_newpn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	spin_lock_irq(&blkcg->lock);
> +
> +	pn = policy_search_node(blkcg, newpn->dev);
> +	if (!pn) {
> +		if (newpn->weight != 0) {
> +			policy_insert_node(blkcg, newpn);
> +			keep_newpn = 1;
> +		}
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +
> +	if (newpn->weight == 0) {
> +		/* weight == 0 means deleteing a policy */
> +		policy_delete_node(pn);
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +
> +	pn->weight = newpn->weight;
> +
> +update_io_group:
> +	/* update weight for each cfqg */
> +	spin_lock(&blkio_list_lock);
> +	spin_lock_irq(&blkcg->lock);
> +
> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		if (newpn->dev == blkg->dev) {
> +			list_for_each_entry(blkiop, &blkio_list, list)
> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
> +						newpn->weight ?
> +						newpn->weight :
> +						blkcg->weight);
> +		}
> +	}
> +
> +	spin_unlock_irq(&blkcg->lock);
> +	spin_unlock(&blkio_list_lock);
> +
> +free_newpn:
> +	if (!keep_newpn)
> +		kfree(newpn);
> +free_buf:
> +	kfree(buf);
> +	return ret;
> +}
> +
> +static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft,
> +			       struct seq_file *m)
> +{
> +	struct blkio_cgroup *blkcg;
> +	struct io_policy_node *pn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	if (list_empty(&blkcg->policy_list))
> +		goto out;
> +
> +	seq_printf(m, "dev\tweight\n");
> +
> +	spin_lock_irq(&blkcg->lock);
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> +			   MINOR(pn->dev), pn->weight);
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +out:
> +	return 0;
> +}
> +
>  struct cftype blkio_files[] = {
>  	{
> +		.name = "policy",
> +		.read_seq_string = blkiocg_policy_read,
> +		.write_string = blkiocg_policy_write,
> +		.max_write_len = 256,
> +	},
> +	{
>  		.name = "weight",
>  		.read_u64 = blkiocg_weight_read,
>  		.write_u64 = blkiocg_weight_write,
> @@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>  	struct blkio_group *blkg;
>  	void *key;
>  	struct blkio_policy_type *blkiop;
> +	struct io_policy_node *pn, *pntmp;
>  
>  	rcu_read_lock();
>  remove_entry:
> @@ -264,7 +506,12 @@ remove_entry:
>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>  	spin_unlock(&blkio_list_lock);
>  	goto remove_entry;
> +
>  done:
> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
> +		policy_delete_node(pn);
> +		kfree(pn);
> +	}
>  	free_css_id(&blkio_subsys, &blkcg->css);
>  	rcu_read_unlock();
>  	kfree(blkcg);
> @@ -294,6 +541,7 @@ done:
>  	spin_lock_init(&blkcg->lock);
>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>  
> +	INIT_LIST_HEAD(&blkcg->policy_list);
>  	return &blkcg->css;
>  }
>  
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index 4d316df..0551aa1 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>  	unsigned int weight;
>  	spinlock_t lock;
>  	struct hlist_head blkg_list;
> +	struct list_head policy_list; /* list of io_policy_node */
>  };
>  
>  struct blkio_group {
> @@ -43,8 +44,16 @@ struct blkio_group {
>  	unsigned long sectors;
>  };
>  
> +struct io_policy_node {
> +	struct list_head node;
> +	dev_t dev;
> +	unsigned int weight;
> +};
> +
>  extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
>  extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				     dev_t dev);
>  
>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 023f4e6..3d0dea1 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	if (!cfqg)
>  		goto done;
>  
> -	cfqg->weight = blkcg->weight;
>  	for_each_cfqg_st(cfqg, i, j, st)
>  		*st = CFQ_RB_ROOT;
>  	RB_CLEAR_NODE(&cfqg->rb_node);
> @@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>  					MKDEV(major, minor));
> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>  
>  	/* Add group on cfqd list */
>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> -- 
> 1.5.4.rc3

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-03  6:21 [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Gui Jianfeng
  2010-03-03  9:01 ` Jens Axboe
  2010-03-03 14:33 ` Vivek Goyal
@ 2010-03-03 15:36 ` Vivek Goyal
  2010-03-04  0:29   ` Gui Jianfeng
  2 siblings, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-03 15:36 UTC (permalink / raw)
  To: Gui Jianfeng; +Cc: Jens Axboe, linux kernel mailing list

On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
> Currently, IO Controller makes use of blkio.weight to assign weight for
> all devices. Here a new user interface "blkio.policy" is introduced to 
> assign different weights for different devices. blkio.weight becomes the
> default value for devices which are not configured by blkio.policy.
> 
> You can use the following format to assigned specific weight for a given
> device:
> #echo "major:minor weight" > blkio.policy
> 
> major:minor represents device number.
> 
> And you can remove a policy as following:
> #echo "major:minor 0" > blkio.policy
> 
> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
> ---
>  block/blk-cgroup.c  |  248 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/blk-cgroup.h  |    9 ++
>  block/cfq-iosched.c |    2 +-
>  3 files changed, 258 insertions(+), 1 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index e7dbbaf..1fddb98 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -16,6 +16,7 @@
>  #include <linux/module.h>
>  #include <linux/err.h>
>  #include "blk-cgroup.h"
> +#include <linux/genhd.h>
>  
>  static DEFINE_SPINLOCK(blkio_list_lock);
>  static LIST_HEAD(blkio_list);
> @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>  
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct io_policy_node *pn);
					     ^^^^^^^^^^
How about blkio_policy_node?

So this structure will represent per cgroup per device policy as specified
by user. We can re-use the same structure if there are more per cgroup
per device policies down the line. 

> +static inline void policy_delete_node(struct io_policy_node *pn);
> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev);

Not very sure about it but may be a good idea to prefix "blkio_" or
"blkcg_" before all the functions you are defining. Just some uniformity in
function names like cfq.

> +
>  bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)

You might want to rebase it on Jen's for-linus branch as blkiocg_css_tryget()
is gone in that branch

Apart from above minor nits, this patch looks good to me. Will do some
testing on it.

Thanks
Vivek

>  {
>  	if (!css_tryget(&blkcg->css))
> @@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	struct blkio_group *blkg;
>  	struct hlist_node *n;
>  	struct blkio_policy_type *blkiop;
> +	struct io_policy_node *pn;
>  
>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>  		return -EINVAL;
> @@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	spin_lock(&blkio_list_lock);
>  	spin_lock_irq(&blkcg->lock);
>  	blkcg->weight = (unsigned int)val;
> +
>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		pn = policy_search_node(blkcg, blkg->dev);
> +
> +		if (pn)
> +			continue;

Ok, so if we have specified a weight for a device, then blkio.weight
update does not affect the weight on that device. Makes sense.

> +
>  		list_for_each_entry(blkiop, &blkio_list, list)
>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>  					blkcg->weight);
> @@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>  #endif
>  
> +static int check_dev_num(dev_t dev)
> +{
> +	int part = 0;
> +	struct gendisk *disk;
> +
> +	disk = get_gendisk(dev, &part);
> +
> +	if (!disk || part)
> +		return -ENODEV;
> +
> +	return 0;
> +}
> +
> +static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
> +{
> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> +	int ret;
> +	unsigned long major, minor, temp;
> +	int i = 0;
> +	dev_t dev;
> +
> +	memset(s, 0, sizeof(s));
> +
> +	while ((p = strsep(&buf, " ")) != NULL) {
> +		if (!*p)
> +			continue;
> +
> +		s[i++] = p;
> +
> +		/* Prevent from inputing too many things */
> +		if (i == 3)
> +			break;
> +	}
> +
> +	if (i != 2)
> +		return -EINVAL;
> +
> +	p = strsep(&s[0], ":");
> +
> +	if (p != NULL)
> +		major_s = p;
> +	else
> +		return -EINVAL;
> +
> +	minor_s = s[0];
> +
> +	if (!minor_s)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(major_s, 10, &major);
> +	if (ret)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(minor_s, 10, &minor);
> +	if (ret)
> +		return -EINVAL;
> +
> +	dev = MKDEV(major, minor);
> +
> +	ret = check_dev_num(dev);
> +	if (ret)
> +		return ret;
> +
> +	newpn->dev = dev;
> +
> +	if (s[1] == NULL)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(s[1], 10, &temp);
> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> +	    temp > BLKIO_WEIGHT_MAX)
> +		return -EINVAL;
> +
> +	newpn->weight =  temp;
> +
> +	return 0;
> +}
> +
> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				    dev_t dev)
> +{
> +	struct io_policy_node *pn;
> +
> +	pn = policy_search_node(blkcg, dev);
> +
> +	if (pn)
> +		return pn->weight;
> +	else
> +		return blkcg->weight;
> +}
> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
> +
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct io_policy_node *pn)
> +{
> +	list_add(&pn->node, &blkcg->policy_list);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static inline void policy_delete_node(struct io_policy_node *pn)
> +{
> +	list_del(&pn->node);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev)
> +{
> +	struct io_policy_node *pn;
> +
> +	if (list_empty(&blkcg->policy_list))
> +		return NULL;
> +
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		if (pn->dev == dev)
> +			return pn;
> +	}
> +
> +	return NULL;
> +}
> +
> +
> +static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft,
> +				const char *buffer)
> +{
> +	int ret = 0;
> +	char *buf;
> +	struct io_policy_node *newpn, *pn;
> +	struct blkio_cgroup *blkcg;
> +	struct blkio_group *blkg;
> +	int keep_newpn = 0;
> +	struct hlist_node *n;
> +	struct blkio_policy_type *blkiop;
> +
> +	buf = kstrdup(buffer, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
> +	if (!newpn) {
> +		ret = -ENOMEM;
> +		goto free_buf;
> +	}
> +
> +	ret = policy_parse_and_set(buf, newpn);
> +	if (ret)
> +		goto free_newpn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	spin_lock_irq(&blkcg->lock);
> +
> +	pn = policy_search_node(blkcg, newpn->dev);
> +	if (!pn) {
> +		if (newpn->weight != 0) {
> +			policy_insert_node(blkcg, newpn);
> +			keep_newpn = 1;
> +		}
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +
> +	if (newpn->weight == 0) {
> +		/* weight == 0 means deleteing a policy */
> +		policy_delete_node(pn);
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +
> +	pn->weight = newpn->weight;
> +
> +update_io_group:
> +	/* update weight for each cfqg */
> +	spin_lock(&blkio_list_lock);
> +	spin_lock_irq(&blkcg->lock);
> +
> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		if (newpn->dev == blkg->dev) {
> +			list_for_each_entry(blkiop, &blkio_list, list)
> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
> +						newpn->weight ?
> +						newpn->weight :
> +						blkcg->weight);
> +		}
> +	}
> +
> +	spin_unlock_irq(&blkcg->lock);
> +	spin_unlock(&blkio_list_lock);
> +
> +free_newpn:
> +	if (!keep_newpn)
> +		kfree(newpn);
> +free_buf:
> +	kfree(buf);
> +	return ret;
> +}
> +
> +static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft,
> +			       struct seq_file *m)
> +{
> +	struct blkio_cgroup *blkcg;
> +	struct io_policy_node *pn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	if (list_empty(&blkcg->policy_list))
> +		goto out;
> +
> +	seq_printf(m, "dev\tweight\n");
> +
> +	spin_lock_irq(&blkcg->lock);
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> +			   MINOR(pn->dev), pn->weight);
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +out:
> +	return 0;
> +}
> +
>  struct cftype blkio_files[] = {
>  	{
> +		.name = "policy",
> +		.read_seq_string = blkiocg_policy_read,
> +		.write_string = blkiocg_policy_write,
> +		.max_write_len = 256,
> +	},
> +	{
>  		.name = "weight",
>  		.read_u64 = blkiocg_weight_read,
>  		.write_u64 = blkiocg_weight_write,
> @@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>  	struct blkio_group *blkg;
>  	void *key;
>  	struct blkio_policy_type *blkiop;
> +	struct io_policy_node *pn, *pntmp;
>  
>  	rcu_read_lock();
>  remove_entry:
> @@ -264,7 +506,12 @@ remove_entry:
>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>  	spin_unlock(&blkio_list_lock);
>  	goto remove_entry;
> +
>  done:
> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
> +		policy_delete_node(pn);
> +		kfree(pn);
> +	}
>  	free_css_id(&blkio_subsys, &blkcg->css);
>  	rcu_read_unlock();
>  	kfree(blkcg);
> @@ -294,6 +541,7 @@ done:
>  	spin_lock_init(&blkcg->lock);
>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>  
> +	INIT_LIST_HEAD(&blkcg->policy_list);
>  	return &blkcg->css;
>  }
>  
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index 4d316df..0551aa1 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>  	unsigned int weight;
>  	spinlock_t lock;
>  	struct hlist_head blkg_list;
> +	struct list_head policy_list; /* list of io_policy_node */
>  };
>  
>  struct blkio_group {
> @@ -43,8 +44,16 @@ struct blkio_group {
>  	unsigned long sectors;
>  };
>  
> +struct io_policy_node {
> +	struct list_head node;
> +	dev_t dev;
> +	unsigned int weight;
> +};
> +
>  extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
>  extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				     dev_t dev);
>  
>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 023f4e6..3d0dea1 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	if (!cfqg)
>  		goto done;
>  
> -	cfqg->weight = blkcg->weight;
>  	for_each_cfqg_st(cfqg, i, j, st)
>  		*st = CFQ_RB_ROOT;
>  	RB_CLEAR_NODE(&cfqg->rb_node);
> @@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>  					MKDEV(major, minor));
> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>  
>  	/* Add group on cfqd list */
>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> -- 
> 1.5.4.rc3

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-03 14:33 ` Vivek Goyal
@ 2010-03-03 15:47   ` Vivek Goyal
       [not found]     ` <e98e18941003031519v14d4f79cg33da5c3cf1f5ac03@mail.gmail.com>
  2010-03-04  0:27   ` Gui Jianfeng
  1 sibling, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-03 15:47 UTC (permalink / raw)
  To: Gui Jianfeng; +Cc: Jens Axboe, linux kernel mailing list

On Wed, Mar 03, 2010 at 09:33:11AM -0500, Vivek Goyal wrote:
> On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
> > Currently, IO Controller makes use of blkio.weight to assign weight for
> > all devices. Here a new user interface "blkio.policy" is introduced to 
> > assign different weights for different devices. blkio.weight becomes the
> > default value for devices which are not configured by blkio.policy.
> > 
> > You can use the following format to assigned specific weight for a given
> > device:
> > #echo "major:minor weight" > blkio.policy
> > 
> 
> Hi Gui,
> 
> Can we use a different name for per device weight file name than "blkio.policy".
> May be "blkio.weight_device" or "blkio.weight_per_device" etc.
> 
> The reason being that "blkio.policy" name might be more suitable to switch
> between differnt kind of BW control policies (proportional, max bandwidth etc),
> once we implement some kind of max BW controlling policies also. So it
> might be a good idea to not use that file name for specifying per device
> weights.

Well, thinking more about it, what kind of policy you implement on a block
device will not be a per cgroup property. It will not be the case that on
a device you are implementing max-bw for one cgroup and proportional
weight for other cgroup. It probably will be a per device attribute and
if need be should be controlled by /sys/block/<dev> interface.

Still, being very clear what a particular cgroup file does makes sense. So
that in future for max-bw control, we can bring in more cgroup files like
blkio.max_bw or blkio.max_iops etc which can co-exist with blkio.weight_device
etc.

Thanks
Vivek

> 
> Secondly, where is it useful. I mean do you have a scenario where you will
> use it. Why I am asking is that we never had ioprio per device kind of
> interface. ioprio was associated with task and was global and same on
> every device in the system.
> 
> I am wondering, what are the scenarios where an application is more important
> on one device and less important on other device to warrant different weights
> on different devices.
> 
> Thanks
> Vivek
>  
>  
> > major:minor represents device number.
> > 
> > And you can remove a policy as following:
> > #echo "major:minor 0" > blkio.policy
> > 
> > Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
> > ---
> >  block/blk-cgroup.c  |  248 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  block/blk-cgroup.h  |    9 ++
> >  block/cfq-iosched.c |    2 +-
> >  3 files changed, 258 insertions(+), 1 deletions(-)
> > 
> > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> > index e7dbbaf..1fddb98 100644
> > --- a/block/blk-cgroup.c
> > +++ b/block/blk-cgroup.c
> > @@ -16,6 +16,7 @@
> >  #include <linux/module.h>
> >  #include <linux/err.h>
> >  #include "blk-cgroup.h"
> > +#include <linux/genhd.h>
> >  
> >  static DEFINE_SPINLOCK(blkio_list_lock);
> >  static LIST_HEAD(blkio_list);
> > @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
> >  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
> >  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
> >  
> > +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> > +				      struct io_policy_node *pn);
> > +static inline void policy_delete_node(struct io_policy_node *pn);
> > +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> > +						 dev_t dev);
> > +
> >  bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
> >  {
> >  	if (!css_tryget(&blkcg->css))
> > @@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> >  	struct blkio_group *blkg;
> >  	struct hlist_node *n;
> >  	struct blkio_policy_type *blkiop;
> > +	struct io_policy_node *pn;
> >  
> >  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
> >  		return -EINVAL;
> > @@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> >  	spin_lock(&blkio_list_lock);
> >  	spin_lock_irq(&blkcg->lock);
> >  	blkcg->weight = (unsigned int)val;
> > +
> >  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> > +		pn = policy_search_node(blkcg, blkg->dev);
> > +
> > +		if (pn)
> > +			continue;
> > +
> >  		list_for_each_entry(blkiop, &blkio_list, list)
> >  			blkiop->ops.blkio_update_group_weight_fn(blkg,
> >  					blkcg->weight);
> > @@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> >  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
> >  #endif
> >  
> > +static int check_dev_num(dev_t dev)
> > +{
> > +	int part = 0;
> > +	struct gendisk *disk;
> > +
> > +	disk = get_gendisk(dev, &part);
> > +
> > +	if (!disk || part)
> > +		return -ENODEV;
> > +
> > +	return 0;
> > +}
> > +
> > +static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
> > +{
> > +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> > +	int ret;
> > +	unsigned long major, minor, temp;
> > +	int i = 0;
> > +	dev_t dev;
> > +
> > +	memset(s, 0, sizeof(s));
> > +
> > +	while ((p = strsep(&buf, " ")) != NULL) {
> > +		if (!*p)
> > +			continue;
> > +
> > +		s[i++] = p;
> > +
> > +		/* Prevent from inputing too many things */
> > +		if (i == 3)
> > +			break;
> > +	}
> > +
> > +	if (i != 2)
> > +		return -EINVAL;
> > +
> > +	p = strsep(&s[0], ":");
> > +
> > +	if (p != NULL)
> > +		major_s = p;
> > +	else
> > +		return -EINVAL;
> > +
> > +	minor_s = s[0];
> > +
> > +	if (!minor_s)
> > +		return -EINVAL;
> > +
> > +	ret = strict_strtoul(major_s, 10, &major);
> > +	if (ret)
> > +		return -EINVAL;
> > +
> > +	ret = strict_strtoul(minor_s, 10, &minor);
> > +	if (ret)
> > +		return -EINVAL;
> > +
> > +	dev = MKDEV(major, minor);
> > +
> > +	ret = check_dev_num(dev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	newpn->dev = dev;
> > +
> > +	if (s[1] == NULL)
> > +		return -EINVAL;
> > +
> > +	ret = strict_strtoul(s[1], 10, &temp);
> > +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> > +	    temp > BLKIO_WEIGHT_MAX)
> > +		return -EINVAL;
> > +
> > +	newpn->weight =  temp;
> > +
> > +	return 0;
> > +}
> > +
> > +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> > +				    dev_t dev)
> > +{
> > +	struct io_policy_node *pn;
> > +
> > +	pn = policy_search_node(blkcg, dev);
> > +
> > +	if (pn)
> > +		return pn->weight;
> > +	else
> > +		return blkcg->weight;
> > +}
> > +EXPORT_SYMBOL_GPL(blkcg_get_weight);
> > +
> > +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> > +				      struct io_policy_node *pn)
> > +{
> > +	list_add(&pn->node, &blkcg->policy_list);
> > +}
> > +
> > +/* Must be called with blkcg->lock held */
> > +static inline void policy_delete_node(struct io_policy_node *pn)
> > +{
> > +	list_del(&pn->node);
> > +}
> > +
> > +/* Must be called with blkcg->lock held */
> > +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> > +						 dev_t dev)
> > +{
> > +	struct io_policy_node *pn;
> > +
> > +	if (list_empty(&blkcg->policy_list))
> > +		return NULL;
> > +
> > +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> > +		if (pn->dev == dev)
> > +			return pn;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +
> > +static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft,
> > +				const char *buffer)
> > +{
> > +	int ret = 0;
> > +	char *buf;
> > +	struct io_policy_node *newpn, *pn;
> > +	struct blkio_cgroup *blkcg;
> > +	struct blkio_group *blkg;
> > +	int keep_newpn = 0;
> > +	struct hlist_node *n;
> > +	struct blkio_policy_type *blkiop;
> > +
> > +	buf = kstrdup(buffer, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +
> > +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
> > +	if (!newpn) {
> > +		ret = -ENOMEM;
> > +		goto free_buf;
> > +	}
> > +
> > +	ret = policy_parse_and_set(buf, newpn);
> > +	if (ret)
> > +		goto free_newpn;
> > +
> > +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> > +
> > +	spin_lock_irq(&blkcg->lock);
> > +
> > +	pn = policy_search_node(blkcg, newpn->dev);
> > +	if (!pn) {
> > +		if (newpn->weight != 0) {
> > +			policy_insert_node(blkcg, newpn);
> > +			keep_newpn = 1;
> > +		}
> > +		spin_unlock_irq(&blkcg->lock);
> > +		goto update_io_group;
> > +	}
> > +
> > +	if (newpn->weight == 0) {
> > +		/* weight == 0 means deleteing a policy */
> > +		policy_delete_node(pn);
> > +		spin_unlock_irq(&blkcg->lock);
> > +		goto update_io_group;
> > +	}
> > +	spin_unlock_irq(&blkcg->lock);
> > +
> > +	pn->weight = newpn->weight;
> > +
> > +update_io_group:
> > +	/* update weight for each cfqg */
> > +	spin_lock(&blkio_list_lock);
> > +	spin_lock_irq(&blkcg->lock);
> > +
> > +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> > +		if (newpn->dev == blkg->dev) {
> > +			list_for_each_entry(blkiop, &blkio_list, list)
> > +				blkiop->ops.blkio_update_group_weight_fn(blkg,
> > +						newpn->weight ?
> > +						newpn->weight :
> > +						blkcg->weight);
> > +		}
> > +	}
> > +
> > +	spin_unlock_irq(&blkcg->lock);
> > +	spin_unlock(&blkio_list_lock);
> > +
> > +free_newpn:
> > +	if (!keep_newpn)
> > +		kfree(newpn);
> > +free_buf:
> > +	kfree(buf);
> > +	return ret;
> > +}
> > +
> > +static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft,
> > +			       struct seq_file *m)
> > +{
> > +	struct blkio_cgroup *blkcg;
> > +	struct io_policy_node *pn;
> > +
> > +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> > +
> > +	if (list_empty(&blkcg->policy_list))
> > +		goto out;
> > +
> > +	seq_printf(m, "dev\tweight\n");
> > +
> > +	spin_lock_irq(&blkcg->lock);
> > +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> > +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> > +			   MINOR(pn->dev), pn->weight);
> > +	}
> > +	spin_unlock_irq(&blkcg->lock);
> > +out:
> > +	return 0;
> > +}
> > +
> >  struct cftype blkio_files[] = {
> >  	{
> > +		.name = "policy",
> > +		.read_seq_string = blkiocg_policy_read,
> > +		.write_string = blkiocg_policy_write,
> > +		.max_write_len = 256,
> > +	},
> > +	{
> >  		.name = "weight",
> >  		.read_u64 = blkiocg_weight_read,
> >  		.write_u64 = blkiocg_weight_write,
> > @@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
> >  	struct blkio_group *blkg;
> >  	void *key;
> >  	struct blkio_policy_type *blkiop;
> > +	struct io_policy_node *pn, *pntmp;
> >  
> >  	rcu_read_lock();
> >  remove_entry:
> > @@ -264,7 +506,12 @@ remove_entry:
> >  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
> >  	spin_unlock(&blkio_list_lock);
> >  	goto remove_entry;
> > +
> >  done:
> > +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
> > +		policy_delete_node(pn);
> > +		kfree(pn);
> > +	}
> >  	free_css_id(&blkio_subsys, &blkcg->css);
> >  	rcu_read_unlock();
> >  	kfree(blkcg);
> > @@ -294,6 +541,7 @@ done:
> >  	spin_lock_init(&blkcg->lock);
> >  	INIT_HLIST_HEAD(&blkcg->blkg_list);
> >  
> > +	INIT_LIST_HEAD(&blkcg->policy_list);
> >  	return &blkcg->css;
> >  }
> >  
> > diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> > index 4d316df..0551aa1 100644
> > --- a/block/blk-cgroup.h
> > +++ b/block/blk-cgroup.h
> > @@ -22,6 +22,7 @@ struct blkio_cgroup {
> >  	unsigned int weight;
> >  	spinlock_t lock;
> >  	struct hlist_head blkg_list;
> > +	struct list_head policy_list; /* list of io_policy_node */
> >  };
> >  
> >  struct blkio_group {
> > @@ -43,8 +44,16 @@ struct blkio_group {
> >  	unsigned long sectors;
> >  };
> >  
> > +struct io_policy_node {
> > +	struct list_head node;
> > +	dev_t dev;
> > +	unsigned int weight;
> > +};
> > +
> >  extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
> >  extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
> > +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> > +				     dev_t dev);
> >  
> >  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
> >  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
> > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> > index 023f4e6..3d0dea1 100644
> > --- a/block/cfq-iosched.c
> > +++ b/block/cfq-iosched.c
> > @@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
> >  	if (!cfqg)
> >  		goto done;
> >  
> > -	cfqg->weight = blkcg->weight;
> >  	for_each_cfqg_st(cfqg, i, j, st)
> >  		*st = CFQ_RB_ROOT;
> >  	RB_CLEAR_NODE(&cfqg->rb_node);
> > @@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
> >  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
> >  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
> >  					MKDEV(major, minor));
> > +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
> >  
> >  	/* Add group on cfqd list */
> >  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> > -- 
> > 1.5.4.rc3

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-03 14:33 ` Vivek Goyal
  2010-03-03 15:47   ` Vivek Goyal
@ 2010-03-04  0:27   ` Gui Jianfeng
  1 sibling, 0 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-04  0:27 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: Jens Axboe, linux kernel mailing list

Hi Vivek,

Vivek Goyal wrote:
> On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
>> Currently, IO Controller makes use of blkio.weight to assign weight for
>> all devices. Here a new user interface "blkio.policy" is introduced to 
>> assign different weights for different devices. blkio.weight becomes the
>> default value for devices which are not configured by blkio.policy.
>>
>> You can use the following format to assigned specific weight for a given
>> device:
>> #echo "major:minor weight" > blkio.policy
>>
> 
> Hi Gui,
> 
> Can we use a different name for per device weight file name than "blkio.policy".
> May be "blkio.weight_device" or "blkio.weight_per_device" etc.

  ok, will change.

> 
> The reason being that "blkio.policy" name might be more suitable to switch
> between differnt kind of BW control policies (proportional, max bandwidth etc),
> once we implement some kind of max BW controlling policies also. So it
> might be a good idea to not use that file name for specifying per device
> weights.
> 
> Secondly, where is it useful. I mean do you have a scenario where you will
> use it. Why I am asking is that we never had ioprio per device kind of
> interface. ioprio was associated with task and was global and same on
> every device in the system.
> 
> I am wondering, what are the scenarios where an application is more important
> on one device and less important on other device to warrant different weights
> on different devices.

  Imaging that, if most of user's data stores on sdb, and most of system data stores
  on sda for system application. So we can assign more weight on sdb and less weight
  on sda for that user. So that user application achieve high speed on sdb, and will
  not stall system application when accessing into sda.

Thanks
Gui

> 
> Thanks
> Vivek
>  
>  
>> major:minor represents device number.
>>
>> And you can remove a policy as following:
>> #echo "major:minor 0" > blkio.policy
>>
>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>> ---
>>  block/blk-cgroup.c  |  248 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  block/blk-cgroup.h  |    9 ++
>>  block/cfq-iosched.c |    2 +-
>>  3 files changed, 258 insertions(+), 1 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index e7dbbaf..1fddb98 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -16,6 +16,7 @@
>>  #include <linux/module.h>
>>  #include <linux/err.h>
>>  #include "blk-cgroup.h"
>> +#include <linux/genhd.h>
>>  
>>  static DEFINE_SPINLOCK(blkio_list_lock);
>>  static LIST_HEAD(blkio_list);
>> @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
>>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>  
>> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
>> +				      struct io_policy_node *pn);
>> +static inline void policy_delete_node(struct io_policy_node *pn);
>> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
>> +						 dev_t dev);
>> +
>>  bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
>>  {
>>  	if (!css_tryget(&blkcg->css))
>> @@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	struct blkio_group *blkg;
>>  	struct hlist_node *n;
>>  	struct blkio_policy_type *blkiop;
>> +	struct io_policy_node *pn;
>>  
>>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>>  		return -EINVAL;
>> @@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	spin_lock(&blkio_list_lock);
>>  	spin_lock_irq(&blkcg->lock);
>>  	blkcg->weight = (unsigned int)val;
>> +
>>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		pn = policy_search_node(blkcg, blkg->dev);
>> +
>> +		if (pn)
>> +			continue;
>> +
>>  		list_for_each_entry(blkiop, &blkio_list, list)
>>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>>  					blkcg->weight);
>> @@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>>  #endif
>>  
>> +static int check_dev_num(dev_t dev)
>> +{
>> +	int part = 0;
>> +	struct gendisk *disk;
>> +
>> +	disk = get_gendisk(dev, &part);
>> +
>> +	if (!disk || part)
>> +		return -ENODEV;
>> +
>> +	return 0;
>> +}
>> +
>> +static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
>> +{
>> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>> +	int ret;
>> +	unsigned long major, minor, temp;
>> +	int i = 0;
>> +	dev_t dev;
>> +
>> +	memset(s, 0, sizeof(s));
>> +
>> +	while ((p = strsep(&buf, " ")) != NULL) {
>> +		if (!*p)
>> +			continue;
>> +
>> +		s[i++] = p;
>> +
>> +		/* Prevent from inputing too many things */
>> +		if (i == 3)
>> +			break;
>> +	}
>> +
>> +	if (i != 2)
>> +		return -EINVAL;
>> +
>> +	p = strsep(&s[0], ":");
>> +
>> +	if (p != NULL)
>> +		major_s = p;
>> +	else
>> +		return -EINVAL;
>> +
>> +	minor_s = s[0];
>> +
>> +	if (!minor_s)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(major_s, 10, &major);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(minor_s, 10, &minor);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	dev = MKDEV(major, minor);
>> +
>> +	ret = check_dev_num(dev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	newpn->dev = dev;
>> +
>> +	if (s[1] == NULL)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(s[1], 10, &temp);
>> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
>> +	    temp > BLKIO_WEIGHT_MAX)
>> +		return -EINVAL;
>> +
>> +	newpn->weight =  temp;
>> +
>> +	return 0;
>> +}
>> +
>> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +				    dev_t dev)
>> +{
>> +	struct io_policy_node *pn;
>> +
>> +	pn = policy_search_node(blkcg, dev);
>> +
>> +	if (pn)
>> +		return pn->weight;
>> +	else
>> +		return blkcg->weight;
>> +}
>> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
>> +
>> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
>> +				      struct io_policy_node *pn)
>> +{
>> +	list_add(&pn->node, &blkcg->policy_list);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static inline void policy_delete_node(struct io_policy_node *pn)
>> +{
>> +	list_del(&pn->node);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
>> +						 dev_t dev)
>> +{
>> +	struct io_policy_node *pn;
>> +
>> +	if (list_empty(&blkcg->policy_list))
>> +		return NULL;
>> +
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		if (pn->dev == dev)
>> +			return pn;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +
>> +static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft,
>> +				const char *buffer)
>> +{
>> +	int ret = 0;
>> +	char *buf;
>> +	struct io_policy_node *newpn, *pn;
>> +	struct blkio_cgroup *blkcg;
>> +	struct blkio_group *blkg;
>> +	int keep_newpn = 0;
>> +	struct hlist_node *n;
>> +	struct blkio_policy_type *blkiop;
>> +
>> +	buf = kstrdup(buffer, GFP_KERNEL);
>> +	if (!buf)
>> +		return -ENOMEM;
>> +
>> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
>> +	if (!newpn) {
>> +		ret = -ENOMEM;
>> +		goto free_buf;
>> +	}
>> +
>> +	ret = policy_parse_and_set(buf, newpn);
>> +	if (ret)
>> +		goto free_newpn;
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	pn = policy_search_node(blkcg, newpn->dev);
>> +	if (!pn) {
>> +		if (newpn->weight != 0) {
>> +			policy_insert_node(blkcg, newpn);
>> +			keep_newpn = 1;
>> +		}
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +
>> +	if (newpn->weight == 0) {
>> +		/* weight == 0 means deleteing a policy */
>> +		policy_delete_node(pn);
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +
>> +	pn->weight = newpn->weight;
>> +
>> +update_io_group:
>> +	/* update weight for each cfqg */
>> +	spin_lock(&blkio_list_lock);
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		if (newpn->dev == blkg->dev) {
>> +			list_for_each_entry(blkiop, &blkio_list, list)
>> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
>> +						newpn->weight ?
>> +						newpn->weight :
>> +						blkcg->weight);
>> +		}
>> +	}
>> +
>> +	spin_unlock_irq(&blkcg->lock);
>> +	spin_unlock(&blkio_list_lock);
>> +
>> +free_newpn:
>> +	if (!keep_newpn)
>> +		kfree(newpn);
>> +free_buf:
>> +	kfree(buf);
>> +	return ret;
>> +}
>> +
>> +static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft,
>> +			       struct seq_file *m)
>> +{
>> +	struct blkio_cgroup *blkcg;
>> +	struct io_policy_node *pn;
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +
>> +	if (list_empty(&blkcg->policy_list))
>> +		goto out;
>> +
>> +	seq_printf(m, "dev\tweight\n");
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
>> +			   MINOR(pn->dev), pn->weight);
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +out:
>> +	return 0;
>> +}
>> +
>>  struct cftype blkio_files[] = {
>>  	{
>> +		.name = "policy",
>> +		.read_seq_string = blkiocg_policy_read,
>> +		.write_string = blkiocg_policy_write,
>> +		.max_write_len = 256,
>> +	},
>> +	{
>>  		.name = "weight",
>>  		.read_u64 = blkiocg_weight_read,
>>  		.write_u64 = blkiocg_weight_write,
>> @@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>>  	struct blkio_group *blkg;
>>  	void *key;
>>  	struct blkio_policy_type *blkiop;
>> +	struct io_policy_node *pn, *pntmp;
>>  
>>  	rcu_read_lock();
>>  remove_entry:
>> @@ -264,7 +506,12 @@ remove_entry:
>>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>>  	spin_unlock(&blkio_list_lock);
>>  	goto remove_entry;
>> +
>>  done:
>> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
>> +		policy_delete_node(pn);
>> +		kfree(pn);
>> +	}
>>  	free_css_id(&blkio_subsys, &blkcg->css);
>>  	rcu_read_unlock();
>>  	kfree(blkcg);
>> @@ -294,6 +541,7 @@ done:
>>  	spin_lock_init(&blkcg->lock);
>>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>>  
>> +	INIT_LIST_HEAD(&blkcg->policy_list);
>>  	return &blkcg->css;
>>  }
>>  
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index 4d316df..0551aa1 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>>  	unsigned int weight;
>>  	spinlock_t lock;
>>  	struct hlist_head blkg_list;
>> +	struct list_head policy_list; /* list of io_policy_node */
>>  };
>>  
>>  struct blkio_group {
>> @@ -43,8 +44,16 @@ struct blkio_group {
>>  	unsigned long sectors;
>>  };
>>  
>> +struct io_policy_node {
>> +	struct list_head node;
>> +	dev_t dev;
>> +	unsigned int weight;
>> +};
>> +
>>  extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
>>  extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
>> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +				     dev_t dev);
>>  
>>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index 023f4e6..3d0dea1 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	if (!cfqg)
>>  		goto done;
>>  
>> -	cfqg->weight = blkcg->weight;
>>  	for_each_cfqg_st(cfqg, i, j, st)
>>  		*st = CFQ_RB_ROOT;
>>  	RB_CLEAR_NODE(&cfqg->rb_node);
>> @@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>>  					MKDEV(major, minor));
>> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>>  
>>  	/* Add group on cfqd list */
>>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>> -- 
>> 1.5.4.rc3
> 
> 
> 

-- 
Regards
Gui Jianfeng


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-03 15:36 ` Vivek Goyal
@ 2010-03-04  0:29   ` Gui Jianfeng
  0 siblings, 0 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-04  0:29 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: Jens Axboe, linux kernel mailing list

Vivek Goyal wrote:
> On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
>> Currently, IO Controller makes use of blkio.weight to assign weight for
>> all devices. Here a new user interface "blkio.policy" is introduced to 
>> assign different weights for different devices. blkio.weight becomes the
>> default value for devices which are not configured by blkio.policy.
>>
>> You can use the following format to assigned specific weight for a given
>> device:
>> #echo "major:minor weight" > blkio.policy
>>
>> major:minor represents device number.
>>
>> And you can remove a policy as following:
>> #echo "major:minor 0" > blkio.policy
>>
>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>> ---
>>  block/blk-cgroup.c  |  248 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  block/blk-cgroup.h  |    9 ++
>>  block/cfq-iosched.c |    2 +-
>>  3 files changed, 258 insertions(+), 1 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index e7dbbaf..1fddb98 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -16,6 +16,7 @@
>>  #include <linux/module.h>
>>  #include <linux/err.h>
>>  #include "blk-cgroup.h"
>> +#include <linux/genhd.h>
>>  
>>  static DEFINE_SPINLOCK(blkio_list_lock);
>>  static LIST_HEAD(blkio_list);
>> @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
>>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>  
>> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
>> +				      struct io_policy_node *pn);
> 					     ^^^^^^^^^^
> How about blkio_policy_node?
> 
> So this structure will represent per cgroup per device policy as specified
> by user. We can re-use the same structure if there are more per cgroup
> per device policies down the line. 
> 
>> +static inline void policy_delete_node(struct io_policy_node *pn);
>> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
>> +						 dev_t dev);
> 
> Not very sure about it but may be a good idea to prefix "blkio_" or
> "blkcg_" before all the functions you are defining. Just some uniformity in
> function names like cfq.
> 
>> +
>>  bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
> 
> You might want to rebase it on Jen's for-linus branch as blkiocg_css_tryget()
> is gone in that branch
> 
> Apart from above minor nits, this patch looks good to me. Will do some
> testing on it.

Thanks for the comments vivek, will post V2.

Thanks
Gui

> 
> Thanks
> Vivek
> 
>>  {
>>  	if (!css_tryget(&blkcg->css))
>> @@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	struct blkio_group *blkg;
>>  	struct hlist_node *n;
>>  	struct blkio_policy_type *blkiop;
>> +	struct io_policy_node *pn;
>>  
>>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>>  		return -EINVAL;
>> @@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	spin_lock(&blkio_list_lock);
>>  	spin_lock_irq(&blkcg->lock);
>>  	blkcg->weight = (unsigned int)val;
>> +
>>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		pn = policy_search_node(blkcg, blkg->dev);
>> +
>> +		if (pn)
>> +			continue;
> 
> Ok, so if we have specified a weight for a device, then blkio.weight
> update does not affect the weight on that device. Makes sense.
> 
>> +
>>  		list_for_each_entry(blkiop, &blkio_list, list)
>>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>>  					blkcg->weight);
>> @@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>>  #endif
>>  
>> +static int check_dev_num(dev_t dev)
>> +{
>> +	int part = 0;
>> +	struct gendisk *disk;
>> +
>> +	disk = get_gendisk(dev, &part);
>> +
>> +	if (!disk || part)
>> +		return -ENODEV;
>> +
>> +	return 0;
>> +}
>> +
>> +static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
>> +{
>> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>> +	int ret;
>> +	unsigned long major, minor, temp;
>> +	int i = 0;
>> +	dev_t dev;
>> +
>> +	memset(s, 0, sizeof(s));
>> +
>> +	while ((p = strsep(&buf, " ")) != NULL) {
>> +		if (!*p)
>> +			continue;
>> +
>> +		s[i++] = p;
>> +
>> +		/* Prevent from inputing too many things */
>> +		if (i == 3)
>> +			break;
>> +	}
>> +
>> +	if (i != 2)
>> +		return -EINVAL;
>> +
>> +	p = strsep(&s[0], ":");
>> +
>> +	if (p != NULL)
>> +		major_s = p;
>> +	else
>> +		return -EINVAL;
>> +
>> +	minor_s = s[0];
>> +
>> +	if (!minor_s)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(major_s, 10, &major);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(minor_s, 10, &minor);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	dev = MKDEV(major, minor);
>> +
>> +	ret = check_dev_num(dev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	newpn->dev = dev;
>> +
>> +	if (s[1] == NULL)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(s[1], 10, &temp);
>> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
>> +	    temp > BLKIO_WEIGHT_MAX)
>> +		return -EINVAL;
>> +
>> +	newpn->weight =  temp;
>> +
>> +	return 0;
>> +}
>> +
>> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +				    dev_t dev)
>> +{
>> +	struct io_policy_node *pn;
>> +
>> +	pn = policy_search_node(blkcg, dev);
>> +
>> +	if (pn)
>> +		return pn->weight;
>> +	else
>> +		return blkcg->weight;
>> +}
>> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
>> +
>> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
>> +				      struct io_policy_node *pn)
>> +{
>> +	list_add(&pn->node, &blkcg->policy_list);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static inline void policy_delete_node(struct io_policy_node *pn)
>> +{
>> +	list_del(&pn->node);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
>> +						 dev_t dev)
>> +{
>> +	struct io_policy_node *pn;
>> +
>> +	if (list_empty(&blkcg->policy_list))
>> +		return NULL;
>> +
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		if (pn->dev == dev)
>> +			return pn;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +
>> +static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft,
>> +				const char *buffer)
>> +{
>> +	int ret = 0;
>> +	char *buf;
>> +	struct io_policy_node *newpn, *pn;
>> +	struct blkio_cgroup *blkcg;
>> +	struct blkio_group *blkg;
>> +	int keep_newpn = 0;
>> +	struct hlist_node *n;
>> +	struct blkio_policy_type *blkiop;
>> +
>> +	buf = kstrdup(buffer, GFP_KERNEL);
>> +	if (!buf)
>> +		return -ENOMEM;
>> +
>> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
>> +	if (!newpn) {
>> +		ret = -ENOMEM;
>> +		goto free_buf;
>> +	}
>> +
>> +	ret = policy_parse_and_set(buf, newpn);
>> +	if (ret)
>> +		goto free_newpn;
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	pn = policy_search_node(blkcg, newpn->dev);
>> +	if (!pn) {
>> +		if (newpn->weight != 0) {
>> +			policy_insert_node(blkcg, newpn);
>> +			keep_newpn = 1;
>> +		}
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +
>> +	if (newpn->weight == 0) {
>> +		/* weight == 0 means deleteing a policy */
>> +		policy_delete_node(pn);
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +
>> +	pn->weight = newpn->weight;
>> +
>> +update_io_group:
>> +	/* update weight for each cfqg */
>> +	spin_lock(&blkio_list_lock);
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		if (newpn->dev == blkg->dev) {
>> +			list_for_each_entry(blkiop, &blkio_list, list)
>> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
>> +						newpn->weight ?
>> +						newpn->weight :
>> +						blkcg->weight);
>> +		}
>> +	}
>> +
>> +	spin_unlock_irq(&blkcg->lock);
>> +	spin_unlock(&blkio_list_lock);
>> +
>> +free_newpn:
>> +	if (!keep_newpn)
>> +		kfree(newpn);
>> +free_buf:
>> +	kfree(buf);
>> +	return ret;
>> +}
>> +
>> +static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft,
>> +			       struct seq_file *m)
>> +{
>> +	struct blkio_cgroup *blkcg;
>> +	struct io_policy_node *pn;
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +
>> +	if (list_empty(&blkcg->policy_list))
>> +		goto out;
>> +
>> +	seq_printf(m, "dev\tweight\n");
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
>> +			   MINOR(pn->dev), pn->weight);
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +out:
>> +	return 0;
>> +}
>> +
>>  struct cftype blkio_files[] = {
>>  	{
>> +		.name = "policy",
>> +		.read_seq_string = blkiocg_policy_read,
>> +		.write_string = blkiocg_policy_write,
>> +		.max_write_len = 256,
>> +	},
>> +	{
>>  		.name = "weight",
>>  		.read_u64 = blkiocg_weight_read,
>>  		.write_u64 = blkiocg_weight_write,
>> @@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>>  	struct blkio_group *blkg;
>>  	void *key;
>>  	struct blkio_policy_type *blkiop;
>> +	struct io_policy_node *pn, *pntmp;
>>  
>>  	rcu_read_lock();
>>  remove_entry:
>> @@ -264,7 +506,12 @@ remove_entry:
>>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>>  	spin_unlock(&blkio_list_lock);
>>  	goto remove_entry;
>> +
>>  done:
>> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
>> +		policy_delete_node(pn);
>> +		kfree(pn);
>> +	}
>>  	free_css_id(&blkio_subsys, &blkcg->css);
>>  	rcu_read_unlock();
>>  	kfree(blkcg);
>> @@ -294,6 +541,7 @@ done:
>>  	spin_lock_init(&blkcg->lock);
>>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>>  
>> +	INIT_LIST_HEAD(&blkcg->policy_list);
>>  	return &blkcg->css;
>>  }
>>  
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index 4d316df..0551aa1 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>>  	unsigned int weight;
>>  	spinlock_t lock;
>>  	struct hlist_head blkg_list;
>> +	struct list_head policy_list; /* list of io_policy_node */
>>  };
>>  
>>  struct blkio_group {
>> @@ -43,8 +44,16 @@ struct blkio_group {
>>  	unsigned long sectors;
>>  };
>>  
>> +struct io_policy_node {
>> +	struct list_head node;
>> +	dev_t dev;
>> +	unsigned int weight;
>> +};
>> +
>>  extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
>>  extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
>> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +				     dev_t dev);
>>  
>>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index 023f4e6..3d0dea1 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	if (!cfqg)
>>  		goto done;
>>  
>> -	cfqg->weight = blkcg->weight;
>>  	for_each_cfqg_st(cfqg, i, j, st)
>>  		*st = CFQ_RB_ROOT;
>>  	RB_CLEAR_NODE(&cfqg->rb_node);
>> @@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>>  					MKDEV(major, minor));
>> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>>  
>>  	/* Add group on cfqd list */
>>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>> -- 
>> 1.5.4.rc3
> 
> 
> 

-- 
Regards
Gui Jianfeng


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO  Controller
       [not found]     ` <e98e18941003031519v14d4f79cg33da5c3cf1f5ac03@mail.gmail.com>
@ 2010-03-04  2:25       ` Chad Talbott
  2010-03-04  3:23         ` Gui Jianfeng
  2010-03-04 15:06         ` [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Vivek Goyal
  0 siblings, 2 replies; 40+ messages in thread
From: Chad Talbott @ 2010-03-04  2:25 UTC (permalink / raw)
  To: guijianfeng; +Cc: vgoyal, jens.axboe, linux-kernel, Nauman Rafique

On Wed, Mar 3, 2010 at 3:19 PM, Nauman Rafique <nauman@google.com> wrote:
> On Wed, Mar 03, 2010 at 09:33:11AM -0500, Vivek Goyal wrote:
>> On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
>> > You can use the following format to assigned specific weight for a given
>> > device:
>> > #echo "major:minor weight" > blkio.policy

>> Can we use a different name for per device weight file name than "blkio.policy".
>> May be "blkio.weight_device" or "blkio.weight_per_device" etc.

I agree with Vivek here, and his reasoning below.  This becomes more
important as more attributes are added.

>> The reason being that "blkio.policy" name might be more suitable to switch
>> between differnt kind of BW control policies (proportional, max bandwidth etc),
>> once we implement some kind of max BW controlling policies also. So it
>> might be a good idea to not use that file name for specifying per device
>> weights.
>
> Well, thinking more about it, what kind of policy you implement on a block
> device will not be a per cgroup property. It will not be the case that on
> a device you are implementing max-bw for one cgroup and proportional
> weight for other cgroup. It probably will be a per device attribute and
> if need be should be controlled by /sys/block/<dev> interface.
>
> Still, being very clear what a particular cgroup file does makes sense. So
> that in future for max-bw control, we can bring in more cgroup files like
> blkio.max_bw or blkio.max_iops etc which can co-exist with blkio.weight_device
> etc.

Agreed.  I'd like to add that since we are already thinking about
expanding the policy with more attributes, perhaps
blkio_update_group_weight_fn in blkio_policy_ops should be renamed to
blkio_policy_updated_fn.  Then it could be called if the user changed
any part of the policy.  Further, instead of storing "weight" in
blkio_cgroup, we could store a blkio_policy_node there instead.  Then
the handling of whole-cgroup and per-block-device policy items could
be more regular.

Some quick code comments:

policy_parse_and_set() could be simplified with scanf, like:

if (sscanf(buf, "%d:%d %d", &major, &minor, &temp) != 3)
        return -EINVAL;

blkcg_get_weight() might better be blkcg_get_policy() and it could
return a per-disk policy node, or fall back to the cgroup policy if
none existed for this dev.  This would be across policy attributes,
rather than just weight.

Thanks,
Chad

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-04  2:25       ` Chad Talbott
@ 2010-03-04  3:23         ` Gui Jianfeng
  2010-03-04  7:35           ` [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller Gui Jianfeng
  2010-03-04  7:35           ` [PATCH 2/2 V2] " Gui Jianfeng
  2010-03-04 15:06         ` [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Vivek Goyal
  1 sibling, 2 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-04  3:23 UTC (permalink / raw)
  To: Chad Talbott; +Cc: vgoyal, jens.axboe, linux-kernel, Nauman Rafique

Chad Talbott wrote:
> On Wed, Mar 3, 2010 at 3:19 PM, Nauman Rafique <nauman@google.com> wrote:
>> On Wed, Mar 03, 2010 at 09:33:11AM -0500, Vivek Goyal wrote:
>>> On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
>>>> You can use the following format to assigned specific weight for a given
>>>> device:
>>>> #echo "major:minor weight" > blkio.policy
> 
>>> Can we use a different name for per device weight file name than "blkio.policy".
>>> May be "blkio.weight_device" or "blkio.weight_per_device" etc.
> 
> I agree with Vivek here, and his reasoning below.  This becomes more
> important as more attributes are added.
> 
>>> The reason being that "blkio.policy" name might be more suitable to switch
>>> between differnt kind of BW control policies (proportional, max bandwidth etc),
>>> once we implement some kind of max BW controlling policies also. So it
>>> might be a good idea to not use that file name for specifying per device
>>> weights.
>> Well, thinking more about it, what kind of policy you implement on a block
>> device will not be a per cgroup property. It will not be the case that on
>> a device you are implementing max-bw for one cgroup and proportional
>> weight for other cgroup. It probably will be a per device attribute and
>> if need be should be controlled by /sys/block/<dev> interface.
>>
>> Still, being very clear what a particular cgroup file does makes sense. So
>> that in future for max-bw control, we can bring in more cgroup files like
>> blkio.max_bw or blkio.max_iops etc which can co-exist with blkio.weight_device
>> etc.
> 
> Agreed.  I'd like to add that since we are already thinking about
> expanding the policy with more attributes, perhaps
> blkio_update_group_weight_fn in blkio_policy_ops should be renamed to
> blkio_policy_updated_fn.  Then it could be called if the user changed
> any part of the policy.  Further, instead of storing "weight" in
> blkio_cgroup, we could store a blkio_policy_node there instead.  Then
> the handling of whole-cgroup and per-block-device policy items could
> be more regular.
> 
> Some quick code comments:

Hi Chad,

Thanks for the comments.

> 
> policy_parse_and_set() could be simplified with scanf, like:
> 
> if (sscanf(buf, "%d:%d %d", &major, &minor, &temp) != 3)
>         return -EINVAL;

This can't handle the invalid format like following
echo "8:16 500 500 500 ...." > blkio.policy

> 
> blkcg_get_weight() might better be blkcg_get_policy() and it could
> return a per-disk policy node, or fall back to the cgroup policy if
> none existed for this dev.  This would be across policy attributes,
> rather than just weight.

  For the time being, i still choose blkcg_get_weight(). For there's
  only one attributes here, when more attributs is added, then we might
  change the name.

Thanks
Gui

> 
> Thanks,
> Chad
> 
> 



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-04  3:23         ` Gui Jianfeng
@ 2010-03-04  7:35           ` Gui Jianfeng
  2010-03-04 15:24             ` Vivek Goyal
  2010-03-04  7:35           ` [PATCH 2/2 V2] " Gui Jianfeng
  1 sibling, 1 reply; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-04  7:35 UTC (permalink / raw)
  To: vgoyal, jens.axboe; +Cc: Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

Currently, IO Controller makes use of blkio.weight to assign weight for
all devices. Here a new user interface "blkio.weight_device" is introduced to 
assign different weights for different devices. blkio.weight becomes the
default value for devices which are not configured by "blkio.weight_device"

You can use the following format to assigned specific weight for a given
device:
#echo "major:minor weight" > blkio.weight_device

major:minor represents device number.

And you can remove a specific weight as following:
#echo "major:minor 0" > blkio.weight_device

V1->V2 changes:
- use user interface "weight_device" instead of "policy" suggested by Vivek
- rename some struct suggested by Vivek
- rebase to 2.6-block "for-linus" branch
- remove an useless list_empty check pointed out by Li Zefan
- some trivial typo fix

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
---
 block/blk-cgroup.c  |  240 +++++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-cgroup.h  |   10 ++
 block/cfq-iosched.c |    2 +-
 3 files changed, 251 insertions(+), 1 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c85d74c..37f1d8f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include "blk-cgroup.h"
+#include <linux/genhd.h>
 
 static DEFINE_SPINLOCK(blkio_list_lock);
 static LIST_HEAD(blkio_list);
@@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
+static inline void policy_insert_node(struct blkio_cgroup *blkcg,
+				      struct blkio_policy_node *pn);
+static inline void policy_delete_node(struct blkio_policy_node *pn);
+static struct blkio_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
+						 dev_t dev);
+
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 {
 	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
@@ -128,6 +135,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	struct blkio_group *blkg;
 	struct hlist_node *n;
 	struct blkio_policy_type *blkiop;
+	struct blkio_policy_node *pn;
 
 	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
 		return -EINVAL;
@@ -136,7 +144,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	spin_lock(&blkio_list_lock);
 	spin_lock_irq(&blkcg->lock);
 	blkcg->weight = (unsigned int)val;
+
 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		pn = policy_search_node(blkcg, blkg->dev);
+
+		if (pn)
+			continue;
+
 		list_for_each_entry(blkiop, &blkio_list, list)
 			blkiop->ops.blkio_update_group_weight_fn(blkg,
 					blkcg->weight);
@@ -185,8 +199,227 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
 EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
 #endif
 
+static int check_dev_num(dev_t dev)
+{
+	int part = 0;
+	struct gendisk *disk;
+
+	disk = get_gendisk(dev, &part);
+	if (!disk || part)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int policy_parse_and_set(char *buf, struct blkio_policy_node *newpn)
+{
+	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
+	int ret;
+	unsigned long major, minor, temp;
+	int i = 0;
+	dev_t dev;
+
+	memset(s, 0, sizeof(s));
+
+	while ((p = strsep(&buf, " ")) != NULL) {
+		if (!*p)
+			continue;
+
+		s[i++] = p;
+
+		/* Prevent from inputing too many things */
+		if (i == 3)
+			break;
+	}
+
+	if (i != 2)
+		return -EINVAL;
+
+	p = strsep(&s[0], ":");
+	if (p != NULL)
+		major_s = p;
+	else
+		return -EINVAL;
+
+	minor_s = s[0];
+	if (!minor_s)
+		return -EINVAL;
+
+	ret = strict_strtoul(major_s, 10, &major);
+	if (ret)
+		return -EINVAL;
+
+	ret = strict_strtoul(minor_s, 10, &minor);
+	if (ret)
+		return -EINVAL;
+
+	dev = MKDEV(major, minor);
+
+	ret = check_dev_num(dev);
+	if (ret)
+		return ret;
+
+	newpn->dev = dev;
+
+	if (s[1] == NULL)
+		return -EINVAL;
+
+	ret = strict_strtoul(s[1], 10, &temp);
+	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+	    temp > BLKIO_WEIGHT_MAX)
+		return -EINVAL;
+
+	newpn->weight =  temp;
+
+	return 0;
+}
+
+unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+				    dev_t dev)
+{
+	struct blkio_policy_node *pn;
+
+	pn = policy_search_node(blkcg, dev);
+	if (pn)
+		return pn->weight;
+	else
+		return blkcg->weight;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_weight);
+
+static inline void policy_insert_node(struct blkio_cgroup *blkcg,
+				      struct blkio_policy_node *pn)
+{
+	list_add(&pn->node, &blkcg->policy_list);
+}
+
+/* Must be called with blkcg->lock held */
+static inline void policy_delete_node(struct blkio_policy_node *pn)
+{
+	list_del(&pn->node);
+}
+
+/* Must be called with blkcg->lock held */
+static struct blkio_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
+						 dev_t dev)
+{
+	struct blkio_policy_node *pn;
+
+	list_for_each_entry(pn, &blkcg->policy_list, node) {
+		if (pn->dev == dev)
+			return pn;
+	}
+
+	return NULL;
+}
+
+
+static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
+				const char *buffer)
+{
+	int ret = 0;
+	char *buf;
+	struct blkio_policy_node *newpn, *pn;
+	struct blkio_cgroup *blkcg;
+	struct blkio_group *blkg;
+	int keep_newpn = 0;
+	struct hlist_node *n;
+	struct blkio_policy_type *blkiop;
+
+	buf = kstrdup(buffer, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
+	if (!newpn) {
+		ret = -ENOMEM;
+		goto free_buf;
+	}
+
+	ret = policy_parse_and_set(buf, newpn);
+	if (ret)
+		goto free_newpn;
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	spin_lock_irq(&blkcg->lock);
+
+	pn = policy_search_node(blkcg, newpn->dev);
+	if (!pn) {
+		if (newpn->weight != 0) {
+			policy_insert_node(blkcg, newpn);
+			keep_newpn = 1;
+		}
+		spin_unlock_irq(&blkcg->lock);
+		goto update_io_group;
+	}
+
+	if (newpn->weight == 0) {
+		/* weight == 0 means deleteing a specific weight */
+		policy_delete_node(pn);
+		spin_unlock_irq(&blkcg->lock);
+		goto update_io_group;
+	}
+	spin_unlock_irq(&blkcg->lock);
+
+	pn->weight = newpn->weight;
+
+update_io_group:
+	/* update weight for each cfqg */
+	spin_lock(&blkio_list_lock);
+	spin_lock_irq(&blkcg->lock);
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		if (newpn->dev == blkg->dev) {
+			list_for_each_entry(blkiop, &blkio_list, list)
+				blkiop->ops.blkio_update_group_weight_fn(blkg,
+						newpn->weight ?
+						newpn->weight :
+						blkcg->weight);
+		}
+	}
+
+	spin_unlock_irq(&blkcg->lock);
+	spin_unlock(&blkio_list_lock);
+
+free_newpn:
+	if (!keep_newpn)
+		kfree(newpn);
+free_buf:
+	kfree(buf);
+	return ret;
+}
+
+static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
+				       struct seq_file *m)
+{
+	struct blkio_cgroup *blkcg;
+	struct blkio_policy_node *pn;
+
+	seq_printf(m, "dev\tweight\n");
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+	if (list_empty(&blkcg->policy_list))
+		goto out;
+
+	spin_lock_irq(&blkcg->lock);
+	list_for_each_entry(pn, &blkcg->policy_list, node) {
+		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
+			   MINOR(pn->dev), pn->weight);
+	}
+	spin_unlock_irq(&blkcg->lock);
+out:
+	return 0;
+}
+
 struct cftype blkio_files[] = {
 	{
+		.name = "weight_device",
+		.read_seq_string = blkiocg_weight_device_read,
+		.write_string = blkiocg_weight_device_write,
+		.max_write_len = 256,
+	},
+	{
 		.name = "weight",
 		.read_u64 = blkiocg_weight_read,
 		.write_u64 = blkiocg_weight_write,
@@ -220,6 +453,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 	struct blkio_group *blkg;
 	void *key;
 	struct blkio_policy_type *blkiop;
+	struct blkio_policy_node *pn, *pntmp;
 
 	rcu_read_lock();
 remove_entry:
@@ -250,7 +484,12 @@ remove_entry:
 		blkiop->ops.blkio_unlink_group_fn(key, blkg);
 	spin_unlock(&blkio_list_lock);
 	goto remove_entry;
+
 done:
+	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
+		policy_delete_node(pn);
+		kfree(pn);
+	}
 	free_css_id(&blkio_subsys, &blkcg->css);
 	rcu_read_unlock();
 	kfree(blkcg);
@@ -280,6 +519,7 @@ done:
 	spin_lock_init(&blkcg->lock);
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 
+	INIT_LIST_HEAD(&blkcg->policy_list);
 	return &blkcg->css;
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 84bf745..c8144a2 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -22,6 +22,7 @@ struct blkio_cgroup {
 	unsigned int weight;
 	spinlock_t lock;
 	struct hlist_head blkg_list;
+	struct list_head policy_list; /* list of blkio_policy_node */
 };
 
 struct blkio_group {
@@ -43,6 +44,15 @@ struct blkio_group {
 	unsigned long sectors;
 };
 
+struct blkio_policy_node {
+	struct list_head node;
+	dev_t dev;
+	unsigned int weight;
+};
+
+extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+				     dev_t dev);
+
 typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
 typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
 						unsigned int weight);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index dee9d93..6945e9b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -954,7 +954,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	if (!cfqg)
 		goto done;
 
-	cfqg->weight = blkcg->weight;
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
 	RB_CLEAR_NODE(&cfqg->rb_node);
@@ -971,6 +970,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
 	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
 					MKDEV(major, minor));
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
 	/* Add group on cfqd list */
 	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-- 
1.5.4.rc3 




^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/2 V2] io-controller: Document for blkio.weight_device
  2010-03-04  3:23         ` Gui Jianfeng
  2010-03-04  7:35           ` [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller Gui Jianfeng
@ 2010-03-04  7:35           ` Gui Jianfeng
  2010-03-04 11:22             ` Takuya Yoshikawa
  1 sibling, 1 reply; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-04  7:35 UTC (permalink / raw)
  To: vgoyal, jens.axboe; +Cc: Chad Talbott, linux-kernel, Nauman Rafique

Here is the document for blkio.weight_device

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
---
 Documentation/cgroups/blkio-controller.txt |   35 +++++++++++++++++++++++++++-
 1 files changed, 34 insertions(+), 1 deletions(-)

diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 630879c..dcd6bdc 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -76,7 +76,9 @@ CONFIG_DEBUG_BLK_CGROUP
 Details of cgroup files
 =======================
 - blkio.weight
-	- Specifies per cgroup weight.
+	- Specifies per cgroup weight. This is default weight of the group
+	  on all the devices until and unless overridden by per device rule.
+	  (See blkio.weight_device).
 
 	  Currently allowed range of weights is from 100 to 1000.
 
@@ -99,6 +101,37 @@ Details of cgroup files
 	  and minor number of the device and third field specifies the number
 	  of times a group was dequeued from a particular device.
 
+- blkio.weight_device
+	- One can specify per cgroup per device rules using this interface.
+	  These rules override the default value of group weight as specified
+	  by blkio.weight.
+
+	  Following is the format.
+
+	  #echo dev_maj:dev_minor weight > /patch/to/cgroup/blkio.weight_device
+
+	  weight=0 means removing weight on a given device.
+
+	  Examples:
+	  Configure weight=300 on /dev/sdb (8:16) in this cgroup
+	  # echo 8:16 300 > blkio.weight_device
+	  # cat blkio.weight_device
+	  dev     weight
+	  8:16    300
+
+	  Configure weight=500 on /dev/sda (8:0) in this cgroup
+	  # echo 8:0 500 > blkio.weight_device
+	  # cat blkio.weight_device
+	  dev     weight
+	  8:0     500
+	  8:16    300
+
+	  Remove specific weight for /dev/sda in this cgroup
+	  # echo 8:0 0 > blkio.weight_device
+	  # cat blkio.weight_device
+	  dev     weight
+	  8:16    300
+
 CFQ sysfs tunable
 =================
 /sys/block/<disk>/queue/iosched/group_isolation
-- 
1.5.4.rc3 




^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2 V2] io-controller: Document for blkio.weight_device
  2010-03-04  7:35           ` [PATCH 2/2 V2] " Gui Jianfeng
@ 2010-03-04 11:22             ` Takuya Yoshikawa
  2010-03-05  0:38               ` Gui Jianfeng
  0 siblings, 1 reply; 40+ messages in thread
From: Takuya Yoshikawa @ 2010-03-04 11:22 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: vgoyal, jens.axboe, Chad Talbott, linux-kernel, Nauman Rafique

Hi, I found a typo.

Gui Jianfeng wrote:
> +- blkio.weight_device
> +	- One can specify per cgroup per device rules using this interface.
> +	  These rules override the default value of group weight as specified
> +	  by blkio.weight.
> +
> +	  Following is the format.
> +
> +	  #echo dev_maj:dev_minor weight > /patch/to/cgroup/blkio.weight_device

                                   You mean "path to"?

Thanks,
   Takuya

> +
> +	  weight=0 means removing weight on a given device.
> +
> +	  Examples:
> +	  Configure weight=300 on /dev/sdb (8:16) in this cgroup
> +	  # echo 8:16 300 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:16    300
> +
> +	  Configure weight=500 on /dev/sda (8:0) in this cgroup
> +	  # echo 8:0 500 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:0     500
> +	  8:16    300
> +
> +	  Remove specific weight for /dev/sda in this cgroup
> +	  # echo 8:0 0 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:16    300
> +
>  CFQ sysfs tunable
>  =================
>  /sys/block/<disk>/queue/iosched/group_isolation


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller
  2010-03-04  2:25       ` Chad Talbott
  2010-03-04  3:23         ` Gui Jianfeng
@ 2010-03-04 15:06         ` Vivek Goyal
  1 sibling, 0 replies; 40+ messages in thread
From: Vivek Goyal @ 2010-03-04 15:06 UTC (permalink / raw)
  To: Chad Talbott; +Cc: guijianfeng, jens.axboe, linux-kernel, Nauman Rafique

On Wed, Mar 03, 2010 at 06:25:44PM -0800, Chad Talbott wrote:
> On Wed, Mar 3, 2010 at 3:19 PM, Nauman Rafique <nauman@google.com> wrote:
> > On Wed, Mar 03, 2010 at 09:33:11AM -0500, Vivek Goyal wrote:
> >> On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
> >> > You can use the following format to assigned specific weight for a given
> >> > device:
> >> > #echo "major:minor weight" > blkio.policy
> 
> >> Can we use a different name for per device weight file name than "blkio.policy".
> >> May be "blkio.weight_device" or "blkio.weight_per_device" etc.
> 
> I agree with Vivek here, and his reasoning below.  This becomes more
> important as more attributes are added.
> 
> >> The reason being that "blkio.policy" name might be more suitable to switch
> >> between differnt kind of BW control policies (proportional, max bandwidth etc),
> >> once we implement some kind of max BW controlling policies also. So it
> >> might be a good idea to not use that file name for specifying per device
> >> weights.
> >
> > Well, thinking more about it, what kind of policy you implement on a block
> > device will not be a per cgroup property. It will not be the case that on
> > a device you are implementing max-bw for one cgroup and proportional
> > weight for other cgroup. It probably will be a per device attribute and
> > if need be should be controlled by /sys/block/<dev> interface.
> >
> > Still, being very clear what a particular cgroup file does makes sense. So
> > that in future for max-bw control, we can bring in more cgroup files like
> > blkio.max_bw or blkio.max_iops etc which can co-exist with blkio.weight_device
> > etc.
> 
> Agreed.  I'd like to add that since we are already thinking about
> expanding the policy with more attributes, perhaps
> blkio_update_group_weight_fn in blkio_policy_ops should be renamed to
> blkio_policy_updated_fn.  Then it could be called if the user changed
> any part of the policy.

Currently there is only one policy (proportional weight) and there is
only one attribute (weight). So I think we can leave the function name
as it is. We can make it more generic as we introduce new policies or
policy attributes.

> Further, instead of storing "weight" in
> blkio_cgroup, we could store a blkio_policy_node there instead.

Sorry, did not get it. Are you suggesting that don't store group weight in
blkio_cgroup, instead store it in blkio_policy_node? But we don't create
blkio_policy_node, until and unless user create a specific weight policy
for a device. 

> Then
> the handling of whole-cgroup and per-block-device policy items could
> be more regular.
> 
> Some quick code comments:
> 
> policy_parse_and_set() could be simplified with scanf, like:
> 
> if (sscanf(buf, "%d:%d %d", &major, &minor, &temp) != 3)
>         return -EINVAL;
> 
> blkcg_get_weight() might better be blkcg_get_policy() and it could
> return a per-disk policy node, or fall back to the cgroup policy if
> none existed for this dev.  This would be across policy attributes,
> rather than just weight.

This is an interesting idea. But again we can implement it once we have
more policy attributes. For the time being, it is simple to understand
that get group weight from blk-cgroup layer for that particular device.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-04  7:35           ` [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller Gui Jianfeng
@ 2010-03-04 15:24             ` Vivek Goyal
  2010-03-05  0:31               ` Gui Jianfeng
                                 ` (2 more replies)
  0 siblings, 3 replies; 40+ messages in thread
From: Vivek Goyal @ 2010-03-04 15:24 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: jens.axboe, Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

On Thu, Mar 04, 2010 at 03:35:19PM +0800, Gui Jianfeng wrote:
> Currently, IO Controller makes use of blkio.weight to assign weight for
> all devices. Here a new user interface "blkio.weight_device" is introduced to 
> assign different weights for different devices. blkio.weight becomes the
> default value for devices which are not configured by "blkio.weight_device"
> 
> You can use the following format to assigned specific weight for a given
> device:
> #echo "major:minor weight" > blkio.weight_device
> 
> major:minor represents device number.
> 
> And you can remove a specific weight as following:
> #echo "major:minor 0" > blkio.weight_device
> 
> V1->V2 changes:
> - use user interface "weight_device" instead of "policy" suggested by Vivek
> - rename some struct suggested by Vivek
> - rebase to 2.6-block "for-linus" branch
> - remove an useless list_empty check pointed out by Li Zefan
> - some trivial typo fix
> 
> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
> ---
>  block/blk-cgroup.c  |  240 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/blk-cgroup.h  |   10 ++
>  block/cfq-iosched.c |    2 +-
>  3 files changed, 251 insertions(+), 1 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index c85d74c..37f1d8f 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -16,6 +16,7 @@
>  #include <linux/module.h>
>  #include <linux/err.h>
>  #include "blk-cgroup.h"
> +#include <linux/genhd.h>
>  
>  static DEFINE_SPINLOCK(blkio_list_lock);
>  static LIST_HEAD(blkio_list);
> @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>  
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct blkio_policy_node *pn);
> +static inline void policy_delete_node(struct blkio_policy_node *pn);
> +static struct blkio_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev);

Hi Gui,

Thanks. Can we move above functions up in the file and get rid of these
forward declarations?

Also as I suggested last time, it might be good idea to prefix "blkio_"
before function names.

policy_insert_node ---> blkio_policy_insert_node()
policy_delete_node ---> blkio_policy_delete_node()
policy_search_node ---> blkio_policy_search_node()
chekc_dev_num ---> blkio_check_dev_num()
.
.

Thanks
Vivek

> +
>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>  {
>  	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
> @@ -128,6 +135,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	struct blkio_group *blkg;
>  	struct hlist_node *n;
>  	struct blkio_policy_type *blkiop;
> +	struct blkio_policy_node *pn;
>  
>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>  		return -EINVAL;
> @@ -136,7 +144,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	spin_lock(&blkio_list_lock);
>  	spin_lock_irq(&blkcg->lock);
>  	blkcg->weight = (unsigned int)val;
> +
>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		pn = policy_search_node(blkcg, blkg->dev);
> +
> +		if (pn)
> +			continue;
> +
>  		list_for_each_entry(blkiop, &blkio_list, list)
>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>  					blkcg->weight);
> @@ -185,8 +199,227 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>  #endif
>  
> +static int check_dev_num(dev_t dev)
> +{
> +	int part = 0;
> +	struct gendisk *disk;
> +
> +	disk = get_gendisk(dev, &part);
> +	if (!disk || part)
> +		return -ENODEV;
> +
> +	return 0;
> +}
> +
> +static int policy_parse_and_set(char *buf, struct blkio_policy_node *newpn)
> +{
> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> +	int ret;
> +	unsigned long major, minor, temp;
> +	int i = 0;
> +	dev_t dev;
> +
> +	memset(s, 0, sizeof(s));
> +
> +	while ((p = strsep(&buf, " ")) != NULL) {
> +		if (!*p)
> +			continue;
> +
> +		s[i++] = p;
> +
> +		/* Prevent from inputing too many things */
> +		if (i == 3)
> +			break;
> +	}
> +
> +	if (i != 2)
> +		return -EINVAL;
> +
> +	p = strsep(&s[0], ":");
> +	if (p != NULL)
> +		major_s = p;
> +	else
> +		return -EINVAL;
> +
> +	minor_s = s[0];
> +	if (!minor_s)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(major_s, 10, &major);
> +	if (ret)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(minor_s, 10, &minor);
> +	if (ret)
> +		return -EINVAL;
> +
> +	dev = MKDEV(major, minor);
> +
> +	ret = check_dev_num(dev);
> +	if (ret)
> +		return ret;
> +
> +	newpn->dev = dev;
> +
> +	if (s[1] == NULL)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(s[1], 10, &temp);
> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> +	    temp > BLKIO_WEIGHT_MAX)
> +		return -EINVAL;
> +
> +	newpn->weight =  temp;
> +
> +	return 0;
> +}
> +
> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				    dev_t dev)
> +{
> +	struct blkio_policy_node *pn;
> +
> +	pn = policy_search_node(blkcg, dev);
> +	if (pn)
> +		return pn->weight;
> +	else
> +		return blkcg->weight;
> +}
> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
> +
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct blkio_policy_node *pn)
> +{
> +	list_add(&pn->node, &blkcg->policy_list);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static inline void policy_delete_node(struct blkio_policy_node *pn)
> +{
> +	list_del(&pn->node);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static struct blkio_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev)
> +{
> +	struct blkio_policy_node *pn;
> +
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		if (pn->dev == dev)
> +			return pn;
> +	}
> +
> +	return NULL;
> +}
> +
> +
> +static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
> +				const char *buffer)
> +{
> +	int ret = 0;
> +	char *buf;
> +	struct blkio_policy_node *newpn, *pn;
> +	struct blkio_cgroup *blkcg;
> +	struct blkio_group *blkg;
> +	int keep_newpn = 0;
> +	struct hlist_node *n;
> +	struct blkio_policy_type *blkiop;
> +
> +	buf = kstrdup(buffer, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
> +	if (!newpn) {
> +		ret = -ENOMEM;
> +		goto free_buf;
> +	}
> +
> +	ret = policy_parse_and_set(buf, newpn);
> +	if (ret)
> +		goto free_newpn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	spin_lock_irq(&blkcg->lock);
> +
> +	pn = policy_search_node(blkcg, newpn->dev);
> +	if (!pn) {
> +		if (newpn->weight != 0) {
> +			policy_insert_node(blkcg, newpn);
> +			keep_newpn = 1;
> +		}
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +
> +	if (newpn->weight == 0) {
> +		/* weight == 0 means deleteing a specific weight */
> +		policy_delete_node(pn);
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +
> +	pn->weight = newpn->weight;
> +
> +update_io_group:
> +	/* update weight for each cfqg */
> +	spin_lock(&blkio_list_lock);
> +	spin_lock_irq(&blkcg->lock);
> +
> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		if (newpn->dev == blkg->dev) {
> +			list_for_each_entry(blkiop, &blkio_list, list)
> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
> +						newpn->weight ?
> +						newpn->weight :
> +						blkcg->weight);
> +		}
> +	}
> +
> +	spin_unlock_irq(&blkcg->lock);
> +	spin_unlock(&blkio_list_lock);
> +
> +free_newpn:
> +	if (!keep_newpn)
> +		kfree(newpn);
> +free_buf:
> +	kfree(buf);
> +	return ret;
> +}
> +
> +static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
> +				       struct seq_file *m)
> +{
> +	struct blkio_cgroup *blkcg;
> +	struct blkio_policy_node *pn;
> +
> +	seq_printf(m, "dev\tweight\n");
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +	if (list_empty(&blkcg->policy_list))
> +		goto out;
> +
> +	spin_lock_irq(&blkcg->lock);
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> +			   MINOR(pn->dev), pn->weight);
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +out:
> +	return 0;
> +}
> +
>  struct cftype blkio_files[] = {
>  	{
> +		.name = "weight_device",
> +		.read_seq_string = blkiocg_weight_device_read,
> +		.write_string = blkiocg_weight_device_write,
> +		.max_write_len = 256,
> +	},
> +	{
>  		.name = "weight",
>  		.read_u64 = blkiocg_weight_read,
>  		.write_u64 = blkiocg_weight_write,
> @@ -220,6 +453,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>  	struct blkio_group *blkg;
>  	void *key;
>  	struct blkio_policy_type *blkiop;
> +	struct blkio_policy_node *pn, *pntmp;
>  
>  	rcu_read_lock();
>  remove_entry:
> @@ -250,7 +484,12 @@ remove_entry:
>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>  	spin_unlock(&blkio_list_lock);
>  	goto remove_entry;
> +
>  done:
> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
> +		policy_delete_node(pn);
> +		kfree(pn);
> +	}
>  	free_css_id(&blkio_subsys, &blkcg->css);
>  	rcu_read_unlock();
>  	kfree(blkcg);
> @@ -280,6 +519,7 @@ done:
>  	spin_lock_init(&blkcg->lock);
>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>  
> +	INIT_LIST_HEAD(&blkcg->policy_list);
>  	return &blkcg->css;
>  }
>  
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index 84bf745..c8144a2 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>  	unsigned int weight;
>  	spinlock_t lock;
>  	struct hlist_head blkg_list;
> +	struct list_head policy_list; /* list of blkio_policy_node */
>  };
>  
>  struct blkio_group {
> @@ -43,6 +44,15 @@ struct blkio_group {
>  	unsigned long sectors;
>  };
>  
> +struct blkio_policy_node {
> +	struct list_head node;
> +	dev_t dev;
> +	unsigned int weight;
> +};
> +
> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				     dev_t dev);
> +
>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
>  						unsigned int weight);
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index dee9d93..6945e9b 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -954,7 +954,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	if (!cfqg)
>  		goto done;
>  
> -	cfqg->weight = blkcg->weight;
>  	for_each_cfqg_st(cfqg, i, j, st)
>  		*st = CFQ_RB_ROOT;
>  	RB_CLEAR_NODE(&cfqg->rb_node);
> @@ -971,6 +970,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>  					MKDEV(major, minor));
> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>  
>  	/* Add group on cfqd list */
>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> -- 
> 1.5.4.rc3 
> 
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-04 15:24             ` Vivek Goyal
@ 2010-03-05  0:31               ` Gui Jianfeng
  2010-03-05  2:25               ` [PATCH 1/2 V3] " Gui Jianfeng
  2010-03-05  2:26               ` [PATCH 2/2 V3] io-controller: Document for blkio.weight_device Gui Jianfeng
  2 siblings, 0 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-05  0:31 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: jens.axboe, Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

Vivek Goyal wrote:
> On Thu, Mar 04, 2010 at 03:35:19PM +0800, Gui Jianfeng wrote:
>> Currently, IO Controller makes use of blkio.weight to assign weight for
>> all devices. Here a new user interface "blkio.weight_device" is introduced to 
>> assign different weights for different devices. blkio.weight becomes the
>> default value for devices which are not configured by "blkio.weight_device"
>>
>> You can use the following format to assigned specific weight for a given
>> device:
>> #echo "major:minor weight" > blkio.weight_device
>>
>> major:minor represents device number.
>>
>> And you can remove a specific weight as following:
>> #echo "major:minor 0" > blkio.weight_device
>>
>> V1->V2 changes:
>> - use user interface "weight_device" instead of "policy" suggested by Vivek
>> - rename some struct suggested by Vivek
>> - rebase to 2.6-block "for-linus" branch
>> - remove an useless list_empty check pointed out by Li Zefan
>> - some trivial typo fix
>>
>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>> ---
>>  block/blk-cgroup.c  |  240 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  block/blk-cgroup.h  |   10 ++
>>  block/cfq-iosched.c |    2 +-
>>  3 files changed, 251 insertions(+), 1 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index c85d74c..37f1d8f 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -16,6 +16,7 @@
>>  #include <linux/module.h>
>>  #include <linux/err.h>
>>  #include "blk-cgroup.h"
>> +#include <linux/genhd.h>
>>  
>>  static DEFINE_SPINLOCK(blkio_list_lock);
>>  static LIST_HEAD(blkio_list);
>> @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
>>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>  
>> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
>> +				      struct blkio_policy_node *pn);
>> +static inline void policy_delete_node(struct blkio_policy_node *pn);
>> +static struct blkio_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
>> +						 dev_t dev);
> 
> Hi Gui,
> 
> Thanks. Can we move above functions up in the file and get rid of these
> forward declarations?
> 
> Also as I suggested last time, it might be good idea to prefix "blkio_"
> before function names.
> 
> policy_insert_node ---> blkio_policy_insert_node()
> policy_delete_node ---> blkio_policy_delete_node()
> policy_search_node ---> blkio_policy_search_node()
> chekc_dev_num ---> blkio_check_dev_num()
> .
> .

Sure, will post again.

Thanks,
Gui

> 
> Thanks
> Vivek
> 
>> +
>>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>>  {
>>  	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
>> @@ -128,6 +135,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	struct blkio_group *blkg;
>>  	struct hlist_node *n;
>>  	struct blkio_policy_type *blkiop;
>> +	struct blkio_policy_node *pn;
>>  
>>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>>  		return -EINVAL;
>> @@ -136,7 +144,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	spin_lock(&blkio_list_lock);
>>  	spin_lock_irq(&blkcg->lock);
>>  	blkcg->weight = (unsigned int)val;
>> +
>>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		pn = policy_search_node(blkcg, blkg->dev);
>> +
>> +		if (pn)
>> +			continue;
>> +
>>  		list_for_each_entry(blkiop, &blkio_list, list)
>>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>>  					blkcg->weight);
>> @@ -185,8 +199,227 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>>  #endif
>>  
>> +static int check_dev_num(dev_t dev)
>> +{
>> +	int part = 0;
>> +	struct gendisk *disk;
>> +
>> +	disk = get_gendisk(dev, &part);
>> +	if (!disk || part)
>> +		return -ENODEV;
>> +
>> +	return 0;
>> +}
>> +
>> +static int policy_parse_and_set(char *buf, struct blkio_policy_node *newpn)
>> +{
>> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>> +	int ret;
>> +	unsigned long major, minor, temp;
>> +	int i = 0;
>> +	dev_t dev;
>> +
>> +	memset(s, 0, sizeof(s));
>> +
>> +	while ((p = strsep(&buf, " ")) != NULL) {
>> +		if (!*p)
>> +			continue;
>> +
>> +		s[i++] = p;
>> +
>> +		/* Prevent from inputing too many things */
>> +		if (i == 3)
>> +			break;
>> +	}
>> +
>> +	if (i != 2)
>> +		return -EINVAL;
>> +
>> +	p = strsep(&s[0], ":");
>> +	if (p != NULL)
>> +		major_s = p;
>> +	else
>> +		return -EINVAL;
>> +
>> +	minor_s = s[0];
>> +	if (!minor_s)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(major_s, 10, &major);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(minor_s, 10, &minor);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	dev = MKDEV(major, minor);
>> +
>> +	ret = check_dev_num(dev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	newpn->dev = dev;
>> +
>> +	if (s[1] == NULL)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(s[1], 10, &temp);
>> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
>> +	    temp > BLKIO_WEIGHT_MAX)
>> +		return -EINVAL;
>> +
>> +	newpn->weight =  temp;
>> +
>> +	return 0;
>> +}
>> +
>> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +				    dev_t dev)
>> +{
>> +	struct blkio_policy_node *pn;
>> +
>> +	pn = policy_search_node(blkcg, dev);
>> +	if (pn)
>> +		return pn->weight;
>> +	else
>> +		return blkcg->weight;
>> +}
>> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
>> +
>> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
>> +				      struct blkio_policy_node *pn)
>> +{
>> +	list_add(&pn->node, &blkcg->policy_list);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static inline void policy_delete_node(struct blkio_policy_node *pn)
>> +{
>> +	list_del(&pn->node);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static struct blkio_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
>> +						 dev_t dev)
>> +{
>> +	struct blkio_policy_node *pn;
>> +
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		if (pn->dev == dev)
>> +			return pn;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +
>> +static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
>> +				const char *buffer)
>> +{
>> +	int ret = 0;
>> +	char *buf;
>> +	struct blkio_policy_node *newpn, *pn;
>> +	struct blkio_cgroup *blkcg;
>> +	struct blkio_group *blkg;
>> +	int keep_newpn = 0;
>> +	struct hlist_node *n;
>> +	struct blkio_policy_type *blkiop;
>> +
>> +	buf = kstrdup(buffer, GFP_KERNEL);
>> +	if (!buf)
>> +		return -ENOMEM;
>> +
>> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
>> +	if (!newpn) {
>> +		ret = -ENOMEM;
>> +		goto free_buf;
>> +	}
>> +
>> +	ret = policy_parse_and_set(buf, newpn);
>> +	if (ret)
>> +		goto free_newpn;
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	pn = policy_search_node(blkcg, newpn->dev);
>> +	if (!pn) {
>> +		if (newpn->weight != 0) {
>> +			policy_insert_node(blkcg, newpn);
>> +			keep_newpn = 1;
>> +		}
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +
>> +	if (newpn->weight == 0) {
>> +		/* weight == 0 means deleteing a specific weight */
>> +		policy_delete_node(pn);
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +
>> +	pn->weight = newpn->weight;
>> +
>> +update_io_group:
>> +	/* update weight for each cfqg */
>> +	spin_lock(&blkio_list_lock);
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		if (newpn->dev == blkg->dev) {
>> +			list_for_each_entry(blkiop, &blkio_list, list)
>> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
>> +						newpn->weight ?
>> +						newpn->weight :
>> +						blkcg->weight);
>> +		}
>> +	}
>> +
>> +	spin_unlock_irq(&blkcg->lock);
>> +	spin_unlock(&blkio_list_lock);
>> +
>> +free_newpn:
>> +	if (!keep_newpn)
>> +		kfree(newpn);
>> +free_buf:
>> +	kfree(buf);
>> +	return ret;
>> +}
>> +
>> +static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
>> +				       struct seq_file *m)
>> +{
>> +	struct blkio_cgroup *blkcg;
>> +	struct blkio_policy_node *pn;
>> +
>> +	seq_printf(m, "dev\tweight\n");
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +	if (list_empty(&blkcg->policy_list))
>> +		goto out;
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
>> +			   MINOR(pn->dev), pn->weight);
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +out:
>> +	return 0;
>> +}
>> +
>>  struct cftype blkio_files[] = {
>>  	{
>> +		.name = "weight_device",
>> +		.read_seq_string = blkiocg_weight_device_read,
>> +		.write_string = blkiocg_weight_device_write,
>> +		.max_write_len = 256,
>> +	},
>> +	{
>>  		.name = "weight",
>>  		.read_u64 = blkiocg_weight_read,
>>  		.write_u64 = blkiocg_weight_write,
>> @@ -220,6 +453,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>>  	struct blkio_group *blkg;
>>  	void *key;
>>  	struct blkio_policy_type *blkiop;
>> +	struct blkio_policy_node *pn, *pntmp;
>>  
>>  	rcu_read_lock();
>>  remove_entry:
>> @@ -250,7 +484,12 @@ remove_entry:
>>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>>  	spin_unlock(&blkio_list_lock);
>>  	goto remove_entry;
>> +
>>  done:
>> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
>> +		policy_delete_node(pn);
>> +		kfree(pn);
>> +	}
>>  	free_css_id(&blkio_subsys, &blkcg->css);
>>  	rcu_read_unlock();
>>  	kfree(blkcg);
>> @@ -280,6 +519,7 @@ done:
>>  	spin_lock_init(&blkcg->lock);
>>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>>  
>> +	INIT_LIST_HEAD(&blkcg->policy_list);
>>  	return &blkcg->css;
>>  }
>>  
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index 84bf745..c8144a2 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>>  	unsigned int weight;
>>  	spinlock_t lock;
>>  	struct hlist_head blkg_list;
>> +	struct list_head policy_list; /* list of blkio_policy_node */
>>  };
>>  
>>  struct blkio_group {
>> @@ -43,6 +44,15 @@ struct blkio_group {
>>  	unsigned long sectors;
>>  };
>>  
>> +struct blkio_policy_node {
>> +	struct list_head node;
>> +	dev_t dev;
>> +	unsigned int weight;
>> +};
>> +
>> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +				     dev_t dev);
>> +
>>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
>>  						unsigned int weight);
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index dee9d93..6945e9b 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -954,7 +954,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	if (!cfqg)
>>  		goto done;
>>  
>> -	cfqg->weight = blkcg->weight;
>>  	for_each_cfqg_st(cfqg, i, j, st)
>>  		*st = CFQ_RB_ROOT;
>>  	RB_CLEAR_NODE(&cfqg->rb_node);
>> @@ -971,6 +970,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>>  					MKDEV(major, minor));
>> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>>  
>>  	/* Add group on cfqd list */
>>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>> -- 
>> 1.5.4.rc3 
>>
>>
> 
> 
> 

-- 
Regards
Gui Jianfeng


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2 V2] io-controller: Document for blkio.weight_device
  2010-03-04 11:22             ` Takuya Yoshikawa
@ 2010-03-05  0:38               ` Gui Jianfeng
  0 siblings, 0 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-05  0:38 UTC (permalink / raw)
  To: Takuya Yoshikawa
  Cc: vgoyal, jens.axboe, Chad Talbott, linux-kernel, Nauman Rafique

Takuya Yoshikawa wrote:
> Hi, I found a typo.
> 
> Gui Jianfeng wrote:
>> +- blkio.weight_device
>> +    - One can specify per cgroup per device rules using this interface.
>> +      These rules override the default value of group weight as
>> specified
>> +      by blkio.weight.
>> +
>> +      Following is the format.
>> +
>> +      #echo dev_maj:dev_minor weight >
>> /patch/to/cgroup/blkio.weight_device
> 
>                                   You mean "path to"?
> 

yeah, will fix.

Thanks
Gui

> Thanks,
>   Takuya
> 
>> +
>> +      weight=0 means removing weight on a given device.
>> +
>> +      Examples:
>> +      Configure weight=300 on /dev/sdb (8:16) in this cgroup
>> +      # echo 8:16 300 > blkio.weight_device
>> +      # cat blkio.weight_device
>> +      dev     weight
>> +      8:16    300
>> +
>> +      Configure weight=500 on /dev/sda (8:0) in this cgroup
>> +      # echo 8:0 500 > blkio.weight_device
>> +      # cat blkio.weight_device
>> +      dev     weight
>> +      8:0     500
>> +      8:16    300
>> +
>> +      Remove specific weight for /dev/sda in this cgroup
>> +      # echo 8:0 0 > blkio.weight_device
>> +      # cat blkio.weight_device
>> +      dev     weight
>> +      8:16    300
>> +
>>  CFQ sysfs tunable
>>  =================
>>  /sys/block/<disk>/queue/iosched/group_isolation
> 
> 
> 
> 


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-04 15:24             ` Vivek Goyal
  2010-03-05  0:31               ` Gui Jianfeng
@ 2010-03-05  2:25               ` Gui Jianfeng
  2010-03-05 14:13                 ` Vivek Goyal
  2010-03-05  2:26               ` [PATCH 2/2 V3] io-controller: Document for blkio.weight_device Gui Jianfeng
  2 siblings, 1 reply; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-05  2:25 UTC (permalink / raw)
  To: Vivek Goyal, jens.axboe
  Cc: Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

Currently, IO Controller makes use of blkio.weight to assign weight for
all devices. Here a new user interface "blkio.weight_device" is introduced to
assign different weights for different devices. blkio.weight becomes the
default value for devices which are not configured by "blkio.weight_device"

You can use the following format to assigned specific weight for a given
device:

major:minor represents device number.

And you can remove a specific weight as following:

V1->V2 changes:
- use user interface "weight_device" instead of "policy" suggested by Vivek
- rename some struct suggested by Vivek
- rebase to 2.6-block "for-linus" branch
- remove an useless list_empty check pointed out by Li Zefan
- some trivial typo fix

V2->V3 changes:
- Move policy_*_node() functions up to get rid of forward declarations
- rename related functions by adding prefix "blkio_"

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
---
 block/blk-cgroup.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 block/blk-cgroup.h  |   10 ++
 block/cfq-iosched.c |    2 +-
 3 files changed, 246 insertions(+), 2 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c85d74c..8825e49 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include "blk-cgroup.h"
+#include <linux/genhd.h>
 
 static DEFINE_SPINLOCK(blkio_list_lock);
 static LIST_HEAD(blkio_list);
@@ -23,6 +24,32 @@ static LIST_HEAD(blkio_list);
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
+static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
+					    struct blkio_policy_node *pn)
+{
+	list_add(&pn->node, &blkcg->policy_list);
+}
+
+/* Must be called with blkcg->lock held */
+static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
+{
+	list_del(&pn->node);
+}
+
+/* Must be called with blkcg->lock held */
+static struct blkio_policy_node *
+blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
+{
+	struct blkio_policy_node *pn;
+
+	list_for_each_entry(pn, &blkcg->policy_list, node) {
+		if (pn->dev == dev)
+			return pn;
+	}
+
+	return NULL;
+}
+
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 {
 	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
@@ -128,6 +155,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	struct blkio_group *blkg;
 	struct hlist_node *n;
 	struct blkio_policy_type *blkiop;
+	struct blkio_policy_node *pn;
 
 	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
 		return -EINVAL;
@@ -136,7 +164,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	spin_lock(&blkio_list_lock);
 	spin_lock_irq(&blkcg->lock);
 	blkcg->weight = (unsigned int)val;
+
 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		pn = blkio_policy_search_node(blkcg, blkg->dev);
+
+		if (pn)
+			continue;
+
 		list_for_each_entry(blkiop, &blkio_list, list)
 			blkiop->ops.blkio_update_group_weight_fn(blkg,
 					blkcg->weight);
@@ -178,15 +212,208 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
 
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
-			unsigned long dequeue)
+					      unsigned long dequeue)
 {
 	blkg->dequeue += dequeue;
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
 #endif
 
+static int blkio_check_dev_num(dev_t dev)
+{
+	int part = 0;
+	struct gendisk *disk;
+
+	disk = get_gendisk(dev, &part);
+	if (!disk || part)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int blkio_policy_parse_and_set(char *buf,
+				      struct blkio_policy_node *newpn)
+{
+	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
+	int ret;
+	unsigned long major, minor, temp;
+	int i = 0;
+	dev_t dev;
+
+	memset(s, 0, sizeof(s));
+
+	while ((p = strsep(&buf, " ")) != NULL) {
+		if (!*p)
+			continue;
+
+		s[i++] = p;
+
+		/* Prevent from inputing too many things */
+		if (i == 3)
+			break;
+	}
+
+	if (i != 2)
+		return -EINVAL;
+
+	p = strsep(&s[0], ":");
+	if (p != NULL)
+		major_s = p;
+	else
+		return -EINVAL;
+
+	minor_s = s[0];
+	if (!minor_s)
+		return -EINVAL;
+
+	ret = strict_strtoul(major_s, 10, &major);
+	if (ret)
+		return -EINVAL;
+
+	ret = strict_strtoul(minor_s, 10, &minor);
+	if (ret)
+		return -EINVAL;
+
+	dev = MKDEV(major, minor);
+
+	ret = blkio_check_dev_num(dev);
+	if (ret)
+		return ret;
+
+	newpn->dev = dev;
+
+	if (s[1] == NULL)
+		return -EINVAL;
+
+	ret = strict_strtoul(s[1], 10, &temp);
+	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+	    temp > BLKIO_WEIGHT_MAX)
+		return -EINVAL;
+
+	newpn->weight =  temp;
+
+	return 0;
+}
+
+unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+			      dev_t dev)
+{
+	struct blkio_policy_node *pn;
+
+	pn = blkio_policy_search_node(blkcg, dev);
+	if (pn)
+		return pn->weight;
+	else
+		return blkcg->weight;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_weight);
+
+static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
+				       const char *buffer)
+{
+	int ret = 0;
+	char *buf;
+	struct blkio_policy_node *newpn, *pn;
+	struct blkio_cgroup *blkcg;
+	struct blkio_group *blkg;
+	int keep_newpn = 0;
+	struct hlist_node *n;
+	struct blkio_policy_type *blkiop;
+
+	buf = kstrdup(buffer, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
+	if (!newpn) {
+		ret = -ENOMEM;
+		goto free_buf;
+	}
+
+	ret = blkio_policy_parse_and_set(buf, newpn);
+	if (ret)
+		goto free_newpn;
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+	spin_lock_irq(&blkcg->lock);
+
+	pn = blkio_policy_search_node(blkcg, newpn->dev);
+	if (!pn) {
+		if (newpn->weight != 0) {
+			blkio_policy_insert_node(blkcg, newpn);
+			keep_newpn = 1;
+		}
+		spin_unlock_irq(&blkcg->lock);
+		goto update_io_group;
+	}
+
+	if (newpn->weight == 0) {
+		/* weight == 0 means deleteing a specific weight */
+		blkio_policy_delete_node(pn);
+		spin_unlock_irq(&blkcg->lock);
+		goto update_io_group;
+	}
+	spin_unlock_irq(&blkcg->lock);
+
+	pn->weight = newpn->weight;
+
+update_io_group:
+	/* update weight for each cfqg */
+	spin_lock(&blkio_list_lock);
+	spin_lock_irq(&blkcg->lock);
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+		if (newpn->dev == blkg->dev) {
+			list_for_each_entry(blkiop, &blkio_list, list)
+				blkiop->ops.blkio_update_group_weight_fn(blkg,
+						newpn->weight ?
+						newpn->weight :
+						blkcg->weight);
+		}
+	}
+
+	spin_unlock_irq(&blkcg->lock);
+	spin_unlock(&blkio_list_lock);
+
+free_newpn:
+	if (!keep_newpn)
+		kfree(newpn);
+free_buf:
+	kfree(buf);
+	return ret;
+}
+
+static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
+				      struct seq_file *m)
+{
+	struct blkio_cgroup *blkcg;
+	struct blkio_policy_node *pn;
+
+	seq_printf(m, "dev\tweight\n");
+
+	blkcg = cgroup_to_blkio_cgroup(cgrp);
+	if (list_empty(&blkcg->policy_list))
+		goto out;
+
+	spin_lock_irq(&blkcg->lock);
+	list_for_each_entry(pn, &blkcg->policy_list, node) {
+		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
+			   MINOR(pn->dev), pn->weight);
+	}
+	spin_unlock_irq(&blkcg->lock);
+out:
+	return 0;
+}
+
 struct cftype blkio_files[] = {
 	{
+		.name = "weight_device",
+		.read_seq_string = blkiocg_weight_device_read,
+		.write_string = blkiocg_weight_device_write,
+		.max_write_len = 256,
+	},
+	{
 		.name = "weight",
 		.read_u64 = blkiocg_weight_read,
 		.write_u64 = blkiocg_weight_write,
@@ -220,6 +447,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 	struct blkio_group *blkg;
 	void *key;
 	struct blkio_policy_type *blkiop;
+	struct blkio_policy_node *pn, *pntmp;
 
 	rcu_read_lock();
 remove_entry:
@@ -250,7 +478,12 @@ remove_entry:
 		blkiop->ops.blkio_unlink_group_fn(key, blkg);
 	spin_unlock(&blkio_list_lock);
 	goto remove_entry;
+
 done:
+	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
+		blkio_policy_delete_node(pn);
+		kfree(pn);
+	}
 	free_css_id(&blkio_subsys, &blkcg->css);
 	rcu_read_unlock();
 	kfree(blkcg);
@@ -280,6 +513,7 @@ done:
 	spin_lock_init(&blkcg->lock);
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 
+	INIT_LIST_HEAD(&blkcg->policy_list);
 	return &blkcg->css;
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 84bf745..c8144a2 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -22,6 +22,7 @@ struct blkio_cgroup {
 	unsigned int weight;
 	spinlock_t lock;
 	struct hlist_head blkg_list;
+	struct list_head policy_list; /* list of blkio_policy_node */
 };
 
 struct blkio_group {
@@ -43,6 +44,15 @@ struct blkio_group {
 	unsigned long sectors;
 };
 
+struct blkio_policy_node {
+	struct list_head node;
+	dev_t dev;
+	unsigned int weight;
+};
+
+extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+				     dev_t dev);
+
 typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
 typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
 						unsigned int weight);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index dee9d93..6945e9b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -954,7 +954,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	if (!cfqg)
 		goto done;
 
-	cfqg->weight = blkcg->weight;
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
 	RB_CLEAR_NODE(&cfqg->rb_node);
@@ -971,6 +970,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
 	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
 					MKDEV(major, minor));
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
 	/* Add group on cfqd list */
 	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-- 
1.5.4.rc3






^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/2 V3] io-controller: Document for blkio.weight_device
  2010-03-04 15:24             ` Vivek Goyal
  2010-03-05  0:31               ` Gui Jianfeng
  2010-03-05  2:25               ` [PATCH 1/2 V3] " Gui Jianfeng
@ 2010-03-05  2:26               ` Gui Jianfeng
  2010-03-05  2:44                 ` Takuya Yoshikawa
  2010-03-05 14:13                 ` Vivek Goyal
  2 siblings, 2 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-05  2:26 UTC (permalink / raw)
  To: Vivek Goyal, jens.axboe
  Cc: Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

Here is the document for blkio.weight_device

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
---
 Documentation/cgroups/blkio-controller.txt |   35 +++++++++++++++++++++++++++-
 1 files changed, 34 insertions(+), 1 deletions(-)

diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 630879c..e551c85 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -76,7 +76,9 @@ CONFIG_DEBUG_BLK_CGROUP
 Details of cgroup files
 =======================
 - blkio.weight
-	- Specifies per cgroup weight.
+	- Specifies per cgroup weight. This is default weight of the group
+	  on all the devices until and unless overridden by per device rule.
+	  (See blkio.weight_device).
 
 	  Currently allowed range of weights is from 100 to 1000.
 
@@ -99,6 +101,37 @@ Details of cgroup files
 	  and minor number of the device and third field specifies the number
 	  of times a group was dequeued from a particular device.
 
+- blkio.weight_device
+	- One can specify per cgroup per device rules using this interface.
+	  These rules override the default value of group weight as specified
+	  by blkio.weight.
+
+	  Following is the format.
+
+	  #echo dev_maj:dev_minor weight > /pach/to/cgroup/blkio.weight_device
+
+	  weight=0 means removing weight on a given device.
+
+	  Examples:
+	  Configure weight=300 on /dev/sdb (8:16) in this cgroup
+	  # echo 8:16 300 > blkio.weight_device
+	  # cat blkio.weight_device
+	  dev     weight
+	  8:16    300
+
+	  Configure weight=500 on /dev/sda (8:0) in this cgroup
+	  # echo 8:0 500 > blkio.weight_device
+	  # cat blkio.weight_device
+	  dev     weight
+	  8:0     500
+	  8:16    300
+
+	  Remove specific weight for /dev/sda in this cgroup
+	  # echo 8:0 0 > blkio.weight_device
+	  # cat blkio.weight_device
+	  dev     weight
+	  8:16    300
+
 CFQ sysfs tunable
 =================
 /sys/block/<disk>/queue/iosched/group_isolation
-- 
1.5.4.rc3





^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2 V3] io-controller: Document for blkio.weight_device
  2010-03-05  2:44                 ` Takuya Yoshikawa
@ 2010-03-05  2:42                   ` Gui Jianfeng
  0 siblings, 0 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-05  2:42 UTC (permalink / raw)
  To: Takuya Yoshikawa
  Cc: Vivek Goyal, jens.axboe, Chad Talbott, linux-kernel,
	Nauman Rafique, Li Zefan

Takuya Yoshikawa wrote:
> Hi, thank you for fixing ...
> 
> Gui Jianfeng wrote:
>> +- blkio.weight_device
>> +    - One can specify per cgroup per device rules using this interface.
>> +      These rules override the default value of group weight as
>> specified
>> +      by blkio.weight.
>> +
>> +      Following is the format.
>> +
>> +      #echo dev_maj:dev_minor weight >
>> /pach/to/cgroup/blkio.weight_device
> 
> You deleted wrong character!  We need 't' not 'c'.

OOPS, Sorry :(


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2 V3] io-controller: Document for blkio.weight_device
  2010-03-05  2:26               ` [PATCH 2/2 V3] io-controller: Document for blkio.weight_device Gui Jianfeng
@ 2010-03-05  2:44                 ` Takuya Yoshikawa
  2010-03-05  2:42                   ` Gui Jianfeng
  2010-03-05 14:13                 ` Vivek Goyal
  1 sibling, 1 reply; 40+ messages in thread
From: Takuya Yoshikawa @ 2010-03-05  2:44 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: Vivek Goyal, jens.axboe, Chad Talbott, linux-kernel,
	Nauman Rafique, Li Zefan

Hi, thank you for fixing ...

Gui Jianfeng wrote:
> +- blkio.weight_device
> +	- One can specify per cgroup per device rules using this interface.
> +	  These rules override the default value of group weight as specified
> +	  by blkio.weight.
> +
> +	  Following is the format.
> +
> +	  #echo dev_maj:dev_minor weight > /pach/to/cgroup/blkio.weight_device

You deleted wrong character!  We need 't' not 'c'.

Thanks,
   Takuya

> +
> +	  weight=0 means removing weight on a given device.
> +
> +	  Examples:
> +	  Configure weight=300 on /dev/sdb (8:16) in this cgroup
> +	  # echo 8:16 300 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:16    300
> +
> +	  Configure weight=500 on /dev/sda (8:0) in this cgroup
> +	  # echo 8:0 500 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:0     500
> +	  8:16    300
> +
> +	  Remove specific weight for /dev/sda in this cgroup
> +	  # echo 8:0 0 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:16    300
> +
>  CFQ sysfs tunable
>  =================
>  /sys/block/<disk>/queue/iosched/group_isolation


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-05  2:25               ` [PATCH 1/2 V3] " Gui Jianfeng
@ 2010-03-05 14:13                 ` Vivek Goyal
  2010-03-08 19:39                   ` Nauman Rafique
  2010-03-25  6:28                   ` Gui Jianfeng
  0 siblings, 2 replies; 40+ messages in thread
From: Vivek Goyal @ 2010-03-05 14:13 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: jens.axboe, Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
> Currently, IO Controller makes use of blkio.weight to assign weight for
> all devices. Here a new user interface "blkio.weight_device" is introduced to
> assign different weights for different devices. blkio.weight becomes the
> default value for devices which are not configured by "blkio.weight_device"
> 
> You can use the following format to assigned specific weight for a given
> device:
> 
> major:minor represents device number.
> 
> And you can remove a specific weight as following:
> 
> V1->V2 changes:
> - use user interface "weight_device" instead of "policy" suggested by Vivek
> - rename some struct suggested by Vivek
> - rebase to 2.6-block "for-linus" branch
> - remove an useless list_empty check pointed out by Li Zefan
> - some trivial typo fix
> 
> V2->V3 changes:
> - Move policy_*_node() functions up to get rid of forward declarations
> - rename related functions by adding prefix "blkio_"
> 

Thanks for the changes Gui. Looks good to me.

Acked-by: Vivek Goyal <vgoyal@redhat.com>

Vivek

> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
> ---
>  block/blk-cgroup.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  block/blk-cgroup.h  |   10 ++
>  block/cfq-iosched.c |    2 +-
>  3 files changed, 246 insertions(+), 2 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index c85d74c..8825e49 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -16,6 +16,7 @@
>  #include <linux/module.h>
>  #include <linux/err.h>
>  #include "blk-cgroup.h"
> +#include <linux/genhd.h>
>  
>  static DEFINE_SPINLOCK(blkio_list_lock);
>  static LIST_HEAD(blkio_list);
> @@ -23,6 +24,32 @@ static LIST_HEAD(blkio_list);
>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>  
> +static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
> +					    struct blkio_policy_node *pn)
> +{
> +	list_add(&pn->node, &blkcg->policy_list);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
> +{
> +	list_del(&pn->node);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static struct blkio_policy_node *
> +blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
> +{
> +	struct blkio_policy_node *pn;
> +
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		if (pn->dev == dev)
> +			return pn;
> +	}
> +
> +	return NULL;
> +}
> +
>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>  {
>  	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
> @@ -128,6 +155,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	struct blkio_group *blkg;
>  	struct hlist_node *n;
>  	struct blkio_policy_type *blkiop;
> +	struct blkio_policy_node *pn;
>  
>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>  		return -EINVAL;
> @@ -136,7 +164,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	spin_lock(&blkio_list_lock);
>  	spin_lock_irq(&blkcg->lock);
>  	blkcg->weight = (unsigned int)val;
> +
>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		pn = blkio_policy_search_node(blkcg, blkg->dev);
> +
> +		if (pn)
> +			continue;
> +
>  		list_for_each_entry(blkiop, &blkio_list, list)
>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>  					blkcg->weight);
> @@ -178,15 +212,208 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>  
>  #ifdef CONFIG_DEBUG_BLK_CGROUP
>  void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> -			unsigned long dequeue)
> +					      unsigned long dequeue)
>  {
>  	blkg->dequeue += dequeue;
>  }
>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>  #endif
>  
> +static int blkio_check_dev_num(dev_t dev)
> +{
> +	int part = 0;
> +	struct gendisk *disk;
> +
> +	disk = get_gendisk(dev, &part);
> +	if (!disk || part)
> +		return -ENODEV;
> +
> +	return 0;
> +}
> +
> +static int blkio_policy_parse_and_set(char *buf,
> +				      struct blkio_policy_node *newpn)
> +{
> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> +	int ret;
> +	unsigned long major, minor, temp;
> +	int i = 0;
> +	dev_t dev;
> +
> +	memset(s, 0, sizeof(s));
> +
> +	while ((p = strsep(&buf, " ")) != NULL) {
> +		if (!*p)
> +			continue;
> +
> +		s[i++] = p;
> +
> +		/* Prevent from inputing too many things */
> +		if (i == 3)
> +			break;
> +	}
> +
> +	if (i != 2)
> +		return -EINVAL;
> +
> +	p = strsep(&s[0], ":");
> +	if (p != NULL)
> +		major_s = p;
> +	else
> +		return -EINVAL;
> +
> +	minor_s = s[0];
> +	if (!minor_s)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(major_s, 10, &major);
> +	if (ret)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(minor_s, 10, &minor);
> +	if (ret)
> +		return -EINVAL;
> +
> +	dev = MKDEV(major, minor);
> +
> +	ret = blkio_check_dev_num(dev);
> +	if (ret)
> +		return ret;
> +
> +	newpn->dev = dev;
> +
> +	if (s[1] == NULL)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(s[1], 10, &temp);
> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> +	    temp > BLKIO_WEIGHT_MAX)
> +		return -EINVAL;
> +
> +	newpn->weight =  temp;
> +
> +	return 0;
> +}
> +
> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +			      dev_t dev)
> +{
> +	struct blkio_policy_node *pn;
> +
> +	pn = blkio_policy_search_node(blkcg, dev);
> +	if (pn)
> +		return pn->weight;
> +	else
> +		return blkcg->weight;
> +}
> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
> +
> +static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
> +				       const char *buffer)
> +{
> +	int ret = 0;
> +	char *buf;
> +	struct blkio_policy_node *newpn, *pn;
> +	struct blkio_cgroup *blkcg;
> +	struct blkio_group *blkg;
> +	int keep_newpn = 0;
> +	struct hlist_node *n;
> +	struct blkio_policy_type *blkiop;
> +
> +	buf = kstrdup(buffer, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
> +	if (!newpn) {
> +		ret = -ENOMEM;
> +		goto free_buf;
> +	}
> +
> +	ret = blkio_policy_parse_and_set(buf, newpn);
> +	if (ret)
> +		goto free_newpn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	spin_lock_irq(&blkcg->lock);
> +
> +	pn = blkio_policy_search_node(blkcg, newpn->dev);
> +	if (!pn) {
> +		if (newpn->weight != 0) {
> +			blkio_policy_insert_node(blkcg, newpn);
> +			keep_newpn = 1;
> +		}
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +
> +	if (newpn->weight == 0) {
> +		/* weight == 0 means deleteing a specific weight */
> +		blkio_policy_delete_node(pn);
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +
> +	pn->weight = newpn->weight;
> +
> +update_io_group:
> +	/* update weight for each cfqg */
> +	spin_lock(&blkio_list_lock);
> +	spin_lock_irq(&blkcg->lock);
> +
> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		if (newpn->dev == blkg->dev) {
> +			list_for_each_entry(blkiop, &blkio_list, list)
> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
> +						newpn->weight ?
> +						newpn->weight :
> +						blkcg->weight);
> +		}
> +	}
> +
> +	spin_unlock_irq(&blkcg->lock);
> +	spin_unlock(&blkio_list_lock);
> +
> +free_newpn:
> +	if (!keep_newpn)
> +		kfree(newpn);
> +free_buf:
> +	kfree(buf);
> +	return ret;
> +}
> +
> +static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
> +				      struct seq_file *m)
> +{
> +	struct blkio_cgroup *blkcg;
> +	struct blkio_policy_node *pn;
> +
> +	seq_printf(m, "dev\tweight\n");
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +	if (list_empty(&blkcg->policy_list))
> +		goto out;
> +
> +	spin_lock_irq(&blkcg->lock);
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> +			   MINOR(pn->dev), pn->weight);
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +out:
> +	return 0;
> +}
> +
>  struct cftype blkio_files[] = {
>  	{
> +		.name = "weight_device",
> +		.read_seq_string = blkiocg_weight_device_read,
> +		.write_string = blkiocg_weight_device_write,
> +		.max_write_len = 256,
> +	},
> +	{
>  		.name = "weight",
>  		.read_u64 = blkiocg_weight_read,
>  		.write_u64 = blkiocg_weight_write,
> @@ -220,6 +447,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>  	struct blkio_group *blkg;
>  	void *key;
>  	struct blkio_policy_type *blkiop;
> +	struct blkio_policy_node *pn, *pntmp;
>  
>  	rcu_read_lock();
>  remove_entry:
> @@ -250,7 +478,12 @@ remove_entry:
>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>  	spin_unlock(&blkio_list_lock);
>  	goto remove_entry;
> +
>  done:
> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
> +		blkio_policy_delete_node(pn);
> +		kfree(pn);
> +	}
>  	free_css_id(&blkio_subsys, &blkcg->css);
>  	rcu_read_unlock();
>  	kfree(blkcg);
> @@ -280,6 +513,7 @@ done:
>  	spin_lock_init(&blkcg->lock);
>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>  
> +	INIT_LIST_HEAD(&blkcg->policy_list);
>  	return &blkcg->css;
>  }
>  
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index 84bf745..c8144a2 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>  	unsigned int weight;
>  	spinlock_t lock;
>  	struct hlist_head blkg_list;
> +	struct list_head policy_list; /* list of blkio_policy_node */
>  };
>  
>  struct blkio_group {
> @@ -43,6 +44,15 @@ struct blkio_group {
>  	unsigned long sectors;
>  };
>  
> +struct blkio_policy_node {
> +	struct list_head node;
> +	dev_t dev;
> +	unsigned int weight;
> +};
> +
> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				     dev_t dev);
> +
>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
>  						unsigned int weight);
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index dee9d93..6945e9b 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -954,7 +954,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	if (!cfqg)
>  		goto done;
>  
> -	cfqg->weight = blkcg->weight;
>  	for_each_cfqg_st(cfqg, i, j, st)
>  		*st = CFQ_RB_ROOT;
>  	RB_CLEAR_NODE(&cfqg->rb_node);
> @@ -971,6 +970,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>  					MKDEV(major, minor));
> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>  
>  	/* Add group on cfqd list */
>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> -- 
> 1.5.4.rc3
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2 V3] io-controller: Document for blkio.weight_device
  2010-03-05  2:26               ` [PATCH 2/2 V3] io-controller: Document for blkio.weight_device Gui Jianfeng
  2010-03-05  2:44                 ` Takuya Yoshikawa
@ 2010-03-05 14:13                 ` Vivek Goyal
  1 sibling, 0 replies; 40+ messages in thread
From: Vivek Goyal @ 2010-03-05 14:13 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: jens.axboe, Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

On Fri, Mar 05, 2010 at 10:26:39AM +0800, Gui Jianfeng wrote:
> Here is the document for blkio.weight_device
> 
> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>

Looks good to me.

Acked-by: Vivek Goyal <vgoyal@redhat.com>

Vivek

> ---
>  Documentation/cgroups/blkio-controller.txt |   35 +++++++++++++++++++++++++++-
>  1 files changed, 34 insertions(+), 1 deletions(-)
> 
> diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
> index 630879c..e551c85 100644
> --- a/Documentation/cgroups/blkio-controller.txt
> +++ b/Documentation/cgroups/blkio-controller.txt
> @@ -76,7 +76,9 @@ CONFIG_DEBUG_BLK_CGROUP
>  Details of cgroup files
>  =======================
>  - blkio.weight
> -	- Specifies per cgroup weight.
> +	- Specifies per cgroup weight. This is default weight of the group
> +	  on all the devices until and unless overridden by per device rule.
> +	  (See blkio.weight_device).
>  
>  	  Currently allowed range of weights is from 100 to 1000.
>  
> @@ -99,6 +101,37 @@ Details of cgroup files
>  	  and minor number of the device and third field specifies the number
>  	  of times a group was dequeued from a particular device.
>  
> +- blkio.weight_device
> +	- One can specify per cgroup per device rules using this interface.
> +	  These rules override the default value of group weight as specified
> +	  by blkio.weight.
> +
> +	  Following is the format.
> +
> +	  #echo dev_maj:dev_minor weight > /pach/to/cgroup/blkio.weight_device
> +
> +	  weight=0 means removing weight on a given device.
> +
> +	  Examples:
> +	  Configure weight=300 on /dev/sdb (8:16) in this cgroup
> +	  # echo 8:16 300 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:16    300
> +
> +	  Configure weight=500 on /dev/sda (8:0) in this cgroup
> +	  # echo 8:0 500 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:0     500
> +	  8:16    300
> +
> +	  Remove specific weight for /dev/sda in this cgroup
> +	  # echo 8:0 0 > blkio.weight_device
> +	  # cat blkio.weight_device
> +	  dev     weight
> +	  8:16    300
> +
>  CFQ sysfs tunable
>  =================
>  /sys/block/<disk>/queue/iosched/group_isolation
> -- 
> 1.5.4.rc3
> 
> 
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device"  for IO-Controller
  2010-03-05 14:13                 ` Vivek Goyal
@ 2010-03-08 19:39                   ` Nauman Rafique
  2010-03-08 23:09                     ` Vivek Goyal
  2010-03-09 20:16                     ` Vivek Goyal
  2010-03-25  6:28                   ` Gui Jianfeng
  1 sibling, 2 replies; 40+ messages in thread
From: Nauman Rafique @ 2010-03-08 19:39 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Gui Jianfeng, jens.axboe, Chad Talbott, linux-kernel, Li Zefan

On Fri, Mar 5, 2010 at 6:13 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
> On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
>> Currently, IO Controller makes use of blkio.weight to assign weight for
>> all devices. Here a new user interface "blkio.weight_device" is introduced to
>> assign different weights for different devices. blkio.weight becomes the
>> default value for devices which are not configured by "blkio.weight_device"
>>
>> You can use the following format to assigned specific weight for a given
>> device:
>>
>> major:minor represents device number.
>>
>> And you can remove a specific weight as following:
>>
>> V1->V2 changes:
>> - use user interface "weight_device" instead of "policy" suggested by Vivek
>> - rename some struct suggested by Vivek
>> - rebase to 2.6-block "for-linus" branch
>> - remove an useless list_empty check pointed out by Li Zefan
>> - some trivial typo fix
>>
>> V2->V3 changes:
>> - Move policy_*_node() functions up to get rid of forward declarations
>> - rename related functions by adding prefix "blkio_"
>>
>
> Thanks for the changes Gui. Looks good to me.
>
> Acked-by: Vivek Goyal <vgoyal@redhat.com>
>
> Vivek
>
>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>> ---
>>  block/blk-cgroup.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  block/blk-cgroup.h  |   10 ++
>>  block/cfq-iosched.c |    2 +-
>>  3 files changed, 246 insertions(+), 2 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index c85d74c..8825e49 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -16,6 +16,7 @@
>>  #include <linux/module.h>
>>  #include <linux/err.h>
>>  #include "blk-cgroup.h"
>> +#include <linux/genhd.h>
>>
>>  static DEFINE_SPINLOCK(blkio_list_lock);
>>  static LIST_HEAD(blkio_list);
>> @@ -23,6 +24,32 @@ static LIST_HEAD(blkio_list);
>>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>
>> +static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
>> +                                         struct blkio_policy_node *pn)
>> +{
>> +     list_add(&pn->node, &blkcg->policy_list);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
>> +{
>> +     list_del(&pn->node);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static struct blkio_policy_node *
>> +blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
>> +{
>> +     struct blkio_policy_node *pn;
>> +
>> +     list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +             if (pn->dev == dev)
>> +                     return pn;
>> +     }
>> +
>> +     return NULL;
>> +}
>> +
>>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>>  {
>>       return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
>> @@ -128,6 +155,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>       struct blkio_group *blkg;
>>       struct hlist_node *n;
>>       struct blkio_policy_type *blkiop;
>> +     struct blkio_policy_node *pn;
>>
>>       if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>>               return -EINVAL;
>> @@ -136,7 +164,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>       spin_lock(&blkio_list_lock);
>>       spin_lock_irq(&blkcg->lock);
>>       blkcg->weight = (unsigned int)val;
>> +
>>       hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +             pn = blkio_policy_search_node(blkcg, blkg->dev);
>> +
>> +             if (pn)
>> +                     continue;
>> +
>>               list_for_each_entry(blkiop, &blkio_list, list)
>>                       blkiop->ops.blkio_update_group_weight_fn(blkg,
>>                                       blkcg->weight);
>> @@ -178,15 +212,208 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>>
>>  #ifdef CONFIG_DEBUG_BLK_CGROUP
>>  void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>> -                     unsigned long dequeue)
>> +                                           unsigned long dequeue)
>>  {
>>       blkg->dequeue += dequeue;
>>  }
>>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>>  #endif
>>
>> +static int blkio_check_dev_num(dev_t dev)
>> +{
>> +     int part = 0;
>> +     struct gendisk *disk;
>> +
>> +     disk = get_gendisk(dev, &part);
>> +     if (!disk || part)
>> +             return -ENODEV;
>> +
>> +     return 0;
>> +}
>> +
>> +static int blkio_policy_parse_and_set(char *buf,
>> +                                   struct blkio_policy_node *newpn)
>> +{
>> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>> +     int ret;
>> +     unsigned long major, minor, temp;
>> +     int i = 0;
>> +     dev_t dev;
>> +
>> +     memset(s, 0, sizeof(s));
>> +
>> +     while ((p = strsep(&buf, " ")) != NULL) {
>> +             if (!*p)
>> +                     continue;
>> +
>> +             s[i++] = p;
>> +
>> +             /* Prevent from inputing too many things */
>> +             if (i == 3)
>> +                     break;
>> +     }
>> +
>> +     if (i != 2)
>> +             return -EINVAL;
>> +
>> +     p = strsep(&s[0], ":");
>> +     if (p != NULL)
>> +             major_s = p;
>> +     else
>> +             return -EINVAL;
>> +
>> +     minor_s = s[0];
>> +     if (!minor_s)
>> +             return -EINVAL;
>> +
>> +     ret = strict_strtoul(major_s, 10, &major);
>> +     if (ret)
>> +             return -EINVAL;
>> +
>> +     ret = strict_strtoul(minor_s, 10, &minor);
>> +     if (ret)
>> +             return -EINVAL;
>> +
>> +     dev = MKDEV(major, minor);

I am not quite sure if exposing a mojor,minor number is the best
interface that can be exposed to user space. How about actual disk
names like sda, sdb, .. etc? The only problem I see there is that it
seems tricky to get to these disk names from within the block layer.
"struct request_queue" has a pointer to backing_dev which has a device
from which we can get major,minor. But in order to get to disk name,
we would have to call get_gendisk which can hold a semaphore. Is this
the reason for us going with major,minor as a user interface to
specify a disk? I bet there are good reasons for us not keeping a
pointer to "struct gendisk" from "struct request_queue". If we could
keep that pointer, our user interface could be very easily modified to
be the disk name like sda, sdb, etc.

>> +
>> +     ret = blkio_check_dev_num(dev);
>> +     if (ret)
>> +             return ret;
>> +
>> +     newpn->dev = dev;
>> +
>> +     if (s[1] == NULL)
>> +             return -EINVAL;
>> +
>> +     ret = strict_strtoul(s[1], 10, &temp);
>> +     if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
>> +         temp > BLKIO_WEIGHT_MAX)
>> +             return -EINVAL;
>> +
>> +     newpn->weight =  temp;
>> +
>> +     return 0;
>> +}
>> +
>> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +                           dev_t dev)
>> +{
>> +     struct blkio_policy_node *pn;
>> +
>> +     pn = blkio_policy_search_node(blkcg, dev);
>> +     if (pn)
>> +             return pn->weight;
>> +     else
>> +             return blkcg->weight;
>> +}
>> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
>> +
>> +static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
>> +                                    const char *buffer)
>> +{
>> +     int ret = 0;
>> +     char *buf;
>> +     struct blkio_policy_node *newpn, *pn;
>> +     struct blkio_cgroup *blkcg;
>> +     struct blkio_group *blkg;
>> +     int keep_newpn = 0;
>> +     struct hlist_node *n;
>> +     struct blkio_policy_type *blkiop;
>> +
>> +     buf = kstrdup(buffer, GFP_KERNEL);
>> +     if (!buf)
>> +             return -ENOMEM;
>> +
>> +     newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
>> +     if (!newpn) {
>> +             ret = -ENOMEM;
>> +             goto free_buf;
>> +     }
>> +
>> +     ret = blkio_policy_parse_and_set(buf, newpn);
>> +     if (ret)
>> +             goto free_newpn;
>> +
>> +     blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +
>> +     spin_lock_irq(&blkcg->lock);
>> +
>> +     pn = blkio_policy_search_node(blkcg, newpn->dev);
>> +     if (!pn) {
>> +             if (newpn->weight != 0) {
>> +                     blkio_policy_insert_node(blkcg, newpn);
>> +                     keep_newpn = 1;
>> +             }
>> +             spin_unlock_irq(&blkcg->lock);
>> +             goto update_io_group;
>> +     }
>> +
>> +     if (newpn->weight == 0) {
>> +             /* weight == 0 means deleteing a specific weight */
>> +             blkio_policy_delete_node(pn);
>> +             spin_unlock_irq(&blkcg->lock);
>> +             goto update_io_group;
>> +     }
>> +     spin_unlock_irq(&blkcg->lock);
>> +
>> +     pn->weight = newpn->weight;
>> +
>> +update_io_group:
>> +     /* update weight for each cfqg */
>> +     spin_lock(&blkio_list_lock);
>> +     spin_lock_irq(&blkcg->lock);
>> +
>> +     hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +             if (newpn->dev == blkg->dev) {
>> +                     list_for_each_entry(blkiop, &blkio_list, list)
>> +                             blkiop->ops.blkio_update_group_weight_fn(blkg,
>> +                                             newpn->weight ?
>> +                                             newpn->weight :
>> +                                             blkcg->weight);
>> +             }
>> +     }
>> +
>> +     spin_unlock_irq(&blkcg->lock);
>> +     spin_unlock(&blkio_list_lock);
>> +
>> +free_newpn:
>> +     if (!keep_newpn)
>> +             kfree(newpn);
>> +free_buf:
>> +     kfree(buf);
>> +     return ret;
>> +}
>> +
>> +static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
>> +                                   struct seq_file *m)
>> +{
>> +     struct blkio_cgroup *blkcg;
>> +     struct blkio_policy_node *pn;
>> +
>> +     seq_printf(m, "dev\tweight\n");
>> +
>> +     blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +     if (list_empty(&blkcg->policy_list))
>> +             goto out;
>> +
>> +     spin_lock_irq(&blkcg->lock);
>> +     list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +             seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
>> +                        MINOR(pn->dev), pn->weight);
>> +     }
>> +     spin_unlock_irq(&blkcg->lock);
>> +out:
>> +     return 0;
>> +}
>> +
>>  struct cftype blkio_files[] = {
>>       {
>> +             .name = "weight_device",
>> +             .read_seq_string = blkiocg_weight_device_read,
>> +             .write_string = blkiocg_weight_device_write,
>> +             .max_write_len = 256,
>> +     },
>> +     {
>>               .name = "weight",
>>               .read_u64 = blkiocg_weight_read,
>>               .write_u64 = blkiocg_weight_write,
>> @@ -220,6 +447,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>>       struct blkio_group *blkg;
>>       void *key;
>>       struct blkio_policy_type *blkiop;
>> +     struct blkio_policy_node *pn, *pntmp;
>>
>>       rcu_read_lock();
>>  remove_entry:
>> @@ -250,7 +478,12 @@ remove_entry:
>>               blkiop->ops.blkio_unlink_group_fn(key, blkg);
>>       spin_unlock(&blkio_list_lock);
>>       goto remove_entry;
>> +
>>  done:
>> +     list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
>> +             blkio_policy_delete_node(pn);
>> +             kfree(pn);
>> +     }
>>       free_css_id(&blkio_subsys, &blkcg->css);
>>       rcu_read_unlock();
>>       kfree(blkcg);
>> @@ -280,6 +513,7 @@ done:
>>       spin_lock_init(&blkcg->lock);
>>       INIT_HLIST_HEAD(&blkcg->blkg_list);
>>
>> +     INIT_LIST_HEAD(&blkcg->policy_list);
>>       return &blkcg->css;
>>  }
>>
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index 84bf745..c8144a2 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>>       unsigned int weight;
>>       spinlock_t lock;
>>       struct hlist_head blkg_list;
>> +     struct list_head policy_list; /* list of blkio_policy_node */
>>  };
>>
>>  struct blkio_group {
>> @@ -43,6 +44,15 @@ struct blkio_group {
>>       unsigned long sectors;
>>  };
>>
>> +struct blkio_policy_node {
>> +     struct list_head node;
>> +     dev_t dev;
>> +     unsigned int weight;
>> +};
>> +
>> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +                                  dev_t dev);
>> +
>>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
>>                                               unsigned int weight);
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index dee9d93..6945e9b 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -954,7 +954,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>       if (!cfqg)
>>               goto done;
>>
>> -     cfqg->weight = blkcg->weight;
>>       for_each_cfqg_st(cfqg, i, j, st)
>>               *st = CFQ_RB_ROOT;
>>       RB_CLEAR_NODE(&cfqg->rb_node);
>> @@ -971,6 +970,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>       sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>>       blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>>                                       MKDEV(major, minor));
>> +     cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>>
>>       /* Add group on cfqd list */
>>       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>> --
>> 1.5.4.rc3
>>
>>
>>
>>
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-08 19:39                   ` Nauman Rafique
@ 2010-03-08 23:09                     ` Vivek Goyal
  2010-03-09  1:52                       ` Gui Jianfeng
  2010-03-09 20:16                     ` Vivek Goyal
  1 sibling, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-08 23:09 UTC (permalink / raw)
  To: Nauman Rafique
  Cc: Gui Jianfeng, jens.axboe, Chad Talbott, linux-kernel, Li Zefan

On Mon, Mar 08, 2010 at 11:39:54AM -0800, Nauman Rafique wrote:
> On Fri, Mar 5, 2010 at 6:13 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
> > On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
> >> Currently, IO Controller makes use of blkio.weight to assign weight for
> >> all devices. Here a new user interface "blkio.weight_device" is introduced to
> >> assign different weights for different devices. blkio.weight becomes the
> >> default value for devices which are not configured by "blkio.weight_device"
> >>
> >> You can use the following format to assigned specific weight for a given
> >> device:
> >>
> >> major:minor represents device number.
> >>
> >> And you can remove a specific weight as following:
> >>
> >> V1->V2 changes:
> >> - use user interface "weight_device" instead of "policy" suggested by Vivek
> >> - rename some struct suggested by Vivek
> >> - rebase to 2.6-block "for-linus" branch
> >> - remove an useless list_empty check pointed out by Li Zefan
> >> - some trivial typo fix
> >>
> >> V2->V3 changes:
> >> - Move policy_*_node() functions up to get rid of forward declarations
> >> - rename related functions by adding prefix "blkio_"
> >>
> >
> > Thanks for the changes Gui. Looks good to me.
> >
> > Acked-by: Vivek Goyal <vgoyal@redhat.com>
> >
> > Vivek
> >
> >> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
> >> ---
> >>  block/blk-cgroup.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> >>  block/blk-cgroup.h  |   10 ++
> >>  block/cfq-iosched.c |    2 +-
> >>  3 files changed, 246 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> >> index c85d74c..8825e49 100644
> >> --- a/block/blk-cgroup.c
> >> +++ b/block/blk-cgroup.c
> >> @@ -16,6 +16,7 @@
> >>  #include <linux/module.h>
> >>  #include <linux/err.h>
> >>  #include "blk-cgroup.h"
> >> +#include <linux/genhd.h>
> >>
> >>  static DEFINE_SPINLOCK(blkio_list_lock);
> >>  static LIST_HEAD(blkio_list);
> >> @@ -23,6 +24,32 @@ static LIST_HEAD(blkio_list);
> >>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
> >>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
> >>
> >> +static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
> >> +                                         struct blkio_policy_node *pn)
> >> +{
> >> +     list_add(&pn->node, &blkcg->policy_list);
> >> +}
> >> +
> >> +/* Must be called with blkcg->lock held */
> >> +static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
> >> +{
> >> +     list_del(&pn->node);
> >> +}
> >> +
> >> +/* Must be called with blkcg->lock held */
> >> +static struct blkio_policy_node *
> >> +blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
> >> +{
> >> +     struct blkio_policy_node *pn;
> >> +
> >> +     list_for_each_entry(pn, &blkcg->policy_list, node) {
> >> +             if (pn->dev == dev)
> >> +                     return pn;
> >> +     }
> >> +
> >> +     return NULL;
> >> +}
> >> +
> >>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> >>  {
> >>       return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
> >> @@ -128,6 +155,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> >>       struct blkio_group *blkg;
> >>       struct hlist_node *n;
> >>       struct blkio_policy_type *blkiop;
> >> +     struct blkio_policy_node *pn;
> >>
> >>       if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
> >>               return -EINVAL;
> >> @@ -136,7 +164,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> >>       spin_lock(&blkio_list_lock);
> >>       spin_lock_irq(&blkcg->lock);
> >>       blkcg->weight = (unsigned int)val;
> >> +
> >>       hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> >> +             pn = blkio_policy_search_node(blkcg, blkg->dev);
> >> +
> >> +             if (pn)
> >> +                     continue;
> >> +
> >>               list_for_each_entry(blkiop, &blkio_list, list)
> >>                       blkiop->ops.blkio_update_group_weight_fn(blkg,
> >>                                       blkcg->weight);
> >> @@ -178,15 +212,208 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
> >>
> >>  #ifdef CONFIG_DEBUG_BLK_CGROUP
> >>  void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
> >> -                     unsigned long dequeue)
> >> +                                           unsigned long dequeue)
> >>  {
> >>       blkg->dequeue += dequeue;
> >>  }
> >>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
> >>  #endif
> >>
> >> +static int blkio_check_dev_num(dev_t dev)
> >> +{
> >> +     int part = 0;
> >> +     struct gendisk *disk;
> >> +
> >> +     disk = get_gendisk(dev, &part);
> >> +     if (!disk || part)
> >> +             return -ENODEV;
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +static int blkio_policy_parse_and_set(char *buf,
> >> +                                   struct blkio_policy_node *newpn)
> >> +{
> >> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> >> +     int ret;
> >> +     unsigned long major, minor, temp;
> >> +     int i = 0;
> >> +     dev_t dev;
> >> +
> >> +     memset(s, 0, sizeof(s));
> >> +
> >> +     while ((p = strsep(&buf, " ")) != NULL) {
> >> +             if (!*p)
> >> +                     continue;
> >> +
> >> +             s[i++] = p;
> >> +
> >> +             /* Prevent from inputing too many things */
> >> +             if (i == 3)
> >> +                     break;
> >> +     }
> >> +
> >> +     if (i != 2)
> >> +             return -EINVAL;
> >> +
> >> +     p = strsep(&s[0], ":");
> >> +     if (p != NULL)
> >> +             major_s = p;
> >> +     else
> >> +             return -EINVAL;
> >> +
> >> +     minor_s = s[0];
> >> +     if (!minor_s)
> >> +             return -EINVAL;
> >> +
> >> +     ret = strict_strtoul(major_s, 10, &major);
> >> +     if (ret)
> >> +             return -EINVAL;
> >> +
> >> +     ret = strict_strtoul(minor_s, 10, &minor);
> >> +     if (ret)
> >> +             return -EINVAL;
> >> +
> >> +     dev = MKDEV(major, minor);
> 
> I am not quite sure if exposing a mojor,minor number is the best
> interface that can be exposed to user space. How about actual disk
> names like sda, sdb, .. etc? The only problem I see there is that it
> seems tricky to get to these disk names from within the block layer.
> "struct request_queue" has a pointer to backing_dev which has a device
> from which we can get major,minor. But in order to get to disk name,
> we would have to call get_gendisk which can hold a semaphore. Is this
> the reason for us going with major,minor as a user interface to
> specify a disk? I bet there are good reasons for us not keeping a
> pointer to "struct gendisk" from "struct request_queue". If we could
> keep that pointer, our user interface could be very easily modified to
> be the disk name like sda, sdb, etc.

That's a good question. Why not use device names instead of device
numbers? From user's perspective, device names will be more intutive
to use.

At the same time, will it look odd to handle devices with their names as WWID.

/dev/mapper/3600508b400105df70000e000026f0000

Though I see that there is an alternate way to address the same device
like /dev/dm-2 etc.

So from user's perspective I think it will be more intutive to handle
disk names instead of numbers.

Gui, did you forsee issues in implementing disk names?

Thanks
Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-08 23:09                     ` Vivek Goyal
@ 2010-03-09  1:52                       ` Gui Jianfeng
  2010-03-09 19:03                         ` Vivek Goyal
  0 siblings, 1 reply; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-09  1:52 UTC (permalink / raw)
  To: Vivek Goyal, Nauman Rafique, jens.axboe
  Cc: Chad Talbott, linux-kernel, Li Zefan

Vivek Goyal wrote:
> On Mon, Mar 08, 2010 at 11:39:54AM -0800, Nauman Rafique wrote:
>> On Fri, Mar 5, 2010 at 6:13 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
>>> On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
>>>> Currently, IO Controller makes use of blkio.weight to assign weight for
>>>> all devices. Here a new user interface "blkio.weight_device" is introduced to
>>>> assign different weights for different devices. blkio.weight becomes the
>>>> default value for devices which are not configured by "blkio.weight_device"
>>>>
>>>> You can use the following format to assigned specific weight for a given
>>>> device:
>>>>
>>>> major:minor represents device number.
>>>>
>>>> And you can remove a specific weight as following:
>>>>
>>>> V1->V2 changes:
>>>> - use user interface "weight_device" instead of "policy" suggested by Vivek
>>>> - rename some struct suggested by Vivek
>>>> - rebase to 2.6-block "for-linus" branch
>>>> - remove an useless list_empty check pointed out by Li Zefan
>>>> - some trivial typo fix
>>>>
>>>> V2->V3 changes:
>>>> - Move policy_*_node() functions up to get rid of forward declarations
>>>> - rename related functions by adding prefix "blkio_"
>>>>
>>> Thanks for the changes Gui. Looks good to me.
>>>
>>> Acked-by: Vivek Goyal <vgoyal@redhat.com>
>>>
>>> Vivek
>>>
>>>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>>>> ---
>>>>  block/blk-cgroup.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>>>  block/blk-cgroup.h  |   10 ++
>>>>  block/cfq-iosched.c |    2 +-
>>>>  3 files changed, 246 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>>>> index c85d74c..8825e49 100644
>>>> --- a/block/blk-cgroup.c
>>>> +++ b/block/blk-cgroup.c
>>>> @@ -16,6 +16,7 @@
>>>>  #include <linux/module.h>
>>>>  #include <linux/err.h>
>>>>  #include "blk-cgroup.h"
>>>> +#include <linux/genhd.h>
>>>>
>>>>  static DEFINE_SPINLOCK(blkio_list_lock);
>>>>  static LIST_HEAD(blkio_list);
>>>> @@ -23,6 +24,32 @@ static LIST_HEAD(blkio_list);
>>>>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>>>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>>>
>>>> +static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
>>>> +                                         struct blkio_policy_node *pn)
>>>> +{
>>>> +     list_add(&pn->node, &blkcg->policy_list);
>>>> +}
>>>> +
>>>> +/* Must be called with blkcg->lock held */
>>>> +static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
>>>> +{
>>>> +     list_del(&pn->node);
>>>> +}
>>>> +
>>>> +/* Must be called with blkcg->lock held */
>>>> +static struct blkio_policy_node *
>>>> +blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
>>>> +{
>>>> +     struct blkio_policy_node *pn;
>>>> +
>>>> +     list_for_each_entry(pn, &blkcg->policy_list, node) {
>>>> +             if (pn->dev == dev)
>>>> +                     return pn;
>>>> +     }
>>>> +
>>>> +     return NULL;
>>>> +}
>>>> +
>>>>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>>>>  {
>>>>       return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
>>>> @@ -128,6 +155,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>>>       struct blkio_group *blkg;
>>>>       struct hlist_node *n;
>>>>       struct blkio_policy_type *blkiop;
>>>> +     struct blkio_policy_node *pn;
>>>>
>>>>       if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>>>>               return -EINVAL;
>>>> @@ -136,7 +164,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>>>       spin_lock(&blkio_list_lock);
>>>>       spin_lock_irq(&blkcg->lock);
>>>>       blkcg->weight = (unsigned int)val;
>>>> +
>>>>       hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>>>> +             pn = blkio_policy_search_node(blkcg, blkg->dev);
>>>> +
>>>> +             if (pn)
>>>> +                     continue;
>>>> +
>>>>               list_for_each_entry(blkiop, &blkio_list, list)
>>>>                       blkiop->ops.blkio_update_group_weight_fn(blkg,
>>>>                                       blkcg->weight);
>>>> @@ -178,15 +212,208 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>>>>
>>>>  #ifdef CONFIG_DEBUG_BLK_CGROUP
>>>>  void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>>>> -                     unsigned long dequeue)
>>>> +                                           unsigned long dequeue)
>>>>  {
>>>>       blkg->dequeue += dequeue;
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>>>>  #endif
>>>>
>>>> +static int blkio_check_dev_num(dev_t dev)
>>>> +{
>>>> +     int part = 0;
>>>> +     struct gendisk *disk;
>>>> +
>>>> +     disk = get_gendisk(dev, &part);
>>>> +     if (!disk || part)
>>>> +             return -ENODEV;
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static int blkio_policy_parse_and_set(char *buf,
>>>> +                                   struct blkio_policy_node *newpn)
>>>> +{
>>>> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>>>> +     int ret;
>>>> +     unsigned long major, minor, temp;
>>>> +     int i = 0;
>>>> +     dev_t dev;
>>>> +
>>>> +     memset(s, 0, sizeof(s));
>>>> +
>>>> +     while ((p = strsep(&buf, " ")) != NULL) {
>>>> +             if (!*p)
>>>> +                     continue;
>>>> +
>>>> +             s[i++] = p;
>>>> +
>>>> +             /* Prevent from inputing too many things */
>>>> +             if (i == 3)
>>>> +                     break;
>>>> +     }
>>>> +
>>>> +     if (i != 2)
>>>> +             return -EINVAL;
>>>> +
>>>> +     p = strsep(&s[0], ":");
>>>> +     if (p != NULL)
>>>> +             major_s = p;
>>>> +     else
>>>> +             return -EINVAL;
>>>> +
>>>> +     minor_s = s[0];
>>>> +     if (!minor_s)
>>>> +             return -EINVAL;
>>>> +
>>>> +     ret = strict_strtoul(major_s, 10, &major);
>>>> +     if (ret)
>>>> +             return -EINVAL;
>>>> +
>>>> +     ret = strict_strtoul(minor_s, 10, &minor);
>>>> +     if (ret)
>>>> +             return -EINVAL;
>>>> +
>>>> +     dev = MKDEV(major, minor);
>> I am not quite sure if exposing a mojor,minor number is the best
>> interface that can be exposed to user space. How about actual disk
>> names like sda, sdb, .. etc? The only problem I see there is that it
>> seems tricky to get to these disk names from within the block layer.
>> "struct request_queue" has a pointer to backing_dev which has a device
>> from which we can get major,minor. But in order to get to disk name,
>> we would have to call get_gendisk which can hold a semaphore. Is this
>> the reason for us going with major,minor as a user interface to
>> specify a disk? I bet there are good reasons for us not keeping a
>> pointer to "struct gendisk" from "struct request_queue". If we could
>> keep that pointer, our user interface could be very easily modified to
>> be the disk name like sda, sdb, etc.
> 
> That's a good question. Why not use device names instead of device
> numbers? From user's perspective, device names will be more intutive
> to use.
> 
> At the same time, will it look odd to handle devices with their names as WWID.
> 
> /dev/mapper/3600508b400105df70000e000026f0000
> 
> Though I see that there is an alternate way to address the same device
> like /dev/dm-2 etc.
> 
> So from user's perspective I think it will be more intutive to handle
> disk names instead of numbers.
> 
> Gui, did you forsee issues in implementing disk names?

Hi Vivek,

>From the implementation of view, we need a device number as a key in blkio_policy_node, 
if using device name as user interface, i can't figure out a way to retirve the 
corresponding device number by means of device name (like sda, not "/dev/sda"). 
Jens, is there any method to handle this?
Another cgroup subsystem "device" cgroup also make use of device number as user interface,
I guess the reason is the same.

Thanks, 
Gui

> 
> Thanks
> Vivek
> 
> 


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-09  1:52                       ` Gui Jianfeng
@ 2010-03-09 19:03                         ` Vivek Goyal
  2010-03-10  0:33                           ` Gui Jianfeng
  0 siblings, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-09 19:03 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: Nauman Rafique, jens.axboe, Chad Talbott, linux-kernel, Li Zefan

On Tue, Mar 09, 2010 at 09:52:06AM +0800, Gui Jianfeng wrote:

[..]
> >>>> +static int blkio_policy_parse_and_set(char *buf,
> >>>> +                                   struct blkio_policy_node *newpn)
> >>>> +{
> >>>> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> >>>> +     int ret;
> >>>> +     unsigned long major, minor, temp;
> >>>> +     int i = 0;
> >>>> +     dev_t dev;
> >>>> +
> >>>> +     memset(s, 0, sizeof(s));
> >>>> +
> >>>> +     while ((p = strsep(&buf, " ")) != NULL) {
> >>>> +             if (!*p)
> >>>> +                     continue;
> >>>> +
> >>>> +             s[i++] = p;
> >>>> +
> >>>> +             /* Prevent from inputing too many things */
> >>>> +             if (i == 3)
> >>>> +                     break;
> >>>> +     }
> >>>> +
> >>>> +     if (i != 2)
> >>>> +             return -EINVAL;
> >>>> +
> >>>> +     p = strsep(&s[0], ":");
> >>>> +     if (p != NULL)
> >>>> +             major_s = p;
> >>>> +     else
> >>>> +             return -EINVAL;
> >>>> +
> >>>> +     minor_s = s[0];
> >>>> +     if (!minor_s)
> >>>> +             return -EINVAL;
> >>>> +
> >>>> +     ret = strict_strtoul(major_s, 10, &major);
> >>>> +     if (ret)
> >>>> +             return -EINVAL;
> >>>> +
> >>>> +     ret = strict_strtoul(minor_s, 10, &minor);
> >>>> +     if (ret)
> >>>> +             return -EINVAL;
> >>>> +
> >>>> +     dev = MKDEV(major, minor);
> >> I am not quite sure if exposing a mojor,minor number is the best
> >> interface that can be exposed to user space. How about actual disk
> >> names like sda, sdb, .. etc? The only problem I see there is that it
> >> seems tricky to get to these disk names from within the block layer.
> >> "struct request_queue" has a pointer to backing_dev which has a device
> >> from which we can get major,minor. But in order to get to disk name,
> >> we would have to call get_gendisk which can hold a semaphore. Is this
> >> the reason for us going with major,minor as a user interface to
> >> specify a disk? I bet there are good reasons for us not keeping a
> >> pointer to "struct gendisk" from "struct request_queue". If we could
> >> keep that pointer, our user interface could be very easily modified to
> >> be the disk name like sda, sdb, etc.
> > 
> > That's a good question. Why not use device names instead of device
> > numbers? From user's perspective, device names will be more intutive
> > to use.
> > 
> > At the same time, will it look odd to handle devices with their names as WWID.
> > 
> > /dev/mapper/3600508b400105df70000e000026f0000
> > 
> > Though I see that there is an alternate way to address the same device
> > like /dev/dm-2 etc.
> > 
> > So from user's perspective I think it will be more intutive to handle
> > disk names instead of numbers.
> > 
> > Gui, did you forsee issues in implementing disk names?
> 
> Hi Vivek,
> 
> >From the implementation of view, we need a device number as a key in blkio_policy_node, 
> if using device name as user interface, i can't figure out a way to retirve the 
> corresponding device number by means of device name (like sda, not "/dev/sda"). 

Hi Gui,

How about using full device path names (/dev/sda)? "blockdev" utility also
expects full device pathnames. Same seems to be the case with device mapper
targets.

"device" cgroup controller probably is using major and minor numbers because
it needs to control creation of device file (mknod).

May be we can use lookup_bdev() to get block_device pointer and then
get_gendisk() to check if it is a partition.

I am not very sure but device name/path interface might turn out to be
more intutive.

Jens, do you have any thoughts on this?

Thanks
Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-08 19:39                   ` Nauman Rafique
  2010-03-08 23:09                     ` Vivek Goyal
@ 2010-03-09 20:16                     ` Vivek Goyal
  2010-03-09 20:39                       ` Nauman Rafique
  1 sibling, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-09 20:16 UTC (permalink / raw)
  To: Nauman Rafique
  Cc: Gui Jianfeng, jens.axboe, Chad Talbott, linux-kernel, Li Zefan

On Mon, Mar 08, 2010 at 11:39:54AM -0800, Nauman Rafique wrote:
[..]
> >> +static int blkio_policy_parse_and_set(char *buf,
> >> +                                   struct blkio_policy_node *newpn)
> >> +{
> >> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> >> +     int ret;
> >> +     unsigned long major, minor, temp;
> >> +     int i = 0;
> >> +     dev_t dev;
> >> +
> >> +     memset(s, 0, sizeof(s));
> >> +
> >> +     while ((p = strsep(&buf, " ")) != NULL) {
> >> +             if (!*p)
> >> +                     continue;
> >> +
> >> +             s[i++] = p;
> >> +
> >> +             /* Prevent from inputing too many things */
> >> +             if (i == 3)
> >> +                     break;
> >> +     }
> >> +
> >> +     if (i != 2)
> >> +             return -EINVAL;
> >> +
> >> +     p = strsep(&s[0], ":");
> >> +     if (p != NULL)
> >> +             major_s = p;
> >> +     else
> >> +             return -EINVAL;
> >> +
> >> +     minor_s = s[0];
> >> +     if (!minor_s)
> >> +             return -EINVAL;
> >> +
> >> +     ret = strict_strtoul(major_s, 10, &major);
> >> +     if (ret)
> >> +             return -EINVAL;
> >> +
> >> +     ret = strict_strtoul(minor_s, 10, &minor);
> >> +     if (ret)
> >> +             return -EINVAL;
> >> +
> >> +     dev = MKDEV(major, minor);
> 
> I am not quite sure if exposing a mojor,minor number is the best
> interface that can be exposed to user space. How about actual disk
> names like sda, sdb, .. etc? The only problem I see there is that it
> seems tricky to get to these disk names from within the block layer.
> "struct request_queue" has a pointer to backing_dev which has a device
> from which we can get major,minor. But in order to get to disk name,
> we would have to call get_gendisk which can hold a semaphore. Is this
> the reason for us going with major,minor as a user interface to
> specify a disk? I bet there are good reasons for us not keeping a
> pointer to "struct gendisk" from "struct request_queue". If we could
> keep that pointer, our user interface could be very easily modified to
> be the disk name like sda, sdb, etc.

Hi Nauman,

Do we really store a device name in "struct gendisk"? IIUC, a disk is
identified by its major and minor number and then there can be multiple
device files pointing to same disk. 

So I have a disk /dev/sdc in my system and I created another alias to
same disk using mknod and mounted the disk using the alias.

mknod /dev/sdc-alias b 8 32
mount /dev/sdc-alias /mnt

If that's the case, there is no way gendisk can store the pathname.
Instead, device file has inode associated with it, and there we store
major, minor number of disk, and using that we operate on disk/partition.

If that's the case, then major/minor number based interface for blkio
makes sense. Because we also need to export stats regarding the disk
time consumed by cgroup on a particular device, the only unique identifier
of the disk seems to be {major,minor} pair and multiple block device
files can be pointing to same disk. Because it is many to one mapping, it
will not be possible to reverse map it.

So I guess we need to continue to handle rules and stats using major/minor
numbers. One improvement probably we can do and that is allow setting
rules both by major/minor number and device file path. But internally
cgroup code will map device file path to major minor numbers and rules
will be displayed against major/minor number and not original device path.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device"  for IO-Controller
  2010-03-09 20:16                     ` Vivek Goyal
@ 2010-03-09 20:39                       ` Nauman Rafique
  0 siblings, 0 replies; 40+ messages in thread
From: Nauman Rafique @ 2010-03-09 20:39 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Gui Jianfeng, jens.axboe, Chad Talbott, linux-kernel, Li Zefan

On Tue, Mar 9, 2010 at 12:16 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> On Mon, Mar 08, 2010 at 11:39:54AM -0800, Nauman Rafique wrote:
> [..]
>> >> +static int blkio_policy_parse_and_set(char *buf,
>> >> +                                   struct blkio_policy_node *newpn)
>> >> +{
>> >> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>> >> +     int ret;
>> >> +     unsigned long major, minor, temp;
>> >> +     int i = 0;
>> >> +     dev_t dev;
>> >> +
>> >> +     memset(s, 0, sizeof(s));
>> >> +
>> >> +     while ((p = strsep(&buf, " ")) != NULL) {
>> >> +             if (!*p)
>> >> +                     continue;
>> >> +
>> >> +             s[i++] = p;
>> >> +
>> >> +             /* Prevent from inputing too many things */
>> >> +             if (i == 3)
>> >> +                     break;
>> >> +     }
>> >> +
>> >> +     if (i != 2)
>> >> +             return -EINVAL;
>> >> +
>> >> +     p = strsep(&s[0], ":");
>> >> +     if (p != NULL)
>> >> +             major_s = p;
>> >> +     else
>> >> +             return -EINVAL;
>> >> +
>> >> +     minor_s = s[0];
>> >> +     if (!minor_s)
>> >> +             return -EINVAL;
>> >> +
>> >> +     ret = strict_strtoul(major_s, 10, &major);
>> >> +     if (ret)
>> >> +             return -EINVAL;
>> >> +
>> >> +     ret = strict_strtoul(minor_s, 10, &minor);
>> >> +     if (ret)
>> >> +             return -EINVAL;
>> >> +
>> >> +     dev = MKDEV(major, minor);
>>
>> I am not quite sure if exposing a mojor,minor number is the best
>> interface that can be exposed to user space. How about actual disk
>> names like sda, sdb, .. etc? The only problem I see there is that it
>> seems tricky to get to these disk names from within the block layer.
>> "struct request_queue" has a pointer to backing_dev which has a device
>> from which we can get major,minor. But in order to get to disk name,
>> we would have to call get_gendisk which can hold a semaphore. Is this
>> the reason for us going with major,minor as a user interface to
>> specify a disk? I bet there are good reasons for us not keeping a
>> pointer to "struct gendisk" from "struct request_queue". If we could
>> keep that pointer, our user interface could be very easily modified to
>> be the disk name like sda, sdb, etc.
>
> Hi Nauman,
>
> Do we really store a device name in "struct gendisk"? IIUC, a disk is
> identified by its major and minor number and then there can be multiple
> device files pointing to same disk.
>
> So I have a disk /dev/sdc in my system and I created another alias to
> same disk using mknod and mounted the disk using the alias.
>
> mknod /dev/sdc-alias b 8 32
> mount /dev/sdc-alias /mnt
>
> If that's the case, there is no way gendisk can store the pathname.
> Instead, device file has inode associated with it, and there we store
> major, minor number of disk, and using that we operate on disk/partition.

You are right, you can create more aliases to point the same device.
But there is one name stored in disk_name field of struct gendisk. And
I guess this is the same name that you will see if you do "ls
/sys/block/". block layer exposes all its sysfs variables going
through the disk names. For example, if you have to switch a scheduler
on a block device, you would use the name like sdb and do "echo cfq >
/sys/block/sdb/queue/scheduler". The same holds true if you want to
change a io scheduler specific tunable, e.g.
/sys/block/sdb/queue/iosched/back_seek_max.

My point is that all per device interfaces in the io cgroup category
should use the same device names; this includes all the stats
reporting, interfaces to set weights, and so on. And not come up with
a different way of identifying devices.

>
> If that's the case, then major/minor number based interface for blkio
> makes sense. Because we also need to export stats regarding the disk
> time consumed by cgroup on a particular device, the only unique identifier
> of the disk seems to be {major,minor} pair and multiple block device
> files can be pointing to same disk. Because it is many to one mapping, it
> will not be possible to reverse map it.
>
> So I guess we need to continue to handle rules and stats using major/minor
> numbers. One improvement probably we can do and that is allow setting
> rules both by major/minor number and device file path. But internally
> cgroup code will map device file path to major minor numbers and rules
> will be displayed against major/minor number and not original device path.
>
> Thanks
> Vivek
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-09 19:03                         ` Vivek Goyal
@ 2010-03-10  0:33                           ` Gui Jianfeng
  2010-03-10  5:41                             ` Chad Talbott
  2010-03-10 15:30                             ` Vivek Goyal
  0 siblings, 2 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-10  0:33 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Nauman Rafique, jens.axboe, Chad Talbott, linux-kernel, Li Zefan

Vivek Goyal wrote:
> On Tue, Mar 09, 2010 at 09:52:06AM +0800, Gui Jianfeng wrote:
> 
> [..]
>>>>>> +static int blkio_policy_parse_and_set(char *buf,
>>>>>> +                                   struct blkio_policy_node *newpn)
>>>>>> +{
>>>>>> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>>>>>> +     int ret;
>>>>>> +     unsigned long major, minor, temp;
>>>>>> +     int i = 0;
>>>>>> +     dev_t dev;
>>>>>> +
>>>>>> +     memset(s, 0, sizeof(s));
>>>>>> +
>>>>>> +     while ((p = strsep(&buf, " ")) != NULL) {
>>>>>> +             if (!*p)
>>>>>> +                     continue;
>>>>>> +
>>>>>> +             s[i++] = p;
>>>>>> +
>>>>>> +             /* Prevent from inputing too many things */
>>>>>> +             if (i == 3)
>>>>>> +                     break;
>>>>>> +     }
>>>>>> +
>>>>>> +     if (i != 2)
>>>>>> +             return -EINVAL;
>>>>>> +
>>>>>> +     p = strsep(&s[0], ":");
>>>>>> +     if (p != NULL)
>>>>>> +             major_s = p;
>>>>>> +     else
>>>>>> +             return -EINVAL;
>>>>>> +
>>>>>> +     minor_s = s[0];
>>>>>> +     if (!minor_s)
>>>>>> +             return -EINVAL;
>>>>>> +
>>>>>> +     ret = strict_strtoul(major_s, 10, &major);
>>>>>> +     if (ret)
>>>>>> +             return -EINVAL;
>>>>>> +
>>>>>> +     ret = strict_strtoul(minor_s, 10, &minor);
>>>>>> +     if (ret)
>>>>>> +             return -EINVAL;
>>>>>> +
>>>>>> +     dev = MKDEV(major, minor);
>>>> I am not quite sure if exposing a mojor,minor number is the best
>>>> interface that can be exposed to user space. How about actual disk
>>>> names like sda, sdb, .. etc? The only problem I see there is that it
>>>> seems tricky to get to these disk names from within the block layer.
>>>> "struct request_queue" has a pointer to backing_dev which has a device
>>>> from which we can get major,minor. But in order to get to disk name,
>>>> we would have to call get_gendisk which can hold a semaphore. Is this
>>>> the reason for us going with major,minor as a user interface to
>>>> specify a disk? I bet there are good reasons for us not keeping a
>>>> pointer to "struct gendisk" from "struct request_queue". If we could
>>>> keep that pointer, our user interface could be very easily modified to
>>>> be the disk name like sda, sdb, etc.
>>> That's a good question. Why not use device names instead of device
>>> numbers? From user's perspective, device names will be more intutive
>>> to use.
>>>
>>> At the same time, will it look odd to handle devices with their names as WWID.
>>>
>>> /dev/mapper/3600508b400105df70000e000026f0000
>>>
>>> Though I see that there is an alternate way to address the same device
>>> like /dev/dm-2 etc.
>>>
>>> So from user's perspective I think it will be more intutive to handle
>>> disk names instead of numbers.
>>>
>>> Gui, did you forsee issues in implementing disk names?
>> Hi Vivek,
>>
>> >From the implementation of view, we need a device number as a key in blkio_policy_node, 
>> if using device name as user interface, i can't figure out a way to retirve the 
>> corresponding device number by means of device name (like sda, not "/dev/sda"). 
> 
> Hi Gui,
> 
> How about using full device path names (/dev/sda)? "blockdev" utility also
> expects full device pathnames. Same seems to be the case with device mapper
> targets.
> 
> "device" cgroup controller probably is using major and minor numbers because
> it needs to control creation of device file (mknod).
> 
> May be we can use lookup_bdev() to get block_device pointer and then
> get_gendisk() to check if it is a partition.
> 
> I am not very sure but device name/path interface might turn out to be
> more intutive.

Hi Vivek,

I don't think using an inode path name as interface is a good idea. Because, one 
can create new file to point to the same device. Also, pathname could be removed
or renamed by user.
So, i think device number is a better choice.

Thanks
Gui

> 
> Jens, do you have any thoughts on this?
> 
> Thanks
> Vivek
> 
> 
> 



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device"  for IO-Controller
  2010-03-10  0:33                           ` Gui Jianfeng
@ 2010-03-10  5:41                             ` Chad Talbott
  2010-03-10 15:30                             ` Vivek Goyal
  1 sibling, 0 replies; 40+ messages in thread
From: Chad Talbott @ 2010-03-10  5:41 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: Vivek Goyal, Nauman Rafique, jens.axboe, linux-kernel, Li Zefan

On Tue, Mar 9, 2010 at 4:33 PM, Gui Jianfeng <guijianfeng@cn.fujitsu.com> wrote:
> Vivek Goyal wrote:
>> On Tue, Mar 09, 2010 at 09:52:06AM +0800, Gui Jianfeng wrote:
>> I am not very sure but device name/path interface might turn out to be
>> more intutive.
>
> I don't think using an inode path name as interface is a good idea. Because, one
> can create new file to point to the same device. Also, pathname could be removed
> or renamed by user.
> So, i think device number is a better choice.

Sorry to butt in, but I don't think anyone is suggesting trying to
remember names from device nodes.  I think the idea is to use the
names derived from the subsystem - such as 'sda' which is reflected in
/sys and dmesg.

Chad

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-10  0:33                           ` Gui Jianfeng
  2010-03-10  5:41                             ` Chad Talbott
@ 2010-03-10 15:30                             ` Vivek Goyal
  2010-03-10 17:38                               ` Chad Talbott
  1 sibling, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-10 15:30 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: Nauman Rafique, jens.axboe, Chad Talbott, linux-kernel, Li Zefan

On Wed, Mar 10, 2010 at 08:33:16AM +0800, Gui Jianfeng wrote:
> Vivek Goyal wrote:
> > On Tue, Mar 09, 2010 at 09:52:06AM +0800, Gui Jianfeng wrote:
> > 
> > [..]
> >>>>>> +static int blkio_policy_parse_and_set(char *buf,
> >>>>>> +                                   struct blkio_policy_node *newpn)
> >>>>>> +{
> >>>>>> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> >>>>>> +     int ret;
> >>>>>> +     unsigned long major, minor, temp;
> >>>>>> +     int i = 0;
> >>>>>> +     dev_t dev;
> >>>>>> +
> >>>>>> +     memset(s, 0, sizeof(s));
> >>>>>> +
> >>>>>> +     while ((p = strsep(&buf, " ")) != NULL) {
> >>>>>> +             if (!*p)
> >>>>>> +                     continue;
> >>>>>> +
> >>>>>> +             s[i++] = p;
> >>>>>> +
> >>>>>> +             /* Prevent from inputing too many things */
> >>>>>> +             if (i == 3)
> >>>>>> +                     break;
> >>>>>> +     }
> >>>>>> +
> >>>>>> +     if (i != 2)
> >>>>>> +             return -EINVAL;
> >>>>>> +
> >>>>>> +     p = strsep(&s[0], ":");
> >>>>>> +     if (p != NULL)
> >>>>>> +             major_s = p;
> >>>>>> +     else
> >>>>>> +             return -EINVAL;
> >>>>>> +
> >>>>>> +     minor_s = s[0];
> >>>>>> +     if (!minor_s)
> >>>>>> +             return -EINVAL;
> >>>>>> +
> >>>>>> +     ret = strict_strtoul(major_s, 10, &major);
> >>>>>> +     if (ret)
> >>>>>> +             return -EINVAL;
> >>>>>> +
> >>>>>> +     ret = strict_strtoul(minor_s, 10, &minor);
> >>>>>> +     if (ret)
> >>>>>> +             return -EINVAL;
> >>>>>> +
> >>>>>> +     dev = MKDEV(major, minor);
> >>>> I am not quite sure if exposing a mojor,minor number is the best
> >>>> interface that can be exposed to user space. How about actual disk
> >>>> names like sda, sdb, .. etc? The only problem I see there is that it
> >>>> seems tricky to get to these disk names from within the block layer.
> >>>> "struct request_queue" has a pointer to backing_dev which has a device
> >>>> from which we can get major,minor. But in order to get to disk name,
> >>>> we would have to call get_gendisk which can hold a semaphore. Is this
> >>>> the reason for us going with major,minor as a user interface to
> >>>> specify a disk? I bet there are good reasons for us not keeping a
> >>>> pointer to "struct gendisk" from "struct request_queue". If we could
> >>>> keep that pointer, our user interface could be very easily modified to
> >>>> be the disk name like sda, sdb, etc.
> >>> That's a good question. Why not use device names instead of device
> >>> numbers? From user's perspective, device names will be more intutive
> >>> to use.
> >>>
> >>> At the same time, will it look odd to handle devices with their names as WWID.
> >>>
> >>> /dev/mapper/3600508b400105df70000e000026f0000
> >>>
> >>> Though I see that there is an alternate way to address the same device
> >>> like /dev/dm-2 etc.
> >>>
> >>> So from user's perspective I think it will be more intutive to handle
> >>> disk names instead of numbers.
> >>>
> >>> Gui, did you forsee issues in implementing disk names?
> >> Hi Vivek,
> >>
> >> >From the implementation of view, we need a device number as a key in blkio_policy_node, 
> >> if using device name as user interface, i can't figure out a way to retirve the 
> >> corresponding device number by means of device name (like sda, not "/dev/sda"). 
> > 
> > Hi Gui,
> > 
> > How about using full device path names (/dev/sda)? "blockdev" utility also
> > expects full device pathnames. Same seems to be the case with device mapper
> > targets.
> > 
> > "device" cgroup controller probably is using major and minor numbers because
> > it needs to control creation of device file (mknod).
> > 
> > May be we can use lookup_bdev() to get block_device pointer and then
> > get_gendisk() to check if it is a partition.
> > 
> > I am not very sure but device name/path interface might turn out to be
> > more intutive.
> 
> Hi Vivek,
> 
> I don't think using an inode path name as interface is a good idea. Because, one 
> can create new file to point to the same device. Also, pathname could be removed
> or renamed by user.
> So, i think device number is a better choice.
> 

Hi Gui,

Yes, full device file path name might not be the best option. But
disk_name (as seen in sysfs) still remains appealing.

For mapping a device name (sda) to its gendisk object, can't we iterate
through all the devices in block class in sysfs and then verify that device
name user has passed to set the rule is valid or not?

Look at block/genhd.c printk_all_partitions(). This functions iterates
through all gendisk objects.

This still leaves the issue of reaching a gendisk object from request
queue. Looking into it.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device"  for IO-Controller
  2010-03-10 15:30                             ` Vivek Goyal
@ 2010-03-10 17:38                               ` Chad Talbott
  2010-03-10 18:03                                 ` Vivek Goyal
  0 siblings, 1 reply; 40+ messages in thread
From: Chad Talbott @ 2010-03-10 17:38 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Gui Jianfeng, Nauman Rafique, jens.axboe, linux-kernel, Li Zefan

On Wed, Mar 10, 2010 at 7:30 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
> This still leaves the issue of reaching a gendisk object from request
> queue. Looking into it.

It looks like we have that pairing way back in blk_register_queue()
which takes a gendisk.  Is there any reason we don't hold onto the
gendisk there?  Eyeballing add_disk() and unlink_gendisk() seems to
confirm that gendisk lifetime spans request_queue.

Nauman and I were also wondering why blkio_group and blkio_policy_node
store a dev_t, rather than a direct pointer to gendisk.  dev_t seems
more like a userspace<->kernel interface than an inside-the-kernel
interface.

Chad

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-10 17:38                               ` Chad Talbott
@ 2010-03-10 18:03                                 ` Vivek Goyal
  2010-03-10 20:31                                   ` Vivek Goyal
  0 siblings, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-10 18:03 UTC (permalink / raw)
  To: Chad Talbott
  Cc: Gui Jianfeng, Nauman Rafique, jens.axboe, linux-kernel, Li Zefan

On Wed, Mar 10, 2010 at 09:38:35AM -0800, Chad Talbott wrote:
> On Wed, Mar 10, 2010 at 7:30 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
> > This still leaves the issue of reaching a gendisk object from request
> > queue. Looking into it.
> 
> It looks like we have that pairing way back in blk_register_queue()
> which takes a gendisk.  Is there any reason we don't hold onto the
> gendisk there?  Eyeballing add_disk() and unlink_gendisk() seems to
> confirm that gendisk lifetime spans request_queue.
> 

Yes, looking at the code, it looks like gendisk and request_queue object's
lifetime is same and probably we can store a pointer to gendisk in
request_queue at blk_register_queue() time. And then use this pointer to
retrieve gendisk->disk_name to report stats.

> Nauman and I were also wondering why blkio_group and blkio_policy_node
> store a dev_t, rather than a direct pointer to gendisk.  dev_t seems
> more like a userspace<->kernel interface than an inside-the-kernel
> interface.
> 

blkio_policy_node currently can't store a pointer to gendisk because there
is no mechanism to call back into blkio if device is removed. So if we
implement something so that once device is removed, blkio layer gets a
callback and we cleanup any state/rules associated with that device, then
I think we should be able to store the pointer to gendisk.

I am still trying to figure out how elevator/ioscheduler state is cleaned
up if a device is removed while some IO is happening to it.

OTOH, Gui, may be one can use blk_lookup_devt() to lookup the dev_t of a
device using the disk name (sda). I just noticed it while reading the
code.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-10 18:03                                 ` Vivek Goyal
@ 2010-03-10 20:31                                   ` Vivek Goyal
  2010-03-11 19:21                                     ` Manuel Benitez
  0 siblings, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-03-10 20:31 UTC (permalink / raw)
  To: Chad Talbott
  Cc: Gui Jianfeng, Nauman Rafique, jens.axboe, linux-kernel, Li Zefan

On Wed, Mar 10, 2010 at 01:03:36PM -0500, Vivek Goyal wrote:
> On Wed, Mar 10, 2010 at 09:38:35AM -0800, Chad Talbott wrote:
> > On Wed, Mar 10, 2010 at 7:30 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
> > > This still leaves the issue of reaching a gendisk object from request
> > > queue. Looking into it.
> > 
> > It looks like we have that pairing way back in blk_register_queue()
> > which takes a gendisk.  Is there any reason we don't hold onto the
> > gendisk there?  Eyeballing add_disk() and unlink_gendisk() seems to
> > confirm that gendisk lifetime spans request_queue.
> > 
> 
> Yes, looking at the code, it looks like gendisk and request_queue object's
> lifetime is same and probably we can store a pointer to gendisk in
> request_queue at blk_register_queue() time. And then use this pointer to
> retrieve gendisk->disk_name to report stats.
> 

Well, gendisk and request_queue have little different life span. Following
seems to be the sequence a block driver follows.

	blk_init_queue()
	alloc_disk() and add_disk()
	device_removed
	del_gendisk()
	blk_cleanup_queue()

So first we cleaup the gendisk structure and later driver calls to cleanup
the request queue.

> > Nauman and I were also wondering why blkio_group and blkio_policy_node
> > store a dev_t, rather than a direct pointer to gendisk.  dev_t seems
> > more like a userspace<->kernel interface than an inside-the-kernel
> > interface.
> > 
> 
> blkio_policy_node currently can't store a pointer to gendisk because there
> is no mechanism to call back into blkio if device is removed. So if we
> implement something so that once device is removed, blkio layer gets a
> callback and we cleanup any state/rules associated with that device, then
> I think we should be able to store the pointer to gendisk.
> 
> I am still trying to figure out how elevator/ioscheduler state is cleaned
> up if a device is removed while some IO is happening to it.
> 

So blk_cleanup_queue() will do this. That means few things.

- We can't store pointers to gendisk in blkio_policy_node or blkio_group
  because gendisk might have gone away but request queue is still there.
  May be one can try saving a pointer and taking a reference, but I guess
  that becomes littles complicated.

- If we are using disk name for rules and reporting stats, then we also
  need to make sure that these rules are cleared from cgroups once device
  has disappeared. Otherwise, following might happen.

	- Create a rule for sda (x,y) for cgroup test1. x,y are major and
	  minor numbers.
	- sda goes away. Rules still remains in blkio cgroup.
	- Another device gets plugged in and i guess following can happen.
		- device name is different but dev_t is same as sda.
		- device name is same (sda) but device number is
		  different.

		In both the cases a user will be confused with stale rules
		in cgroups.

 Cleaning up cgroups rules can get little complicated. I guess we need to
 create a function in blkio-cgroup.c to traverse through all the cgroups
 and cleanup any blkio_policy_nodes belonging to device going away.
	
In a nutshell, it probably is doable. You are welcome to write a patch. At
the same time I am not against deivce major/minor number based interface,
because it keeps things little simple.

Thanks
Vivek

-   
> OTOH, Gui, may be one can use blk_lookup_devt() to lookup the dev_t of a
> device using the disk name (sda). I just noticed it while reading the
> code.
> 
> Thanks
> Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device"  for IO-Controller
  2010-03-10 20:31                                   ` Vivek Goyal
@ 2010-03-11 19:21                                     ` Manuel Benitez
  2010-03-15 13:55                                       ` Vivek Goyal
  0 siblings, 1 reply; 40+ messages in thread
From: Manuel Benitez @ 2010-03-11 19:21 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Chad Talbott, Gui Jianfeng, Nauman Rafique, jens.axboe,
	linux-kernel, Li Zefan

On a closely related topic, I've just recently made a change to one of
my branches that exposes the blkio.time and blkio.sectors information
for the root cgroup. These stats would not show because the major and
minor information for the root blkio_croup structures is zero. This
information is not available at the when the root blkio_cgroup
structures are instantiated, so they are left without major and minor
information.

I have a simple fix that updates the major and minor information for
the root structures at a later time. It looks something like this:

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index cd79be0..b34c952 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -956,6 +956,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *c
                return NULL;;

        cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+               cfqg->blkg.dev = MKDEV(major, minor);
+               goto done;
+       }
        if (cfqg || !create)
                goto done;

If folks think that this would be of interest, I can submit a formal
patch. If someone can suggest a better way to do it that doesn't
require extensive changes elsewhere, I'm open to working that up as
well.

-Ricky

On Wed, Mar 10, 2010 at 12:31 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> On Wed, Mar 10, 2010 at 01:03:36PM -0500, Vivek Goyal wrote:
>> On Wed, Mar 10, 2010 at 09:38:35AM -0800, Chad Talbott wrote:
>> > On Wed, Mar 10, 2010 at 7:30 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
>> > > This still leaves the issue of reaching a gendisk object from request
>> > > queue. Looking into it.
>> >
>> > It looks like we have that pairing way back in blk_register_queue()
>> > which takes a gendisk.  Is there any reason we don't hold onto the
>> > gendisk there?  Eyeballing add_disk() and unlink_gendisk() seems to
>> > confirm that gendisk lifetime spans request_queue.
>> >
>>
>> Yes, looking at the code, it looks like gendisk and request_queue object's
>> lifetime is same and probably we can store a pointer to gendisk in
>> request_queue at blk_register_queue() time. And then use this pointer to
>> retrieve gendisk->disk_name to report stats.
>>
>
> Well, gendisk and request_queue have little different life span. Following
> seems to be the sequence a block driver follows.
>
>        blk_init_queue()
>        alloc_disk() and add_disk()
>        device_removed
>        del_gendisk()
>        blk_cleanup_queue()
>
> So first we cleaup the gendisk structure and later driver calls to cleanup
> the request queue.
>
>> > Nauman and I were also wondering why blkio_group and blkio_policy_node
>> > store a dev_t, rather than a direct pointer to gendisk.  dev_t seems
>> > more like a userspace<->kernel interface than an inside-the-kernel
>> > interface.
>> >
>>
>> blkio_policy_node currently can't store a pointer to gendisk because there
>> is no mechanism to call back into blkio if device is removed. So if we
>> implement something so that once device is removed, blkio layer gets a
>> callback and we cleanup any state/rules associated with that device, then
>> I think we should be able to store the pointer to gendisk.
>>
>> I am still trying to figure out how elevator/ioscheduler state is cleaned
>> up if a device is removed while some IO is happening to it.
>>
>
> So blk_cleanup_queue() will do this. That means few things.
>
> - We can't store pointers to gendisk in blkio_policy_node or blkio_group
>  because gendisk might have gone away but request queue is still there.
>  May be one can try saving a pointer and taking a reference, but I guess
>  that becomes littles complicated.
>
> - If we are using disk name for rules and reporting stats, then we also
>  need to make sure that these rules are cleared from cgroups once device
>  has disappeared. Otherwise, following might happen.
>
>        - Create a rule for sda (x,y) for cgroup test1. x,y are major and
>          minor numbers.
>        - sda goes away. Rules still remains in blkio cgroup.
>        - Another device gets plugged in and i guess following can happen.
>                - device name is different but dev_t is same as sda.
>                - device name is same (sda) but device number is
>                  different.
>
>                In both the cases a user will be confused with stale rules
>                in cgroups.
>
>  Cleaning up cgroups rules can get little complicated. I guess we need to
>  create a function in blkio-cgroup.c to traverse through all the cgroups
>  and cleanup any blkio_policy_nodes belonging to device going away.
>
> In a nutshell, it probably is doable. You are welcome to write a patch. At
> the same time I am not against deivce major/minor number based interface,
> because it keeps things little simple.
>
> Thanks
> Vivek
>
> -
>> OTOH, Gui, may be one can use blk_lookup_devt() to lookup the dev_t of a
>> device using the disk name (sda). I just noticed it while reading the
>> code.
>>
>> Thanks
>> Vivek
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-11 19:21                                     ` Manuel Benitez
@ 2010-03-15 13:55                                       ` Vivek Goyal
  0 siblings, 0 replies; 40+ messages in thread
From: Vivek Goyal @ 2010-03-15 13:55 UTC (permalink / raw)
  To: Manuel Benitez
  Cc: Chad Talbott, Gui Jianfeng, Nauman Rafique, jens.axboe,
	linux-kernel, Li Zefan

On Thu, Mar 11, 2010 at 11:21:50AM -0800, Manuel Benitez wrote:
> On a closely related topic, I've just recently made a change to one of
> my branches that exposes the blkio.time and blkio.sectors information
> for the root cgroup. These stats would not show because the major and
> minor information for the root blkio_croup structures is zero. This
> information is not available at the when the root blkio_cgroup
> structures are instantiated, so they are left without major and minor
> information.
> 
> I have a simple fix that updates the major and minor information for
> the root structures at a later time. It looks something like this:
> 

Hi Ricky,

I think it is a good idea to export stats for root cgroup also. I also had
noticed this problem of major,minor not being available for root group as
at request queue initialization time it is not available. Checking for
blkg.dev being NULL at group creation time is not the cleanest solution
but at the same time can't think of better thing right now.

Can you please write a separate patch to fix it and post to lkml.

Thanks
Vivek

> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index cd79be0..b34c952 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -956,6 +956,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *c
>                 return NULL;;
> 
>         cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
> +       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
> +               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
> +               cfqg->blkg.dev = MKDEV(major, minor);
> +               goto done;
> +       }
>         if (cfqg || !create)
>                 goto done;
> 
> If folks think that this would be of interest, I can submit a formal
> patch. If someone can suggest a better way to do it that doesn't
> require extensive changes elsewhere, I'm open to working that up as
> well.
> 
> -Ricky
> 
> On Wed, Mar 10, 2010 at 12:31 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> > On Wed, Mar 10, 2010 at 01:03:36PM -0500, Vivek Goyal wrote:
> >> On Wed, Mar 10, 2010 at 09:38:35AM -0800, Chad Talbott wrote:
> >> > On Wed, Mar 10, 2010 at 7:30 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
> >> > > This still leaves the issue of reaching a gendisk object from request
> >> > > queue. Looking into it.
> >> >
> >> > It looks like we have that pairing way back in blk_register_queue()
> >> > which takes a gendisk.  Is there any reason we don't hold onto the
> >> > gendisk there?  Eyeballing add_disk() and unlink_gendisk() seems to
> >> > confirm that gendisk lifetime spans request_queue.
> >> >
> >>
> >> Yes, looking at the code, it looks like gendisk and request_queue object's
> >> lifetime is same and probably we can store a pointer to gendisk in
> >> request_queue at blk_register_queue() time. And then use this pointer to
> >> retrieve gendisk->disk_name to report stats.
> >>
> >
> > Well, gendisk and request_queue have little different life span. Following
> > seems to be the sequence a block driver follows.
> >
> >        blk_init_queue()
> >        alloc_disk() and add_disk()
> >        device_removed
> >        del_gendisk()
> >        blk_cleanup_queue()
> >
> > So first we cleaup the gendisk structure and later driver calls to cleanup
> > the request queue.
> >
> >> > Nauman and I were also wondering why blkio_group and blkio_policy_node
> >> > store a dev_t, rather than a direct pointer to gendisk.  dev_t seems
> >> > more like a userspace<->kernel interface than an inside-the-kernel
> >> > interface.
> >> >
> >>
> >> blkio_policy_node currently can't store a pointer to gendisk because there
> >> is no mechanism to call back into blkio if device is removed. So if we
> >> implement something so that once device is removed, blkio layer gets a
> >> callback and we cleanup any state/rules associated with that device, then
> >> I think we should be able to store the pointer to gendisk.
> >>
> >> I am still trying to figure out how elevator/ioscheduler state is cleaned
> >> up if a device is removed while some IO is happening to it.
> >>
> >
> > So blk_cleanup_queue() will do this. That means few things.
> >
> > - We can't store pointers to gendisk in blkio_policy_node or blkio_group
> >  because gendisk might have gone away but request queue is still there.
> >  May be one can try saving a pointer and taking a reference, but I guess
> >  that becomes littles complicated.
> >
> > - If we are using disk name for rules and reporting stats, then we also
> >  need to make sure that these rules are cleared from cgroups once device
> >  has disappeared. Otherwise, following might happen.
> >
> >        - Create a rule for sda (x,y) for cgroup test1. x,y are major and
> >          minor numbers.
> >        - sda goes away. Rules still remains in blkio cgroup.
> >        - Another device gets plugged in and i guess following can happen.
> >                - device name is different but dev_t is same as sda.
> >                - device name is same (sda) but device number is
> >                  different.
> >
> >                In both the cases a user will be confused with stale rules
> >                in cgroups.
> >
> >  Cleaning up cgroups rules can get little complicated. I guess we need to
> >  create a function in blkio-cgroup.c to traverse through all the cgroups
> >  and cleanup any blkio_policy_nodes belonging to device going away.
> >
> > In a nutshell, it probably is doable. You are welcome to write a patch. At
> > the same time I am not against deivce major/minor number based interface,
> > because it keeps things little simple.
> >
> > Thanks
> > Vivek
> >
> > -
> >> OTOH, Gui, may be one can use blk_lookup_devt() to lookup the dev_t of a
> >> device using the disk name (sda). I just noticed it while reading the
> >> code.
> >>
> >> Thanks
> >> Vivek
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> >

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-03-05 14:13                 ` Vivek Goyal
  2010-03-08 19:39                   ` Nauman Rafique
@ 2010-03-25  6:28                   ` Gui Jianfeng
  2010-04-07 17:12                     ` Chad Talbott
  1 sibling, 1 reply; 40+ messages in thread
From: Gui Jianfeng @ 2010-03-25  6:28 UTC (permalink / raw)
  To: jens.axboe
  Cc: Vivek Goyal, Chad Talbott, linux-kernel, Nauman Rafique, Li Zefan

Vivek Goyal wrote:
> On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
>> Currently, IO Controller makes use of blkio.weight to assign weight for
>> all devices. Here a new user interface "blkio.weight_device" is introduced to
>> assign different weights for different devices. blkio.weight becomes the
>> default value for devices which are not configured by "blkio.weight_device"
>>
>> You can use the following format to assigned specific weight for a given
>> device:
>>
>> major:minor represents device number.
>>
>> And you can remove a specific weight as following:
>>
>> V1->V2 changes:
>> - use user interface "weight_device" instead of "policy" suggested by Vivek
>> - rename some struct suggested by Vivek
>> - rebase to 2.6-block "for-linus" branch
>> - remove an useless list_empty check pointed out by Li Zefan
>> - some trivial typo fix
>>
>> V2->V3 changes:
>> - Move policy_*_node() functions up to get rid of forward declarations
>> - rename related functions by adding prefix "blkio_"
>>
> 
> Thanks for the changes Gui. Looks good to me.
> 
> Acked-by: Vivek Goyal <vgoyal@redhat.com>

Hi Jens,

This patchset seems to get lost. Just for reminding. Any comments?

Thanks,
Gui

> 
> Vivek
> 
>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>> ---
>>  block/blk-cgroup.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  block/blk-cgroup.h  |   10 ++
>>  block/cfq-iosched.c |    2 +-
>>  3 files changed, 246 insertions(+), 2 deletions(-)
>>
>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>> index c85d74c..8825e49 100644
>> --- a/block/blk-cgroup.c
>> +++ b/block/blk-cgroup.c
>> @@ -16,6 +16,7 @@
>>  #include <linux/module.h>
>>  #include <linux/err.h>
>>  #include "blk-cgroup.h"
>> +#include <linux/genhd.h>
>>  
>>  static DEFINE_SPINLOCK(blkio_list_lock);
>>  static LIST_HEAD(blkio_list);
>> @@ -23,6 +24,32 @@ static LIST_HEAD(blkio_list);
>>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>  
>> +static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
>> +					    struct blkio_policy_node *pn)
>> +{
>> +	list_add(&pn->node, &blkcg->policy_list);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
>> +{
>> +	list_del(&pn->node);
>> +}
>> +
>> +/* Must be called with blkcg->lock held */
>> +static struct blkio_policy_node *
>> +blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
>> +{
>> +	struct blkio_policy_node *pn;
>> +
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		if (pn->dev == dev)
>> +			return pn;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>>  {
>>  	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
>> @@ -128,6 +155,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	struct blkio_group *blkg;
>>  	struct hlist_node *n;
>>  	struct blkio_policy_type *blkiop;
>> +	struct blkio_policy_node *pn;
>>  
>>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>>  		return -EINVAL;
>> @@ -136,7 +164,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>  	spin_lock(&blkio_list_lock);
>>  	spin_lock_irq(&blkcg->lock);
>>  	blkcg->weight = (unsigned int)val;
>> +
>>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		pn = blkio_policy_search_node(blkcg, blkg->dev);
>> +
>> +		if (pn)
>> +			continue;
>> +
>>  		list_for_each_entry(blkiop, &blkio_list, list)
>>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>>  					blkcg->weight);
>> @@ -178,15 +212,208 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>>  
>>  #ifdef CONFIG_DEBUG_BLK_CGROUP
>>  void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>> -			unsigned long dequeue)
>> +					      unsigned long dequeue)
>>  {
>>  	blkg->dequeue += dequeue;
>>  }
>>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>>  #endif
>>  
>> +static int blkio_check_dev_num(dev_t dev)
>> +{
>> +	int part = 0;
>> +	struct gendisk *disk;
>> +
>> +	disk = get_gendisk(dev, &part);
>> +	if (!disk || part)
>> +		return -ENODEV;
>> +
>> +	return 0;
>> +}
>> +
>> +static int blkio_policy_parse_and_set(char *buf,
>> +				      struct blkio_policy_node *newpn)
>> +{
>> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>> +	int ret;
>> +	unsigned long major, minor, temp;
>> +	int i = 0;
>> +	dev_t dev;
>> +
>> +	memset(s, 0, sizeof(s));
>> +
>> +	while ((p = strsep(&buf, " ")) != NULL) {
>> +		if (!*p)
>> +			continue;
>> +
>> +		s[i++] = p;
>> +
>> +		/* Prevent from inputing too many things */
>> +		if (i == 3)
>> +			break;
>> +	}
>> +
>> +	if (i != 2)
>> +		return -EINVAL;
>> +
>> +	p = strsep(&s[0], ":");
>> +	if (p != NULL)
>> +		major_s = p;
>> +	else
>> +		return -EINVAL;
>> +
>> +	minor_s = s[0];
>> +	if (!minor_s)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(major_s, 10, &major);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(minor_s, 10, &minor);
>> +	if (ret)
>> +		return -EINVAL;
>> +
>> +	dev = MKDEV(major, minor);
>> +
>> +	ret = blkio_check_dev_num(dev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	newpn->dev = dev;
>> +
>> +	if (s[1] == NULL)
>> +		return -EINVAL;
>> +
>> +	ret = strict_strtoul(s[1], 10, &temp);
>> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
>> +	    temp > BLKIO_WEIGHT_MAX)
>> +		return -EINVAL;
>> +
>> +	newpn->weight =  temp;
>> +
>> +	return 0;
>> +}
>> +
>> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +			      dev_t dev)
>> +{
>> +	struct blkio_policy_node *pn;
>> +
>> +	pn = blkio_policy_search_node(blkcg, dev);
>> +	if (pn)
>> +		return pn->weight;
>> +	else
>> +		return blkcg->weight;
>> +}
>> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
>> +
>> +static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
>> +				       const char *buffer)
>> +{
>> +	int ret = 0;
>> +	char *buf;
>> +	struct blkio_policy_node *newpn, *pn;
>> +	struct blkio_cgroup *blkcg;
>> +	struct blkio_group *blkg;
>> +	int keep_newpn = 0;
>> +	struct hlist_node *n;
>> +	struct blkio_policy_type *blkiop;
>> +
>> +	buf = kstrdup(buffer, GFP_KERNEL);
>> +	if (!buf)
>> +		return -ENOMEM;
>> +
>> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
>> +	if (!newpn) {
>> +		ret = -ENOMEM;
>> +		goto free_buf;
>> +	}
>> +
>> +	ret = blkio_policy_parse_and_set(buf, newpn);
>> +	if (ret)
>> +		goto free_newpn;
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	pn = blkio_policy_search_node(blkcg, newpn->dev);
>> +	if (!pn) {
>> +		if (newpn->weight != 0) {
>> +			blkio_policy_insert_node(blkcg, newpn);
>> +			keep_newpn = 1;
>> +		}
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +
>> +	if (newpn->weight == 0) {
>> +		/* weight == 0 means deleteing a specific weight */
>> +		blkio_policy_delete_node(pn);
>> +		spin_unlock_irq(&blkcg->lock);
>> +		goto update_io_group;
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +
>> +	pn->weight = newpn->weight;
>> +
>> +update_io_group:
>> +	/* update weight for each cfqg */
>> +	spin_lock(&blkio_list_lock);
>> +	spin_lock_irq(&blkcg->lock);
>> +
>> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>> +		if (newpn->dev == blkg->dev) {
>> +			list_for_each_entry(blkiop, &blkio_list, list)
>> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
>> +						newpn->weight ?
>> +						newpn->weight :
>> +						blkcg->weight);
>> +		}
>> +	}
>> +
>> +	spin_unlock_irq(&blkcg->lock);
>> +	spin_unlock(&blkio_list_lock);
>> +
>> +free_newpn:
>> +	if (!keep_newpn)
>> +		kfree(newpn);
>> +free_buf:
>> +	kfree(buf);
>> +	return ret;
>> +}
>> +
>> +static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
>> +				      struct seq_file *m)
>> +{
>> +	struct blkio_cgroup *blkcg;
>> +	struct blkio_policy_node *pn;
>> +
>> +	seq_printf(m, "dev\tweight\n");
>> +
>> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
>> +	if (list_empty(&blkcg->policy_list))
>> +		goto out;
>> +
>> +	spin_lock_irq(&blkcg->lock);
>> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
>> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
>> +			   MINOR(pn->dev), pn->weight);
>> +	}
>> +	spin_unlock_irq(&blkcg->lock);
>> +out:
>> +	return 0;
>> +}
>> +
>>  struct cftype blkio_files[] = {
>>  	{
>> +		.name = "weight_device",
>> +		.read_seq_string = blkiocg_weight_device_read,
>> +		.write_string = blkiocg_weight_device_write,
>> +		.max_write_len = 256,
>> +	},
>> +	{
>>  		.name = "weight",
>>  		.read_u64 = blkiocg_weight_read,
>>  		.write_u64 = blkiocg_weight_write,
>> @@ -220,6 +447,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>>  	struct blkio_group *blkg;
>>  	void *key;
>>  	struct blkio_policy_type *blkiop;
>> +	struct blkio_policy_node *pn, *pntmp;
>>  
>>  	rcu_read_lock();
>>  remove_entry:
>> @@ -250,7 +478,12 @@ remove_entry:
>>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>>  	spin_unlock(&blkio_list_lock);
>>  	goto remove_entry;
>> +
>>  done:
>> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
>> +		blkio_policy_delete_node(pn);
>> +		kfree(pn);
>> +	}
>>  	free_css_id(&blkio_subsys, &blkcg->css);
>>  	rcu_read_unlock();
>>  	kfree(blkcg);
>> @@ -280,6 +513,7 @@ done:
>>  	spin_lock_init(&blkcg->lock);
>>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>>  
>> +	INIT_LIST_HEAD(&blkcg->policy_list);
>>  	return &blkcg->css;
>>  }
>>  
>> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
>> index 84bf745..c8144a2 100644
>> --- a/block/blk-cgroup.h
>> +++ b/block/blk-cgroup.h
>> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>>  	unsigned int weight;
>>  	spinlock_t lock;
>>  	struct hlist_head blkg_list;
>> +	struct list_head policy_list; /* list of blkio_policy_node */
>>  };
>>  
>>  struct blkio_group {
>> @@ -43,6 +44,15 @@ struct blkio_group {
>>  	unsigned long sectors;
>>  };
>>  
>> +struct blkio_policy_node {
>> +	struct list_head node;
>> +	dev_t dev;
>> +	unsigned int weight;
>> +};
>> +
>> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>> +				     dev_t dev);
>> +
>>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
>>  						unsigned int weight);
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index dee9d93..6945e9b 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -954,7 +954,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	if (!cfqg)
>>  		goto done;
>>  
>> -	cfqg->weight = blkcg->weight;
>>  	for_each_cfqg_st(cfqg, i, j, st)
>>  		*st = CFQ_RB_ROOT;
>>  	RB_CLEAR_NODE(&cfqg->rb_node);
>> @@ -971,6 +970,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>>  					MKDEV(major, minor));
>> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>>  
>>  	/* Add group on cfqd list */
>>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>> -- 
>> 1.5.4.rc3
>>
>>
>>
>>
> 
> 
> 


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device"  for IO-Controller
  2010-03-25  6:28                   ` Gui Jianfeng
@ 2010-04-07 17:12                     ` Chad Talbott
  2010-04-07 17:28                       ` Vivek Goyal
  0 siblings, 1 reply; 40+ messages in thread
From: Chad Talbott @ 2010-04-07 17:12 UTC (permalink / raw)
  To: Gui Jianfeng
  Cc: jens.axboe, Vivek Goyal, linux-kernel, Nauman Rafique, Li Zefan,
	Divyesh Shah

On Wed, Mar 24, 2010 at 11:28 PM, Gui Jianfeng
<guijianfeng@cn.fujitsu.com> wrote:
> Vivek Goyal wrote:
>> On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
>>> V2->V3 changes:
>>> - Move policy_*_node() functions up to get rid of forward declarations
>>> - rename related functions by adding prefix "blkio_"
>>>
>>
>> Thanks for the changes Gui. Looks good to me.
>>
>> Acked-by: Vivek Goyal <vgoyal@redhat.com>
>
> Hi Jens,
>
> This patchset seems to get lost. Just for reminding. Any comments?

Hi Jens,

Just want to add a quick note that we're excited about this
functionality and are hoping to see it go in.

Chad

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-04-07 17:12                     ` Chad Talbott
@ 2010-04-07 17:28                       ` Vivek Goyal
  2010-04-08  0:11                         ` Gui Jianfeng
  0 siblings, 1 reply; 40+ messages in thread
From: Vivek Goyal @ 2010-04-07 17:28 UTC (permalink / raw)
  To: Chad Talbott
  Cc: Gui Jianfeng, jens.axboe, linux-kernel, Nauman Rafique, Li Zefan,
	Divyesh Shah

On Wed, Apr 07, 2010 at 10:12:03AM -0700, Chad Talbott wrote:
> On Wed, Mar 24, 2010 at 11:28 PM, Gui Jianfeng
> <guijianfeng@cn.fujitsu.com> wrote:
> > Vivek Goyal wrote:
> >> On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
> >>> V2->V3 changes:
> >>> - Move policy_*_node() functions up to get rid of forward declarations
> >>> - rename related functions by adding prefix "blkio_"
> >>>
> >>
> >> Thanks for the changes Gui. Looks good to me.
> >>
> >> Acked-by: Vivek Goyal <vgoyal@redhat.com>
> >
> > Hi Jens,
> >
> > This patchset seems to get lost. Just for reminding. Any comments?
> 
> Hi Jens,
> 
> Just want to add a quick note that we're excited about this
> functionality and are hoping to see it go in.
> 

I guess this patchset is now going to conflict with Divyesh's changes.

Gui, once blkio stat patches (cleaned up ones) are committed may be you
can rebase your changes on top of that. It will become easier for Jens.
(Until and unless he has some objections to this patchset).

Thanks
Vivek

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2 V3] io-controller: Add a new interface "weight_device" for IO-Controller
  2010-04-07 17:28                       ` Vivek Goyal
@ 2010-04-08  0:11                         ` Gui Jianfeng
  0 siblings, 0 replies; 40+ messages in thread
From: Gui Jianfeng @ 2010-04-08  0:11 UTC (permalink / raw)
  To: Vivek Goyal
  Cc: Chad Talbott, jens.axboe, linux-kernel, Nauman Rafique, Li Zefan,
	Divyesh Shah

Vivek Goyal wrote:
> On Wed, Apr 07, 2010 at 10:12:03AM -0700, Chad Talbott wrote:
>> On Wed, Mar 24, 2010 at 11:28 PM, Gui Jianfeng
>> <guijianfeng@cn.fujitsu.com> wrote:
>>> Vivek Goyal wrote:
>>>> On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
>>>>> V2->V3 changes:
>>>>> - Move policy_*_node() functions up to get rid of forward declarations
>>>>> - rename related functions by adding prefix "blkio_"
>>>>>
>>>> Thanks for the changes Gui. Looks good to me.
>>>>
>>>> Acked-by: Vivek Goyal <vgoyal@redhat.com>
>>> Hi Jens,
>>>
>>> This patchset seems to get lost. Just for reminding. Any comments?
>> Hi Jens,
>>
>> Just want to add a quick note that we're excited about this
>> functionality and are hoping to see it go in.
>>
> 
> I guess this patchset is now going to conflict with Divyesh's changes.
> 
> Gui, once blkio stat patches (cleaned up ones) are committed may be you
> can rebase your changes on top of that. It will become easier for Jens.
> (Until and unless he has some objections to this patchset).

Sure, will do.

Thanks,
Gui

> 
> Thanks
> Vivek
> 
> 


^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2010-04-08  0:11 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-03  6:21 [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Gui Jianfeng
2010-03-03  9:01 ` Jens Axboe
2010-03-03 14:33 ` Vivek Goyal
2010-03-03 15:47   ` Vivek Goyal
     [not found]     ` <e98e18941003031519v14d4f79cg33da5c3cf1f5ac03@mail.gmail.com>
2010-03-04  2:25       ` Chad Talbott
2010-03-04  3:23         ` Gui Jianfeng
2010-03-04  7:35           ` [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller Gui Jianfeng
2010-03-04 15:24             ` Vivek Goyal
2010-03-05  0:31               ` Gui Jianfeng
2010-03-05  2:25               ` [PATCH 1/2 V3] " Gui Jianfeng
2010-03-05 14:13                 ` Vivek Goyal
2010-03-08 19:39                   ` Nauman Rafique
2010-03-08 23:09                     ` Vivek Goyal
2010-03-09  1:52                       ` Gui Jianfeng
2010-03-09 19:03                         ` Vivek Goyal
2010-03-10  0:33                           ` Gui Jianfeng
2010-03-10  5:41                             ` Chad Talbott
2010-03-10 15:30                             ` Vivek Goyal
2010-03-10 17:38                               ` Chad Talbott
2010-03-10 18:03                                 ` Vivek Goyal
2010-03-10 20:31                                   ` Vivek Goyal
2010-03-11 19:21                                     ` Manuel Benitez
2010-03-15 13:55                                       ` Vivek Goyal
2010-03-09 20:16                     ` Vivek Goyal
2010-03-09 20:39                       ` Nauman Rafique
2010-03-25  6:28                   ` Gui Jianfeng
2010-04-07 17:12                     ` Chad Talbott
2010-04-07 17:28                       ` Vivek Goyal
2010-04-08  0:11                         ` Gui Jianfeng
2010-03-05  2:26               ` [PATCH 2/2 V3] io-controller: Document for blkio.weight_device Gui Jianfeng
2010-03-05  2:44                 ` Takuya Yoshikawa
2010-03-05  2:42                   ` Gui Jianfeng
2010-03-05 14:13                 ` Vivek Goyal
2010-03-04  7:35           ` [PATCH 2/2 V2] " Gui Jianfeng
2010-03-04 11:22             ` Takuya Yoshikawa
2010-03-05  0:38               ` Gui Jianfeng
2010-03-04 15:06         ` [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Vivek Goyal
2010-03-04  0:27   ` Gui Jianfeng
2010-03-03 15:36 ` Vivek Goyal
2010-03-04  0:29   ` Gui Jianfeng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.