All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
To: Vivek Goyal <vgoyal@redhat.com>,
	Nauman Rafique <nauman@google.com>,
	jens.axboe@oracle.com
Cc: Chad Talbott <ctalbott@google.com>,
	linux-kernel@vger.kernel.org, Li Zefan <lizf@cn.fujitsu.com>
Subject: Re: [PATCH 1/2 V3] io-controller: Add a new interface	"weight_device" for IO-Controller
Date: Tue, 09 Mar 2010 09:52:06 +0800	[thread overview]
Message-ID: <4B95A9C6.9060504@cn.fujitsu.com> (raw)
In-Reply-To: <20100308230905.GB3614@redhat.com>

Vivek Goyal wrote:
> On Mon, Mar 08, 2010 at 11:39:54AM -0800, Nauman Rafique wrote:
>> On Fri, Mar 5, 2010 at 6:13 AM, Vivek Goyal <vgoyal@redhat.com> wrote:
>>> On Fri, Mar 05, 2010 at 10:25:58AM +0800, Gui Jianfeng wrote:
>>>> Currently, IO Controller makes use of blkio.weight to assign weight for
>>>> all devices. Here a new user interface "blkio.weight_device" is introduced to
>>>> assign different weights for different devices. blkio.weight becomes the
>>>> default value for devices which are not configured by "blkio.weight_device"
>>>>
>>>> You can use the following format to assigned specific weight for a given
>>>> device:
>>>>
>>>> major:minor represents device number.
>>>>
>>>> And you can remove a specific weight as following:
>>>>
>>>> V1->V2 changes:
>>>> - use user interface "weight_device" instead of "policy" suggested by Vivek
>>>> - rename some struct suggested by Vivek
>>>> - rebase to 2.6-block "for-linus" branch
>>>> - remove an useless list_empty check pointed out by Li Zefan
>>>> - some trivial typo fix
>>>>
>>>> V2->V3 changes:
>>>> - Move policy_*_node() functions up to get rid of forward declarations
>>>> - rename related functions by adding prefix "blkio_"
>>>>
>>> Thanks for the changes Gui. Looks good to me.
>>>
>>> Acked-by: Vivek Goyal <vgoyal@redhat.com>
>>>
>>> Vivek
>>>
>>>> Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
>>>> ---
>>>>  block/blk-cgroup.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>>>  block/blk-cgroup.h  |   10 ++
>>>>  block/cfq-iosched.c |    2 +-
>>>>  3 files changed, 246 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
>>>> index c85d74c..8825e49 100644
>>>> --- a/block/blk-cgroup.c
>>>> +++ b/block/blk-cgroup.c
>>>> @@ -16,6 +16,7 @@
>>>>  #include <linux/module.h>
>>>>  #include <linux/err.h>
>>>>  #include "blk-cgroup.h"
>>>> +#include <linux/genhd.h>
>>>>
>>>>  static DEFINE_SPINLOCK(blkio_list_lock);
>>>>  static LIST_HEAD(blkio_list);
>>>> @@ -23,6 +24,32 @@ static LIST_HEAD(blkio_list);
>>>>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>>>>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>>>>
>>>> +static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
>>>> +                                         struct blkio_policy_node *pn)
>>>> +{
>>>> +     list_add(&pn->node, &blkcg->policy_list);
>>>> +}
>>>> +
>>>> +/* Must be called with blkcg->lock held */
>>>> +static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
>>>> +{
>>>> +     list_del(&pn->node);
>>>> +}
>>>> +
>>>> +/* Must be called with blkcg->lock held */
>>>> +static struct blkio_policy_node *
>>>> +blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
>>>> +{
>>>> +     struct blkio_policy_node *pn;
>>>> +
>>>> +     list_for_each_entry(pn, &blkcg->policy_list, node) {
>>>> +             if (pn->dev == dev)
>>>> +                     return pn;
>>>> +     }
>>>> +
>>>> +     return NULL;
>>>> +}
>>>> +
>>>>  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
>>>>  {
>>>>       return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
>>>> @@ -128,6 +155,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>>>       struct blkio_group *blkg;
>>>>       struct hlist_node *n;
>>>>       struct blkio_policy_type *blkiop;
>>>> +     struct blkio_policy_node *pn;
>>>>
>>>>       if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>>>>               return -EINVAL;
>>>> @@ -136,7 +164,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>>>>       spin_lock(&blkio_list_lock);
>>>>       spin_lock_irq(&blkcg->lock);
>>>>       blkcg->weight = (unsigned int)val;
>>>> +
>>>>       hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
>>>> +             pn = blkio_policy_search_node(blkcg, blkg->dev);
>>>> +
>>>> +             if (pn)
>>>> +                     continue;
>>>> +
>>>>               list_for_each_entry(blkiop, &blkio_list, list)
>>>>                       blkiop->ops.blkio_update_group_weight_fn(blkg,
>>>>                                       blkcg->weight);
>>>> @@ -178,15 +212,208 @@ SHOW_FUNCTION_PER_GROUP(dequeue);
>>>>
>>>>  #ifdef CONFIG_DEBUG_BLK_CGROUP
>>>>  void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>>>> -                     unsigned long dequeue)
>>>> +                                           unsigned long dequeue)
>>>>  {
>>>>       blkg->dequeue += dequeue;
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>>>>  #endif
>>>>
>>>> +static int blkio_check_dev_num(dev_t dev)
>>>> +{
>>>> +     int part = 0;
>>>> +     struct gendisk *disk;
>>>> +
>>>> +     disk = get_gendisk(dev, &part);
>>>> +     if (!disk || part)
>>>> +             return -ENODEV;
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static int blkio_policy_parse_and_set(char *buf,
>>>> +                                   struct blkio_policy_node *newpn)
>>>> +{
>>>> +     char *s[4], *p, *major_s = NULL, *minor_s = NULL;
>>>> +     int ret;
>>>> +     unsigned long major, minor, temp;
>>>> +     int i = 0;
>>>> +     dev_t dev;
>>>> +
>>>> +     memset(s, 0, sizeof(s));
>>>> +
>>>> +     while ((p = strsep(&buf, " ")) != NULL) {
>>>> +             if (!*p)
>>>> +                     continue;
>>>> +
>>>> +             s[i++] = p;
>>>> +
>>>> +             /* Prevent from inputing too many things */
>>>> +             if (i == 3)
>>>> +                     break;
>>>> +     }
>>>> +
>>>> +     if (i != 2)
>>>> +             return -EINVAL;
>>>> +
>>>> +     p = strsep(&s[0], ":");
>>>> +     if (p != NULL)
>>>> +             major_s = p;
>>>> +     else
>>>> +             return -EINVAL;
>>>> +
>>>> +     minor_s = s[0];
>>>> +     if (!minor_s)
>>>> +             return -EINVAL;
>>>> +
>>>> +     ret = strict_strtoul(major_s, 10, &major);
>>>> +     if (ret)
>>>> +             return -EINVAL;
>>>> +
>>>> +     ret = strict_strtoul(minor_s, 10, &minor);
>>>> +     if (ret)
>>>> +             return -EINVAL;
>>>> +
>>>> +     dev = MKDEV(major, minor);
>> I am not quite sure if exposing a mojor,minor number is the best
>> interface that can be exposed to user space. How about actual disk
>> names like sda, sdb, .. etc? The only problem I see there is that it
>> seems tricky to get to these disk names from within the block layer.
>> "struct request_queue" has a pointer to backing_dev which has a device
>> from which we can get major,minor. But in order to get to disk name,
>> we would have to call get_gendisk which can hold a semaphore. Is this
>> the reason for us going with major,minor as a user interface to
>> specify a disk? I bet there are good reasons for us not keeping a
>> pointer to "struct gendisk" from "struct request_queue". If we could
>> keep that pointer, our user interface could be very easily modified to
>> be the disk name like sda, sdb, etc.
> 
> That's a good question. Why not use device names instead of device
> numbers? From user's perspective, device names will be more intutive
> to use.
> 
> At the same time, will it look odd to handle devices with their names as WWID.
> 
> /dev/mapper/3600508b400105df70000e000026f0000
> 
> Though I see that there is an alternate way to address the same device
> like /dev/dm-2 etc.
> 
> So from user's perspective I think it will be more intutive to handle
> disk names instead of numbers.
> 
> Gui, did you forsee issues in implementing disk names?

Hi Vivek,

>From the implementation of view, we need a device number as a key in blkio_policy_node, 
if using device name as user interface, i can't figure out a way to retirve the 
corresponding device number by means of device name (like sda, not "/dev/sda"). 
Jens, is there any method to handle this?
Another cgroup subsystem "device" cgroup also make use of device number as user interface,
I guess the reason is the same.

Thanks, 
Gui

> 
> Thanks
> Vivek
> 
> 


  reply	other threads:[~2010-03-09  1:52 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-03  6:21 [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Gui Jianfeng
2010-03-03  9:01 ` Jens Axboe
2010-03-03 14:33 ` Vivek Goyal
2010-03-03 15:47   ` Vivek Goyal
     [not found]     ` <e98e18941003031519v14d4f79cg33da5c3cf1f5ac03@mail.gmail.com>
2010-03-04  2:25       ` Chad Talbott
2010-03-04  3:23         ` Gui Jianfeng
2010-03-04  7:35           ` [PATCH 1/2 V2] io-controller: Add a new interface "weight_device" for IO-Controller Gui Jianfeng
2010-03-04 15:24             ` Vivek Goyal
2010-03-05  0:31               ` Gui Jianfeng
2010-03-05  2:25               ` [PATCH 1/2 V3] " Gui Jianfeng
2010-03-05 14:13                 ` Vivek Goyal
2010-03-08 19:39                   ` Nauman Rafique
2010-03-08 23:09                     ` Vivek Goyal
2010-03-09  1:52                       ` Gui Jianfeng [this message]
2010-03-09 19:03                         ` Vivek Goyal
2010-03-10  0:33                           ` Gui Jianfeng
2010-03-10  5:41                             ` Chad Talbott
2010-03-10 15:30                             ` Vivek Goyal
2010-03-10 17:38                               ` Chad Talbott
2010-03-10 18:03                                 ` Vivek Goyal
2010-03-10 20:31                                   ` Vivek Goyal
2010-03-11 19:21                                     ` Manuel Benitez
2010-03-15 13:55                                       ` Vivek Goyal
2010-03-09 20:16                     ` Vivek Goyal
2010-03-09 20:39                       ` Nauman Rafique
2010-03-25  6:28                   ` Gui Jianfeng
2010-04-07 17:12                     ` Chad Talbott
2010-04-07 17:28                       ` Vivek Goyal
2010-04-08  0:11                         ` Gui Jianfeng
2010-03-05  2:26               ` [PATCH 2/2 V3] io-controller: Document for blkio.weight_device Gui Jianfeng
2010-03-05  2:44                 ` Takuya Yoshikawa
2010-03-05  2:42                   ` Gui Jianfeng
2010-03-05 14:13                 ` Vivek Goyal
2010-03-04  7:35           ` [PATCH 2/2 V2] " Gui Jianfeng
2010-03-04 11:22             ` Takuya Yoshikawa
2010-03-05  0:38               ` Gui Jianfeng
2010-03-04 15:06         ` [PATCH 2/1] io-controller: Add a new interface "policy" for IO Controller Vivek Goyal
2010-03-04  0:27   ` Gui Jianfeng
2010-03-03 15:36 ` Vivek Goyal
2010-03-04  0:29   ` Gui Jianfeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B95A9C6.9060504@cn.fujitsu.com \
    --to=guijianfeng@cn.fujitsu.com \
    --cc=ctalbott@google.com \
    --cc=jens.axboe@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=nauman@google.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.