* [RFC PATCH] sched: dynamically schedule domain configuration
@ 2012-07-16 9:16 Michael Wang
2012-07-17 2:46 ` Michael Wang
0 siblings, 1 reply; 3+ messages in thread
From: Michael Wang @ 2012-07-16 9:16 UTC (permalink / raw)
To: LKML; +Cc: Peter Zijlstra, Ingo Molnar
From: Michael Wang <wangyun@linux.vnet.ibm.com>
This patch provides a way for users to dynamically change the
load-balance behaviour by setting flags on the scheduler domains.
Currently it relies on the cgroup interface (a new 'domain' cgroup
subsystem) and only SD_LOAD_BALANCE is implemented. Usage:
1. /sys/fs/cgroup/domain/domain.config_level
the default config_level is 0, which means we currently configure
the sibling domain for all cpus; use:
echo 'number' > /sys/fs/cgroup/domain/domain.config_level
to change the level.
2. /sys/fs/cgroup/domain/domain.topology
this shows the SD_LOAD_BALANCE status of every domain level on all
cpus; use:
cat /sys/fs/cgroup/domain/domain.topology
3. /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
this changes the SD_LOAD_BALANCE bit in the flags of the scheduler
domains at level 'config_level'; use:
echo 1 > /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
to enable this bit, and:
echo 0 > /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
to disable it.
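As a rough example of a full session (a sketch only: it assumes the
domain hierarchy is already mounted at /sys/fs/cgroup/domain and that
the machine actually has a domain level 1; level numbering depends on
the topology):
  # show every cpu's domain levels and their SD_LOAD_BALANCE state
  cat /sys/fs/cgroup/domain/domain.topology
  # point the interface at domain level 1
  echo 1 > /sys/fs/cgroup/domain/domain.config_level
  # clear SD_LOAD_BALANCE on that level for all active cpus
  echo 0 > /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
  # re-read: should now report 0 for level 1
  cat /sys/fs/cgroup/domain/domain.SD_LOAD_BALANCE
Based on the topology handler in the patch, the topology file prints
one "cpu N" entry per active cpu, followed by indented "level" and
"SD_LOAD_BALANCE" lines for each domain; on a hypothetical box with two
domain levels the output might look roughly like:
  cpu 0
  	level 0
  		SD_LOAD_BALANCE 1
  	level 1
  		SD_LOAD_BALANCE 1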
It may not work well yet (perhaps not at all, as I can't see any change
on my server even after disabling SD_LOAD_BALANCE on all domains), but
it is interesting and should appeal to people who want a way to switch
off load balancing by hand, if we can get it working.
Comments and questions are very welcome ;-)
Signed-off-by: Michael Wang <wangyun@linux.vnet.ibm.com>
---
include/linux/cgroup_subsys.h | 1 +
kernel/sched/core.c | 143 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 144 insertions(+), 0 deletions(-)
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0bd390c..25eb842 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -21,6 +21,7 @@ SUBSYS(debug)
#ifdef CONFIG_CGROUP_SCHED
SUBSYS(cpu_cgroup)
+SUBSYS(domain_cgroup)
#endif
/* */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3987b9d..544bf78 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8423,6 +8423,149 @@ struct cgroup_subsys cpu_cgroup_subsys = {
.early_init = 1,
};
+static struct cgroup_subsys_state domain_cgroup_css;
+static struct cgroup_subsys_state *domain_cgroup_create(struct cgroup *cgrp)
+{
+ if (!cgrp->parent) {
+ /* This is early initialization for the top cgroup */
+ return &domain_cgroup_css;
+ }
+
+ return ERR_PTR(-EPERM);
+}
+
+static void domain_cgroup_destroy(struct cgroup *cgrp)
+{
+ return;
+}
+
+static int domain_cgroup_can_attach(struct cgroup *cgrp,
+ struct cgroup_taskset *tset)
+{
+ return -EINVAL;
+}
+
+static void domain_cgroup_attach(struct cgroup *cgrp,
+ struct cgroup_taskset *tset)
+{
+ return;
+}
+
+static void domain_cgroup_exit(struct cgroup *cgrp,
+ struct cgroup *old_cgrp,
+ struct task_struct *task)
+{
+ return;
+}
+
+static int domain_config_level;
+
+static int domain_cl_write_u64(struct cgroup *cgrp,
+ struct cftype *cftype,
+ u64 shareval)
+{
+ domain_config_level = shareval;
+ return 0;
+}
+
+static u64 domain_cl_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+ return (u64)domain_config_level;
+}
+
+static int domain_slb_write_u64(struct cgroup *cgrp,
+ struct cftype *cftype,
+ u64 shareval)
+{
+ int cpu;
+ struct sched_domain *sd;
+ if (shareval != 0 && shareval != 1)
+ return -EINVAL;
+
+ mutex_lock(&sched_domains_mutex);
+ for_each_cpu(cpu, cpu_active_mask) {
+ for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+ if (sd->level == domain_config_level) {
+ if (shareval)
+ sd->flags |= SD_LOAD_BALANCE;
+ else
+ sd->flags &= ~SD_LOAD_BALANCE;
+ }
+ }
+ }
+ mutex_unlock(&sched_domains_mutex);
+ return 0;
+}
+
+static u64 domain_slb_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+ int cpu, ret = 0;
+ struct sched_domain *sd;
+ mutex_lock(&sched_domains_mutex);
+ for_each_cpu(cpu, cpu_active_mask) {
+ for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+ if (sd->level == domain_config_level) {
+ if (sd->flags & SD_LOAD_BALANCE)
+ ret = 1;
+ goto out;
+ }
+ }
+ }
+out:
+ mutex_unlock(&sched_domains_mutex);
+ return ret;
+}
+
+static int domain_topology_show(struct cgroup *cgrp,
+ struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ int cpu;
+ struct sched_domain *sd;
+ mutex_lock(&sched_domains_mutex);
+ for_each_cpu(cpu, cpu_active_mask) {
+ cb->fill(cb, "cpu", cpu);
+ for (sd = cpu_rq(cpu)->sd; sd; sd = sd->parent) {
+ cb->fill(cb, "\tlevel", sd->level);
+ cb->fill(cb, "\t\tSD_LOAD_BALANCE",
+ sd->flags & SD_LOAD_BALANCE);
+ }
+ }
+ mutex_unlock(&sched_domains_mutex);
+
+ return 0;
+}
+
+static struct cftype domain_files[] = {
+ {
+ .name = "config_level",
+ .read_u64 = domain_cl_read_u64,
+ .write_u64 = domain_cl_write_u64,
+ },
+ {
+ .name = "SD_LOAD_BALANCE",
+ .read_u64 = domain_slb_read_u64,
+ .write_u64 = domain_slb_write_u64,
+ },
+ {
+ .name = "topology",
+ .read_map = domain_topology_show,
+ },
+ { } /* terminate */
+};
+
+struct cgroup_subsys domain_cgroup_subsys = {
+ .name = "domain",
+ .create = domain_cgroup_create,
+ .destroy = domain_cgroup_destroy,
+ .can_attach = domain_cgroup_can_attach,
+ .attach = domain_cgroup_attach,
+ .exit = domain_cgroup_exit,
+ .subsys_id = domain_cgroup_subsys_id,
+ .base_cftypes = domain_files,
+ .early_init = 1,
+};
+
#endif /* CONFIG_CGROUP_SCHED */
#ifdef CONFIG_CGROUP_CPUACCT
--
1.7.4.1
* Re: [RFC PATCH] sched: dynamically schedule domain configuration
@ 2012-07-17 2:46 ` Michael Wang
0 siblings, 0 replies; 3+ messages in thread
From: Michael Wang @ 2012-07-17 2:46 UTC (permalink / raw)
To: LKML; +Cc: Peter Zijlstra, Ingo Molnar, tj, lizefan, cgroups
Add the missing cc list.