linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov@parallels.com>
To: <linux-kernel@vger.kernel.org>
Cc: <linux-mm@kvack.org>, <cgroups@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Tejun Heo <tj@kernel.org>, Li Zefan <lizefan@huawei.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.cz>, Mel Gorman <mgorman@suse.de>,
	Rik van Riel <riel@redhat.com>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Hugh Dickins <hughd@google.com>,
	David Rientjes <rientjes@google.com>,
	Pavel Emelyanov <xemul@parallels.com>,
	Balbir Singh <bsingharora@gmail.com>
Subject: [PATCH RFC 1/5] vm_cgroup: basic infrastructure
Date: Thu, 3 Jul 2014 16:48:17 +0400	[thread overview]
Message-ID: <5169989c3d82823f9675f00152c8bf28f91ab890.1404383187.git.vdavydov@parallels.com> (raw)
In-Reply-To: <cover.1404383187.git.vdavydov@parallels.com>

This patch introduces the vm cgroup to control address space expansion
of tasks that belong to a cgroup. The idea is to provide a mechanism to
limit memory overcommit not only for the whole system, but also on a
per-cgroup basis.

This patch only adds some basic cgroup methods, like alloc/free and
write/read, while the real accounting/limiting is done in the following
patches.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
---
 include/linux/cgroup_subsys.h |    4 ++
 include/linux/vm_cgroup.h     |   18 ++++++
 init/Kconfig                  |    4 ++
 mm/Makefile                   |    1 +
 mm/vm_cgroup.c                |  131 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 158 insertions(+)
 create mode 100644 include/linux/vm_cgroup.h
 create mode 100644 mm/vm_cgroup.c

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 98c4f9b12b03..8eb7db12f6ea 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -47,6 +47,10 @@ SUBSYS(net_prio)
 SUBSYS(hugetlb)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_VM)
+SUBSYS(vm)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
diff --git a/include/linux/vm_cgroup.h b/include/linux/vm_cgroup.h
new file mode 100644
index 000000000000..b629c9affa4b
--- /dev/null
+++ b/include/linux/vm_cgroup.h
@@ -0,0 +1,18 @@
+#ifndef _LINUX_VM_CGROUP_H
+#define _LINUX_VM_CGROUP_H
+
+#include <linux/cgroup.h>	/* declares vm_cgrp_subsys via SUBSYS(vm) */
+
+#ifdef CONFIG_CGROUP_VM
+static inline bool vm_cgroup_disabled(void)
+{
+	return vm_cgrp_subsys.disabled;	/* nonzero: controller is disabled */
+}
+#else /* !CONFIG_CGROUP_VM */
+static inline bool vm_cgroup_disabled(void)
+{
+	return true;	/* controller not built, so always "disabled" */
+}
+#endif /* CONFIG_CGROUP_VM */
+
+#endif /* _LINUX_VM_CGROUP_H */
diff --git a/init/Kconfig b/init/Kconfig
index 9d76b99af1b9..4419835bea7c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1008,6 +1008,10 @@ config MEMCG_KMEM
 	  unusable in real life so DO NOT SELECT IT unless for development
 	  purposes.
 
+config CGROUP_VM
+	bool "Virtual Memory Resource Controller for Control Groups"
+	depends on RESOURCE_COUNTERS
+
 config CGROUP_HUGETLB
 	bool "HugeTLB Resource Controller for Control Groups"
 	depends on RESOURCE_COUNTERS && HUGETLB_PAGE
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..914520d2669f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
 obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o
+obj-$(CONFIG_CGROUP_VM) += vm_cgroup.o
 obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
diff --git a/mm/vm_cgroup.c b/mm/vm_cgroup.c
new file mode 100644
index 000000000000..7f5b81482748
--- /dev/null
+++ b/mm/vm_cgroup.c
@@ -0,0 +1,131 @@
+#include <linux/cgroup.h>
+#include <linux/res_counter.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vm_cgroup.h>
+
+struct vm_cgroup {	/* per-cgroup state of the vm controller */
+	struct cgroup_subsys_state css;	/* embedded css */
+
+	/*
+	 * Accounts for vm usage; initialised in vm_cgroup_css_alloc().
+	 */
+	struct res_counter res;
+};
+
+static struct vm_cgroup *root_vm_cgroup __read_mostly;	/* set by css_alloc */
+
+static inline bool vm_cgroup_is_root(struct vm_cgroup *vmcg)
+{
+	return root_vm_cgroup == vmcg;	/* identity check against the root */
+}
+
+static struct vm_cgroup *vm_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+	return !s ? NULL : container_of(s, struct vm_cgroup, css); /* NULL ok */
+}
+
+/* Allocate a vm_cgroup and init its counter with the parent's, if any. */
+static struct cgroup_subsys_state *
+vm_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct vm_cgroup *parent = vm_cgroup_from_css(parent_css);
+	struct vm_cgroup *vmcg = kzalloc(sizeof(*vmcg), GFP_KERNEL);
+
+	if (!vmcg)
+		return ERR_PTR(-ENOMEM);
+	if (parent) {
+		res_counter_init(&vmcg->res, &parent->res);
+	} else {	/* no parent: remember this one as the root */
+		res_counter_init(&vmcg->res, NULL);
+		root_vm_cgroup = vmcg;
+	}
+	return &vmcg->css;
+}
+
+/* Free the vm_cgroup that @css is embedded in. */
+static void vm_cgroup_css_free(struct cgroup_subsys_state *css)
+{
+	/* Map the css back to its container before releasing it. */
+	kfree(vm_cgroup_from_css(css));
+}
+
+/* Read the res_counter member selected by @cft->private (a RES_* value). */
+static u64 vm_cgroup_read_u64(struct cgroup_subsys_state *css,
+			      struct cftype *cft)
+{
+	struct res_counter *counter = &vm_cgroup_from_css(css)->res;
+
+	return res_counter_read_u64(counter, cft->private);
+}
+
+/* Set the limit from @buf; the root cgroup's limit cannot be changed. */
+static ssize_t vm_cgroup_write(struct kernfs_open_file *of,
+			       char *buf, size_t nbytes, loff_t off)
+{
+	struct vm_cgroup *vmcg = vm_cgroup_from_css(of_css(of));
+	unsigned long long limit;
+	int err;
+
+	if (vm_cgroup_is_root(vmcg))
+		return -EINVAL;
+
+	/* Parse the whitespace-stripped input, then apply it as the limit. */
+	err = res_counter_memparse_write_strategy(strstrip(buf), &limit);
+	if (!err)
+		err = res_counter_set_limit(&vmcg->res, limit);
+
+	return err ? err : nbytes;
+}
+
+/* Reset max usage or the fail count, as selected by the file's ->private. */
+static ssize_t vm_cgroup_reset(struct kernfs_open_file *of, char *buf,
+			       size_t nbytes, loff_t off)
+{
+	struct res_counter *counter = &vm_cgroup_from_css(of_css(of))->res;
+
+	switch (of_cft(of)->private) {
+	case RES_FAILCNT:
+		res_counter_reset_failcnt(counter);
+		break;
+	case RES_MAX_USAGE:
+		res_counter_reset_max(counter);
+		break;
+	default:
+		BUG();	/* wired only to max_usage and failcnt */
+	}
+	return nbytes;
+}
+
+static struct cftype vm_cgroup_files[] = {
+	{	/* current usage; no write handler */
+		.name = "usage_in_bytes",
+		.read_u64 = vm_cgroup_read_u64,
+		.private = RES_USAGE,
+	},
+	{	/* max usage; any write resets it */
+		.name = "max_usage_in_bytes",
+		.read_u64 = vm_cgroup_read_u64,
+		.write = vm_cgroup_reset,
+		.private = RES_MAX_USAGE,
+	},
+	{	/* limit; vm_cgroup_write() refuses the root cgroup */
+		.name = "limit_in_bytes",
+		.read_u64 = vm_cgroup_read_u64,
+		.write = vm_cgroup_write,
+		.private = RES_LIMIT,
+	},
+	{	/* fail count; any write resets it */
+		.name = "failcnt",
+		.read_u64 = vm_cgroup_read_u64,
+		.write = vm_cgroup_reset,
+		.private = RES_FAILCNT,
+	},
+	{ },	/* terminate */
+};
+
+struct cgroup_subsys vm_cgrp_subsys = {	/* the "vm" controller */
+	.base_cftypes = vm_cgroup_files,
+	.css_alloc = vm_cgroup_css_alloc,
+	.css_free = vm_cgroup_css_free,
+};
-- 
1.7.10.4


  reply	other threads:[~2014-07-03 12:49 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-03 12:48 [PATCH RFC 0/5] Virtual Memory Resource Controller for cgroups Vladimir Davydov
2014-07-03 12:48 ` Vladimir Davydov [this message]
2014-07-03 12:48 ` [PATCH RFC 2/5] vm_cgroup: private writable mappings accounting Vladimir Davydov
2014-07-03 12:48 ` [PATCH RFC 3/5] shmem: pass inode to shmem_acct_* methods Vladimir Davydov
2014-07-03 12:48 ` [PATCH RFC 4/5] vm_cgroup: shared memory accounting Vladimir Davydov
2014-07-03 12:48 ` [PATCH RFC 5/5] vm_cgroup: do not charge tasks in root cgroup Vladimir Davydov
2014-07-04 12:16 ` [PATCH RFC 0/5] Virtual Memory Resource Controller for cgroups Michal Hocko
2014-07-04 15:38   ` Vladimir Davydov
2014-07-16 12:01     ` Michal Hocko
2014-07-23 14:08       ` Vladimir Davydov
2014-07-09  7:52 ` Vladimir Davydov
2014-07-09 15:08   ` Tim Hockin
2014-07-09 16:36     ` Vladimir Davydov
2014-07-09 17:04       ` Greg Thelen
2014-07-10 16:35         ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5169989c3d82823f9675f00152c8bf28f91ab890.1404383187.git.vdavydov@parallels.com \
    --to=vdavydov@parallels.com \
    --cc=akpm@linux-foundation.org \
    --cc=bsingharora@gmail.com \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan@huawei.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=riel@redhat.com \
    --cc=rientjes@google.com \
    --cc=tj@kernel.org \
    --cc=xemul@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).