linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pavel Emelianov <xemul@sw.ru>
To: Andrew Morton <akpm@osdl.org>, Paul Menage <menage@google.com>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Balbir Singh <balbir@in.ibm.com>
Cc: devel@openvz.org,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Kirill Korotaev <dev@sw.ru>,
	Chandra Seetharaman <sekharan@us.ibm.com>,
	Cedric Le Goater <clg@fr.ibm.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Rohit Seth <rohitseth@google.com>,
	Linux Containers <containers@lists.osdl.org>
Subject: [PATCH 4/8] RSS container core
Date: Mon, 09 Apr 2007 16:49:28 +0400	[thread overview]
Message-ID: <461A3658.2010206@sw.ru> (raw)
In-Reply-To: <461A3010.90403@sw.ru>

[-- Attachment #1: Type: text/plain, Size: 187 bytes --]

This includes
* definition of rss_container as a container subsystem combined
  with a resource counter;
* registration of the RSS container in generic containers;
* routines for page tracking.

[-- Attachment #2: diff-rss-container-core --]
[-- Type: text/plain, Size: 9402 bytes --]

diff -upr linux-2.6.20.orig/include/linux/container_subsys.h linux-2.6.20-2/include/linux/container_subsys.h
--- linux-2.6.20.orig/include/linux/container_subsys.h	2007-04-09 11:26:06.000000000 +0400
+++ linux-2.6.20-2/include/linux/container_subsys.h	2007-04-09 11:26:06.000000000 +0400
@@ -9,6 +9,10 @@
 SUBSYS(cpuset)
 #endif
 
+#ifdef CONFIG_RSS_CONTAINER
+SUBSYS(rss)
+#endif /* CONFIG_RSS_CONTAINER */
+
 /* */
 
 /* */
diff -upr linux-2.6.20.orig/include/linux/rss_container.h linux-2.6.20-2/include/linux/rss_container.h
--- linux-2.6.20.orig/include/linux/rss_container.h	2007-04-09 11:26:12.000000000 +0400
+++ linux-2.6.20-2/include/linux/rss_container.h	2007-04-09 11:26:06.000000000 +0400
@@ -0,0 +1,55 @@
+#ifndef __RSS_CONTAINER_H__
+#define __RSS_CONTAINER_H__
+/*
+ * RSS container
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+/*
+ * Only pointers to these types are used below, so forward declarations
+ * are sufficient and make the header independent of include order.
+ */
+struct page;
+struct vm_area_struct;
+struct mm_struct;
+struct task_struct;
+struct page_container;
+struct rss_container;
+
+#ifdef CONFIG_RSS_CONTAINER
+/*
+ * Charge one unit to the RSS container of vma->vm_mm and allocate a
+ * tracker for @page.  Returns 0 and stores the tracker in *@ppc, or
+ * -ENOMEM on failure.
+ */
+int container_rss_prepare(struct page *page, struct vm_area_struct *vma,
+		struct page_container **ppc);
+
+/* Link the tracker on its container's active list and attach it to its page. */
+void container_rss_add(struct page_container *pc);
+/* Unlink the tracker, return its charge, drop the container reference, free it. */
+void container_rss_del(struct page_container *pc);
+/* Return the charge, drop the container reference and free the tracker. */
+void container_rss_release(struct page_container *pc);
+
+/* Point @mm at the RSS container of @tsk, taking a reference on it. */
+void mm_init_container(struct mm_struct *mm, struct task_struct *tsk);
+/* Drop the reference held through mm->rss_container. */
+void mm_free_container(struct mm_struct *mm);
+
+#else /* !CONFIG_RSS_CONTAINER */
+
+/* Accounting is compiled out: every hook below is a no-op. */
+
+static inline int container_rss_prepare(struct page *pg,
+		struct vm_area_struct *vma, struct page_container **pc)
+{
+	*pc = NULL; /* to make gcc happy */
+	return 0;
+}
+
+static inline void container_rss_add(struct page_container *pc)
+{
+}
+
+static inline void container_rss_del(struct page_container *pc)
+{
+}
+
+static inline void container_rss_release(struct page_container *pc)
+{
+}
+
+static inline void mm_init_container(struct mm_struct *mm, struct task_struct *t)
+{
+}
+
+static inline void mm_free_container(struct mm_struct *mm)
+{
+}
+
+#endif /* CONFIG_RSS_CONTAINER */
+#endif /* __RSS_CONTAINER_H__ */
diff -upr linux-2.6.20.orig/init/Kconfig linux-2.6.20-2/init/Kconfig
--- linux-2.6.20.orig/init/Kconfig	2007-04-09 11:26:06.000000000 +0400
+++ linux-2.6.20-2/init/Kconfig	2007-04-09 11:26:06.000000000 +0400
@@ -257,6 +257,17 @@ config CPUSETS
 	bool
 	select CONTAINERS
 
+config RSS_CONTAINER
+	bool "RSS accounting container"
+	select RESOURCE_COUNTERS
+	help
+	  Provides a simple Resource Controller for monitoring and
+	  controlling the total Resident Set Size of the tasks in a container.
+	  The reclaim logic is now container aware: when the container goes
+	  over its limit, the page reclaimer reclaims pages belonging to this
+	  container. If we are unable to reclaim enough pages to satisfy the
+	  request, the process is killed with an out of memory warning.
+
 config SYSFS_DEPRECATED
 	bool "Create deprecated sysfs files"
 	default y
diff -upr linux-2.6.20.orig/mm/Makefile linux-2.6.20-2/mm/Makefile
--- linux-2.6.20.orig/mm/Makefile	2007-03-06 19:09:50.000000000 +0300
+++ linux-2.6.20-2/mm/Makefile	2007-04-09 11:26:06.000000000 +0400
@@ -29,3 +29,5 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_h
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
+
+obj-$(CONFIG_RSS_CONTAINER) += rss_container.o
diff -upr linux-2.6.20.orig/mm/rss_container.c linux-2.6.20-2/mm/rss_container.c
--- linux-2.6.20.orig/mm/rss_container.c	2007-04-09 11:26:12.000000000 +0400
+++ linux-2.6.20-2/mm/rss_container.c	2007-04-09 11:26:06.000000000 +0400
@@ -0,0 +1,274 @@
+/*
+ * RSS accounting container
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/res_counter.h>
+#include <linux/rss_container.h>
+
+struct rss_container {	/* per-container RSS accounting state */
+	struct res_counter res;	/* charge counter; res.lock is held around active_list updates */
+	struct list_head inactive_list;	/* initialised in rss_create(); not otherwise used in this file */
+	struct list_head active_list;	/* page_container entries linked by container_rss_add() */
+	atomic_t rss_reclaimed;	/* bumped in container_rss_prepare() after each successful reclaim call */
+	struct container_subsys_state css;	/* embedded state; rss_from_cont() maps back from it */
+};
+
+struct page_container {	/* tracker tying one page to the container it is charged to */
+	struct page *page;	/* page being accounted; set in container_rss_prepare() */
+	struct rss_container *cnt;	/* container that was charged and is pinned by this tracker */
+	struct list_head list;	/* linkage on cnt->active_list */
+};
+
+/*
+ * Translate a generic container into the RSS state embedded around its
+ * subsystem-state slot for rss_subsys_id.
+ */
+static inline struct rss_container *rss_from_cont(struct container *cnt)
+{
+	struct container_subsys_state *state;
+
+	state = container_subsys_state(cnt, rss_subsys_id);
+	return container_of(state, struct rss_container, css);
+}
+
+/*
+ * Point @mm at the RSS container @tsk belongs to.  A reference is taken
+ * on that container and held through mm->rss_container.
+ */
+void mm_init_container(struct mm_struct *mm, struct task_struct *tsk)
+{
+	struct container *cont = task_container(tsk, rss_subsys_id);
+	struct rss_container *rss = rss_from_cont(cont);
+
+	css_get(&rss->css);
+	mm->rss_container = rss;
+}
+
+/*
+ * Drop the container reference held through mm->rss_container.
+ */
+void mm_free_container(struct mm_struct *mm)
+{
+	struct rss_container *rss = mm->rss_container;
+
+	css_put(&rss->css);
+}
+
+/*
+ * Reserve RSS accounting for @page on behalf of @vma's mm.
+ *
+ * Pins the container vma->vm_mm currently points at, allocates a
+ * page_container and charges one unit to the container's counter.  While
+ * the charge is refused, pages of the container are reclaimed; once
+ * try_to_free_pages_in_container() returns zero the container OOM handler
+ * is invoked, and the attempt is given up if the current thread then has
+ * TIF_MEMDIE set.
+ *
+ * Returns 0 with the tracker stored in *@ppc.  Returns -ENOMEM otherwise;
+ * in that case *@ppc is not written and the container reference is
+ * dropped again.
+ */
+int container_rss_prepare(struct page *page, struct vm_area_struct *vma,
+		struct page_container **ppc)
+{
+	struct rss_container *owner;
+	struct page_container *tracker;
+
+	/* rss_move_task() may repoint mm->rss_container; pin it under RCU */
+	rcu_read_lock();
+	owner = rcu_dereference(vma->vm_mm->rss_container);
+	css_get(&owner->css);
+	rcu_read_unlock();
+
+	tracker = kmalloc(sizeof(*tracker), GFP_KERNEL);
+	if (!tracker)
+		goto err_put;
+
+	for (;;) {
+		if (!res_counter_charge(&owner->res, 1))
+			break;
+
+		if (try_to_free_pages_in_container(owner)) {
+			atomic_inc(&owner->rss_reclaimed);
+			continue;
+		}
+
+		container_out_of_memory(owner);
+		if (test_thread_flag(TIF_MEMDIE))
+			goto err_free;
+	}
+
+	tracker->page = page;
+	tracker->cnt = owner;
+	*ppc = tracker;
+	return 0;
+
+err_free:
+	kfree(tracker);
+err_put:
+	css_put(&owner->css);
+	return -ENOMEM;
+}
+
+/*
+ * Give back what container_rss_prepare() took: one charged unit, the
+ * container reference and the tracker memory.  The active list is not
+ * touched here.
+ */
+void container_rss_release(struct page_container *pc)
+{
+	struct rss_container *owner = pc->cnt;
+
+	res_counter_uncharge(&owner->res, 1);
+	css_put(&owner->css);
+	kfree(pc);
+}
+
+/*
+ * Publish a prepared tracker: put it on its container's active list
+ * (under res.lock) and then store it in the page's container slot.
+ *
+ * NOTE(review): the page slot is written after the lock is dropped, so a
+ * list walker could see the tracker first - confirm no reader depends on
+ * page_container() being set for listed entries.
+ */
+void container_rss_add(struct page_container *pc)
+{
+	struct rss_container *owner = pc->cnt;
+	struct page *target = pc->page;
+
+	spin_lock_irq(&owner->res.lock);
+	list_add(&pc->list, &owner->active_list);
+	spin_unlock_irq(&owner->res.lock);
+
+	page_container(target) = pc;
+}
+
+/*
+ * Retire a tracker that was published with container_rss_add().
+ *
+ * Under res.lock the tracker is unlinked from the list it is on and one
+ * unit is uncharged (the _locked variant is used because the counter's
+ * lock is already held).  The container reference is then dropped and
+ * the tracker freed; @pc must not be used afterwards.
+ */
+void container_rss_del(struct page_container *pc)
+{
+	struct rss_container *rss = pc->cnt;
+
+	spin_lock_irq(&rss->res.lock);
+	list_del(&pc->list);
+	res_counter_uncharge_locked(&rss->res, 1);
+	spin_unlock_irq(&rss->res.lock);
+
+	css_put(&rss->css);
+	kfree(pc);
+}
+
+/*
+ * Subsystem ->attach callback: @p moved from @old_cont to @cont.
+ *
+ * If @p has an mm and that mm is still accounted to @old_cont's RSS
+ * container, repoint it at @cont's, moving the reference along with it.
+ * An mm that points anywhere else is left alone.
+ */
+static void rss_move_task(struct container_subsys *ss,
+		struct container *cont,
+		struct container *old_cont,
+		struct task_struct *p)
+{
+	struct rss_container *new_rss, *prev_rss;
+	struct mm_struct *mm = get_task_mm(p);
+
+	if (!mm)
+		return;
+
+	new_rss = rss_from_cont(cont);
+	prev_rss = rss_from_cont(old_cont);
+
+	if (mm->rss_container == prev_rss) {
+		/* take the new reference before publishing the pointer */
+		css_get(&new_rss->css);
+		rcu_assign_pointer(mm->rss_container, new_rss);
+		css_put(&prev_rss->css);
+	}
+
+	mmput(mm);
+}
+
+static struct rss_container init_rss_container;	/* used by rss_create() for the container without a parent */
+
+/*
+ * Cross-link @rss and @cont: the container's rss slot points at the
+ * embedded css, and the css points back at the container.
+ */
+static inline void rss_container_attach(struct rss_container *rss,
+		struct container *cont)
+{
+	struct container_subsys_state *state = &rss->css;
+
+	state->container = cont;
+	cont->subsys[rss_subsys_id] = state;
+}
+
+/*
+ * Subsystem ->create callback: set up RSS state for @cont.
+ *
+ * A container without a parent gets the static init_rss_container, which
+ * is also installed as init_mm's container with a reference taken.  Any
+ * other container gets zeroed heap state.  Either way the counter and
+ * both lists are initialised and the state is attached to @cont.
+ *
+ * Returns 0, or -ENOMEM when the allocation fails.
+ */
+static int rss_create(struct container_subsys *ss, struct container *cont)
+{
+	struct rss_container *rss;
+
+	if (likely(cont->parent != NULL)) {
+		rss = kzalloc(sizeof(*rss), GFP_KERNEL);
+		if (rss == NULL)
+			return -ENOMEM;
+	} else {
+		rss = &init_rss_container;
+		css_get(&rss->css);
+		init_mm.rss_container = rss;
+	}
+
+	res_counter_init(&rss->res);
+	INIT_LIST_HEAD(&rss->inactive_list);
+	INIT_LIST_HEAD(&rss->active_list);
+	rss_container_attach(rss, cont);
+	return 0;
+}
+
+/*
+ * Subsystem ->destroy callback: free the RSS state of @cont.
+ *
+ * rss_create() hands out the statically allocated init_rss_container for
+ * the container without a parent; that instance was never kmalloc'ed and
+ * must not be passed to kfree().
+ */
+static void rss_destroy(struct container_subsys *ss,
+		struct container *cont)
+{
+	struct rss_container *rss = rss_from_cont(cont);
+
+	if (rss != &init_rss_container)
+		kfree(rss);
+}
+
+
+/*
+ * Read handler shared by the counter-backed control files: forwards to
+ * res_counter_read() with the member selected by cft->private.
+ */
+static ssize_t rss_read(struct container *cont, struct cftype *cft,
+		struct file *file, char __user *userbuf,
+		size_t nbytes, loff_t *ppos)
+{
+	struct rss_container *rss = rss_from_cont(cont);
+
+	return res_counter_read(&rss->res, cft->private, userbuf, nbytes, ppos);
+}
+
+/*
+ * Write handler for counter-backed control files: forwards to
+ * res_counter_write() with the member selected by cft->private.
+ */
+static ssize_t rss_write(struct container *cont, struct cftype *cft,
+		struct file *file, const char __user *userbuf,
+		size_t nbytes, loff_t *ppos)
+{
+	struct rss_container *rss = rss_from_cont(cont);
+
+	return res_counter_write(&rss->res, cft->private, userbuf, nbytes, ppos);
+}
+
+/*
+ * Read handler for the "rss_reclaimed" file: reports the container's
+ * rss_reclaimed counter as a decimal number followed by a newline.
+ */
+static ssize_t rss_read_reclaimed(struct container *cont, struct cftype *cft,
+		struct file *file, char __user *userbuf,
+		size_t nbytes, loff_t *ppos)
+{
+	char buf[64];
+	int len;
+
+	/* scnprintf() is bounded and returns the number of bytes stored */
+	len = scnprintf(buf, sizeof(buf), "%d\n",
+			atomic_read(&rss_from_cont(cont)->rss_reclaimed));
+	return simple_read_from_buffer(userbuf, nbytes, ppos, buf, len);
+}
+
+
+static struct cftype rss_usage = {	/* read-only: no .write handler */
+	.name = "rss_usage",
+	.private = RES_USAGE,	/* passed to res_counter_read() by rss_read() */
+	.read = rss_read,
+};
+
+static struct cftype rss_limit = {	/* the only counter file that is writable */
+	.name = "rss_limit",
+	.private = RES_LIMIT,	/* passed to res_counter_read()/res_counter_write() */
+	.read = rss_read,
+	.write = rss_write,
+};
+
+static struct cftype rss_failcnt = {	/* read-only: no .write handler */
+	.name = "rss_failcnt",
+	.private = RES_FAILCNT,	/* passed to res_counter_read() by rss_read() */
+	.read = rss_read,
+};
+
+static struct cftype rss_reclaimed = {	/* read-only view of rss_container.rss_reclaimed */
+	.name = "rss_reclaimed",
+	.read = rss_read_reclaimed,
+};
+
+/*
+ * Subsystem ->populate callback: add the four rss control files to
+ * @cont, in the order usage, failcnt, limit, reclaimed.
+ *
+ * Stops at the first failure and returns that negative code; files
+ * already added are not removed here.  Returns 0 on success.
+ */
+static int rss_populate(struct container_subsys *ss,
+		struct container *cont)
+{
+	struct cftype *files[] = {
+		&rss_usage,
+		&rss_failcnt,
+		&rss_limit,
+		&rss_reclaimed,
+	};
+	unsigned int i;
+	int rc;
+
+	for (i = 0; i < ARRAY_SIZE(files); i++) {
+		rc = container_add_file(cont, files[i]);
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+struct container_subsys rss_subsys = {	/* descriptor wiring the callbacks above into the container core */
+	.name = "rss",
+	.subsys_id = rss_subsys_id,
+	.create = rss_create,	/* sets up per-container state */
+	.destroy = rss_destroy,	/* frees per-container state */
+	.populate = rss_populate,	/* adds the rss_* control files */
+	.attach = rss_move_task,	/* repoints the moved task's mm */
+	.early_init = 1,
+};

  parent reply	other threads:[~2007-04-09 12:46 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-09 12:22 [PATCH 0/8] RSS controller based on process containers (v2) Pavel Emelianov
2007-04-09 12:35 ` [PATCH 1/8] Resource counters Pavel Emelianov
2007-04-09 12:41 ` [PATCH 2/8] Add container pointer on struct page Pavel Emelianov
2007-04-13 13:56   ` Jean-Pierre Dion
2007-04-13 14:52     ` Pavel Emelianov
2007-04-09 12:46 ` [PATCH 3/8] Add container pointer on mm_struct Pavel Emelianov
2007-04-09 12:49 ` Pavel Emelianov [this message]
2007-04-09 12:54 ` [PATCH 5/8] RSS accounting hooks over the code Pavel Emelianov
2007-04-09 12:56 ` [PATCH 6/8] Per container OOM killer Pavel Emelianov
2007-04-09 13:00 ` [PATCH 7/8] Page scanner changes needed to implement per-container scanner Pavel Emelianov
2007-04-09 13:02 ` [PATCH 8/8] Per-container pages reclamation Pavel Emelianov
2007-04-24  9:47   ` Balbir Singh
2007-04-24 10:34     ` Pavel Emelianov
2007-04-24 11:01       ` Balbir Singh
2007-04-24 11:37         ` Pavel Emelianov
2007-05-02  9:51   ` Balbir Singh
2007-05-17 11:31   ` Balbir Singh
2007-05-21 15:15     ` Pavel Emelianov
2007-05-24  7:59       ` Balbir Singh
2007-04-09 15:54 ` [PATCH 0/8] RSS controller based on process containers (v2) Peter Zijlstra
2007-04-10  8:30   ` Pavel Emelianov
2007-04-19  5:37     ` Vaidyanathan Srinivasan
2007-05-30 15:24 [PATCH 0/8] RSS controller based on process containers (v3) Pavel Emelianov
2007-05-30 15:32 ` [PATCH 4/8] RSS container core Pavel Emelianov
2007-05-30 21:46   ` Andrew Morton
2007-05-31  9:00     ` Pavel Emelianov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=461A3658.2010206@sw.ru \
    --to=xemul@sw.ru \
    --cc=akpm@osdl.org \
    --cc=balbir@in.ibm.com \
    --cc=clg@fr.ibm.com \
    --cc=containers@lists.osdl.org \
    --cc=dev@sw.ru \
    --cc=devel@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=rohitseth@google.com \
    --cc=sekharan@us.ibm.com \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).