All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Emelianov <xemul@sw.ru>
To: Andrew Morton <akpm@osdl.org>, Paul Menage <menage@google.com>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Balbir Singh <balbir@in.ibm.com>
Cc: devel@openvz.org,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Kirill Korotaev <dev@sw.ru>,
	Chandra Seetharaman <sekharan@us.ibm.com>,
	Cedric Le Goater <clg@fr.ibm.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Rohit Seth <rohitseth@google.com>,
	Linux Containers <containers@lists.osdl.org>
Subject: [PATCH 4/8] RSS container core
Date: Mon, 09 Apr 2007 16:49:28 +0400	[thread overview]
Message-ID: <461A3658.2010206@sw.ru> (raw)
In-Reply-To: <461A3010.90403@sw.ru>

[-- Attachment #1: Type: text/plain, Size: 187 bytes --]

This includes
* definition of rss_container as container subsystem combined
  with resource counter;
* registration of RSS container in generic containers;
* routines for pages tracking.

[-- Attachment #2: diff-rss-container-core --]
[-- Type: text/plain, Size: 9402 bytes --]

diff -upr linux-2.6.20.orig/include/linux/container_subsys.h linux-2.6.20-2/include/linux/container_subsys.h
--- linux-2.6.20.orig/include/linux/container_subsys.h	2007-04-09 11:26:06.000000000 +0400
+++ linux-2.6.20-2/include/linux/container_subsys.h	2007-04-09 11:26:06.000000000 +0400
@@ -9,6 +9,10 @@
 SUBSYS(cpuset)
 #endif
 
+#ifdef CONFIG_RSS_CONTAINER
+SUBSYS(rss)
+#endif
+
 /* */
 
 /* */
diff -upr linux-2.6.20.orig/include/linux/rss_container.h linux-2.6.20-2/include/linux/rss_container.h
--- linux-2.6.20.orig/include/linux/rss_container.h	2007-04-09 11:26:12.000000000 +0400
+++ linux-2.6.20-2/include/linux/rss_container.h	2007-04-09 11:26:06.000000000 +0400
@@ -0,0 +1,55 @@
+#ifndef __RSS_CONTAINER_H__
+#define __RSS_CONTAINER_H__
+/*
+ * RSS container
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+struct page_container;
+struct rss_container;
+
+#ifdef CONFIG_RSS_CONTAINER
+int container_rss_prepare(struct page *, struct vm_area_struct *vma,
+		struct page_container **);
+
+void container_rss_add(struct page_container *);
+void container_rss_del(struct page_container *);
+void container_rss_release(struct page_container *);
+
+void mm_init_container(struct mm_struct *mm, struct task_struct *tsk);
+void mm_free_container(struct mm_struct *mm);
+
+#else
+static inline int container_rss_prepare(struct page *pg,
+		struct vm_area_struct *vma, struct page_container **pc)
+{
+	*pc = NULL; /* to make gcc happy */
+	return 0;
+}
+
+static inline void container_rss_add(struct page_container *pc)
+{
+}
+
+static inline void container_rss_del(struct page_container *pc)
+{
+}
+
+static inline void container_rss_release(struct page_container *pc)
+{
+}
+
+static inline void mm_init_container(struct mm_struct *mm, struct task_struct *t)
+{
+}
+
+static inline void mm_free_container(struct mm_struct *mm)
+{
+}
+
+#endif
+#endif
diff -upr linux-2.6.20.orig/init/Kconfig linux-2.6.20-2/init/Kconfig
--- linux-2.6.20.orig/init/Kconfig	2007-04-09 11:26:06.000000000 +0400
+++ linux-2.6.20-2/init/Kconfig	2007-04-09 11:26:06.000000000 +0400
@@ -257,6 +257,17 @@ config CPUSETS
 	bool
 	select CONTAINERS
 
+config RSS_CONTAINER
+	bool "RSS accounting container"
+	select RESOURCE_COUNTERS
+	help
+	  Provides a simple Resource Controller for monitoring and
+	  controlling the total Resident Set Size of the tasks in a container
+	  The reclaim logic is now container aware, when the container goes
+	  overlimit the page reclaimer reclaims pages belonging to this
+	  container. If we are unable to reclaim enough pages to satisfy the
+	  request, the process is killed with an out of memory warning.
+
 config SYSFS_DEPRECATED
 	bool "Create deprecated sysfs files"
 	default y
diff -upr linux-2.6.20.orig/mm/Makefile linux-2.6.20-2/mm/Makefile
--- linux-2.6.20.orig/mm/Makefile	2007-03-06 19:09:50.000000000 +0300
+++ linux-2.6.20-2/mm/Makefile	2007-04-09 11:26:06.000000000 +0400
@@ -29,3 +29,5 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_h
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
+
+obj-$(CONFIG_RSS_CONTAINER) += rss_container.o
diff -upr linux-2.6.20.orig/mm/rss_container.c linux-2.6.20-2/mm/rss_container.c
--- linux-2.6.20.orig/mm/rss_container.c	2007-04-09 11:26:12.000000000 +0400
+++ linux-2.6.20-2/mm/rss_container.c	2007-04-09 11:26:06.000000000 +0400
@@ -0,0 +1,274 @@
+/*
+ * RSS accounting container
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/res_counter.h>
+#include <linux/rss_container.h>
+
+struct rss_container {
+	struct res_counter res;
+	struct list_head inactive_list;
+	struct list_head active_list;
+	atomic_t rss_reclaimed;
+	struct container_subsys_state css;
+};
+
+struct page_container {
+	struct page *page;
+	struct rss_container *cnt;
+	struct list_head list;
+};
+
+static inline struct rss_container *rss_from_cont(struct container *cnt)
+{
+	return container_of(container_subsys_state(cnt, rss_subsys_id),
+			struct rss_container, css);
+}
+
+void mm_init_container(struct mm_struct *mm, struct task_struct *tsk)
+{
+	struct rss_container *cnt;
+
+	cnt = rss_from_cont(task_container(tsk, rss_subsys_id));
+	css_get(&cnt->css);
+	mm->rss_container = cnt;
+}
+
+void mm_free_container(struct mm_struct *mm)
+{
+	css_put(&mm->rss_container->css);
+}
+
+int container_rss_prepare(struct page *page, struct vm_area_struct *vma,
+		struct page_container **ppc)
+{
+	struct rss_container *rss;
+	struct page_container *pc;
+
+	rcu_read_lock();
+	rss = rcu_dereference(vma->vm_mm->rss_container);
+	css_get(&rss->css);
+	rcu_read_unlock();
+
+	pc = kmalloc(sizeof(struct page_container), GFP_KERNEL);
+	if (pc == NULL)
+		goto out_nomem;
+
+	while (res_counter_charge(&rss->res, 1)) {
+		if (try_to_free_pages_in_container(rss)) {
+			atomic_inc(&rss->rss_reclaimed);
+			continue;
+		}
+
+		container_out_of_memory(rss);
+		if (test_thread_flag(TIF_MEMDIE))
+			goto out_charge;
+	}
+
+	pc->page = page;
+	pc->cnt = rss;
+	*ppc = pc;
+	return 0;
+
+out_charge:
+	kfree(pc);
+out_nomem:
+	css_put(&rss->css);
+	return -ENOMEM;
+}
+
+void container_rss_release(struct page_container *pc)
+{
+	struct rss_container *rss;
+
+	rss = pc->cnt;
+	res_counter_uncharge(&rss->res, 1);
+	css_put(&rss->css);
+	kfree(pc);
+}
+
+void container_rss_add(struct page_container *pc)
+{
+	struct page *pg;
+	struct rss_container *rss;
+
+	pg = pc->page;
+	rss = pc->cnt;
+
+	spin_lock_irq(&rss->res.lock);
+	list_add(&pc->list, &rss->active_list);
+	spin_unlock_irq(&rss->res.lock);
+
+	page_container(pg) = pc;
+}
+
+void container_rss_del(struct page_container *pc)
+{
+	struct page *page;
+	struct rss_container *rss;
+
+	page = pc->page;
+	rss = pc->cnt;
+
+	spin_lock_irq(&rss->res.lock);
+	list_del(&pc->list);
+	res_counter_uncharge_locked(&rss->res, 1);
+	spin_unlock_irq(&rss->res.lock);
+
+	css_put(&rss->css);
+	kfree(pc);
+}
+
+static void rss_move_task(struct container_subsys *ss,
+		struct container *cont,
+		struct container *old_cont,
+		struct task_struct *p)
+{
+	struct mm_struct *mm;
+	struct rss_container *rss, *old_rss;
+
+	mm = get_task_mm(p);
+	if (mm == NULL)
+		goto out;
+
+	rss = rss_from_cont(cont);
+	old_rss = rss_from_cont(old_cont);
+	if (old_rss != mm->rss_container)
+		goto out_put;
+
+	css_get(&rss->css);
+	rcu_assign_pointer(mm->rss_container, rss);
+	css_put(&old_rss->css);
+
+out_put:
+	mmput(mm);
+out:
+	return;
+}
+
+static struct rss_container init_rss_container;
+
+static inline void rss_container_attach(struct rss_container *rss,
+		struct container *cont)
+{
+	cont->subsys[rss_subsys_id] = &rss->css;
+	rss->css.container = cont;
+}
+
+static int rss_create(struct container_subsys *ss, struct container *cont)
+{
+	struct rss_container *rss;
+
+	if (unlikely(cont->parent == NULL)) {
+		rss = &init_rss_container;
+		css_get(&rss->css);
+		init_mm.rss_container = rss;
+	} else
+		rss = kzalloc(sizeof(struct rss_container), GFP_KERNEL);
+
+	if (rss == NULL)
+		return -ENOMEM;
+
+	res_counter_init(&rss->res);
+	INIT_LIST_HEAD(&rss->inactive_list);
+	INIT_LIST_HEAD(&rss->active_list);
+	rss_container_attach(rss, cont);
+	return 0;
+}
+
+static void rss_destroy(struct container_subsys *ss,
+		struct container *cont)
+{
+	kfree(rss_from_cont(cont));
+}
+
+
+static ssize_t rss_read(struct container *cont, struct cftype *cft,
+		struct file *file, char __user *userbuf,
+		size_t nbytes, loff_t *ppos)
+{
+	return res_counter_read(&rss_from_cont(cont)->res, cft->private,
+			userbuf, nbytes, ppos);
+}
+
+static ssize_t rss_write(struct container *cont, struct cftype *cft,
+		struct file *file, const char __user *userbuf,
+		size_t nbytes, loff_t *ppos)
+{
+	return res_counter_write(&rss_from_cont(cont)->res, cft->private,
+			userbuf, nbytes, ppos);
+}
+
+static ssize_t rss_read_reclaimed(struct container *cont, struct cftype *cft,
+		struct file *file, char __user *userbuf,
+		size_t nbytes, loff_t *ppos)
+{
+	char buf[64], *s;
+
+	s = buf;
+	s += sprintf(s, "%d\n",
+			atomic_read(&rss_from_cont(cont)->rss_reclaimed));
+	return simple_read_from_buffer((void __user *)userbuf, nbytes,
+			ppos, buf, s - buf);
+}
+
+
+static struct cftype rss_usage = {
+	.name = "rss_usage",
+	.private = RES_USAGE,
+	.read = rss_read,
+};
+
+static struct cftype rss_limit = {
+	.name = "rss_limit",
+	.private = RES_LIMIT,
+	.read = rss_read,
+	.write = rss_write,
+};
+
+static struct cftype rss_failcnt = {
+	.name = "rss_failcnt",
+	.private = RES_FAILCNT,
+	.read = rss_read,
+};
+
+static struct cftype rss_reclaimed = {
+	.name = "rss_reclaimed",
+	.read = rss_read_reclaimed,
+};
+
+static int rss_populate(struct container_subsys *ss,
+		struct container *cont)
+{
+	int rc;
+
+	if ((rc = container_add_file(cont, &rss_usage)) < 0)
+		return rc;
+	if ((rc = container_add_file(cont, &rss_failcnt)) < 0)
+		return rc;
+	if ((rc = container_add_file(cont, &rss_limit)) < 0)
+		return rc;
+	if ((rc = container_add_file(cont, &rss_reclaimed)) < 0)
+		return rc;
+
+	return 0;
+}
+
+struct container_subsys rss_subsys = {
+	.name = "rss",
+	.subsys_id = rss_subsys_id,
+	.create = rss_create,
+	.destroy = rss_destroy,
+	.populate = rss_populate,
+	.attach = rss_move_task,
+	.early_init = 1,
+};

  parent reply	other threads:[~2007-04-09 12:46 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-09 12:22 [PATCH 0/8] RSS controller based on process containers (v2) Pavel Emelianov
2007-04-09 12:35 ` [PATCH 1/8] Resource counters Pavel Emelianov
2007-04-09 12:41 ` [PATCH 2/8] Add container pointer on struct page Pavel Emelianov
2007-04-13 13:56   ` Jean-Pierre Dion
2007-04-13 14:52     ` Pavel Emelianov
2007-04-09 12:46 ` [PATCH 3/8] Add container pointer on mm_struct Pavel Emelianov
2007-04-09 12:49 ` Pavel Emelianov [this message]
2007-04-09 12:54 ` [PATCH 5/8] RSS accounting hooks over the code Pavel Emelianov
2007-04-09 12:56 ` [PATCH 6/8] Per container OOM killer Pavel Emelianov
2007-04-09 13:00 ` [PATCH 7/8] Page scanner changes needed to implement per-container scanner Pavel Emelianov
2007-04-09 13:02 ` [PATCH 8/8] Per-container pages reclamation Pavel Emelianov
2007-04-24  9:47   ` Balbir Singh
2007-04-24 10:34     ` Pavel Emelianov
2007-04-24 11:01       ` Balbir Singh
2007-04-24 11:37         ` Pavel Emelianov
2007-05-02  9:51   ` Balbir Singh
2007-05-17 11:31   ` Balbir Singh
2007-05-21 15:15     ` Pavel Emelianov
2007-05-21 15:15       ` Pavel Emelianov
2007-05-24  7:59       ` Balbir Singh
2007-04-09 15:54 ` [PATCH 0/8] RSS controller based on process containers (v2) Peter Zijlstra
2007-04-10  8:30   ` Pavel Emelianov
2007-04-19  5:37     ` Vaidyanathan Srinivasan
2007-05-30 15:24 [PATCH 0/8] RSS controller based on process containers (v3) Pavel Emelianov
2007-05-30 15:32 ` [PATCH 4/8] RSS container core Pavel Emelianov
2007-05-30 21:46   ` Andrew Morton
2007-05-31  9:00     ` Pavel Emelianov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=461A3658.2010206@sw.ru \
    --to=xemul@sw.ru \
    --cc=akpm@osdl.org \
    --cc=balbir@in.ibm.com \
    --cc=clg@fr.ibm.com \
    --cc=containers@lists.osdl.org \
    --cc=dev@sw.ru \
    --cc=devel@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=rohitseth@google.com \
    --cc=sekharan@us.ibm.com \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.