linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Serge Hallyn <serue@us.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: Hubertus Franke <frankeh@watson.ibm.com>,
	Cedric Le Goater <clg@fr.ibm.com>,
	Dave Hansen <haveblue@us.ibm.com>,
	Serge E Hallyn <serue@us.ibm.com>
Subject: RFC [patch 27/34] PID Virtualization pidspace
Date: Tue, 17 Jan 2006 08:33:25 -0600	[thread overview]
Message-ID: <20060117143328.694933000@sergelap> (raw)
In-Reply-To: 20060117143258.150807000@sergelap

[-- Attachment #1: G1-pidspace.patch --]
[-- Type: text/plain, Size: 7933 bytes --]

This patch introduces pitspaces to provide pid virtualization
capabilities. A pidspace will be allocated for each container
and destroyed (resources freed) when the container is 
terminated.

The global pid range ( 32 bit) is partitioned into 
PID_MAX_LIMIT sized pidspaces. The virtualization
is defined as kernel_pid ::= < pidspace_id, vpid >

In this patch we are utilizing the existing pid management,
i.e. allocation and hashing. We are providing a pidspace, as managed 
previously, for each pidspace id. 

Patch eliminates the explicit management of vpids and allows
continued usage of the existing pid hashing and lookup functions.

Signed-off-by: Hubertus Franke <frankeh@watson.ibm.com>
---
 include/linux/pid.h     |   27 +++++++++++-
 include/linux/threads.h |   17 +++++--
 kernel/fork.c           |    2 
 kernel/pid.c            |  105 +++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 135 insertions(+), 16 deletions(-)

Index: linux-2.6.15/kernel/fork.c
===================================================================
--- linux-2.6.15.orig/kernel/fork.c	2006-01-17 08:37:07.000000000 -0500
+++ linux-2.6.15/kernel/fork.c	2006-01-17 08:37:08.000000000 -0500
@@ -1238,7 +1238,7 @@
 {
 	struct task_struct *p;
 	int trace = 0;
-	long pid = alloc_pidmap();
+	long pid = alloc_pidmap(DEFAULT_PIDSPACE);
 	long vpid;
 
 	if (pid < 0)
Index: linux-2.6.15/include/linux/pid.h
===================================================================
--- linux-2.6.15.orig/include/linux/pid.h	2006-01-17 08:17:29.000000000 -0500
+++ linux-2.6.15/include/linux/pid.h	2006-01-17 08:37:08.000000000 -0500
@@ -36,7 +36,7 @@
  */
 extern struct pid *FASTCALL(find_pid(enum pid_type, int));
 
-extern int alloc_pidmap(void);
+extern int alloc_pidmap(int pidspace_id);
 extern void FASTCALL(free_pidmap(int));
 extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
 
@@ -51,5 +51,30 @@
 			prefetch((task)->pids[type].pid_list.next),	\
 			hlist_unhashed(&(task)->pids[type].pid_chain));	\
 	}								\
+/*
+ * Pidspace related definition for translation  real <-> virtual
+ * and initialization functions
+ */
+
+#define DEFAULT_PIDSPACE	0
+
+extern int pidspace_init(int pidspace_id);
+extern int pidspace_free(int pidspace_id);
+
+static inline int pid_to_pidspace(int pid)
+{
+	return (pid >> PID_MAX_LIMIT_SHIFT);
+}
+
+static inline int pidspace_vpid_to_pid(int pidspace_id, pid_t pid)
+{
+	return (pidspace_id << PID_MAX_LIMIT_SHIFT) | pid;
+}
+
+static inline int pidspace_pid_to_vpid(pid_t pid)
+{
+	return (pid & (PID_MAX_LIMIT-1));
+}
+
 
 #endif /* _LINUX_PID_H */
Index: linux-2.6.15/include/linux/threads.h
===================================================================
--- linux-2.6.15.orig/include/linux/threads.h	2006-01-17 08:17:29.000000000 -0500
+++ linux-2.6.15/include/linux/threads.h	2006-01-17 08:37:08.000000000 -0500
@@ -25,12 +25,21 @@
 /*
  * This controls the default maximum pid allocated to a process
  */
-#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
+#define PID_MAX_DEFAULT_SHIFT	(CONFIG_BASE_SMALL ? 12 : 15)
+#define PID_MAX_DEFAULT 	(1<< PID_MAX_DEFAULT_SHIFT)
 
 /*
- * A maximum of 4 million PIDs should be enough for a while:
+ * The entire global pid range is devided into pidspaces
+ * each able to hold upto PID_MAX_LIMIT pids.
+ * A maximum of 512 pidspace should be enough for a while
+ * A maximum of 4 million PIDs per pidspace should be enough for a while:
+ * we keep high bit reserved for negative values
  */
-#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
-	(sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
+#define PID_MAX_LIMIT_SHIFT (CONFIG_BASE_SMALL ? PAGE_SHIFT + 8 : \
+	(sizeof(long) > 4 ? 22 : PID_MAX_DEFAULT_SHIFT))
+#define PID_MAX_LIMIT 		(1<<PID_MAX_LIMIT_SHIFT)
+
+#define MAX_NR_PIDSPACES 	(PID_MAX_LIMIT_SHIFT > 22 ?   \
+				 1<<(32-PID_MAX_LIMIT_SHIFT-1) : 512)
 
 #endif
Index: linux-2.6.15/kernel/pid.c
===================================================================
--- linux-2.6.15.orig/kernel/pid.c	2006-01-17 08:36:59.000000000 -0500
+++ linux-2.6.15/kernel/pid.c	2006-01-17 08:37:08.000000000 -0500
@@ -35,6 +35,7 @@
 int last_pid;
 
 #define RESERVED_PIDS		300
+#define RESERVED_PIDS_NON_DFLT    1
 
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
@@ -57,29 +58,103 @@
 	void *page;
 } pidmap_t;
 
-static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
+struct pidspace {
+	int last_pid;
+	pidmap_t *pidmap_array;
+};
+
+static pidmap_t dflt_pidmap_array[PIDMAP_ENTRIES] =
 	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
 
+static struct pidspace pid_spaces[MAX_NR_PIDSPACES] =
+	{ { 0, dflt_pidmap_array } };
+
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
+int pidspace_init(int pidspace_id)
+{
+	pidmap_t *map;
+	struct pidspace *pid_space =  &pid_spaces[pidspace_id];
+	int i;
+	int rc;
+
+	if (unlikely(pid_space->pidmap_array))
+		return -EBUSY;
+
+	map = kmalloc(PIDMAP_ENTRIES*sizeof(pidmap_t), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	for (i=0 ; i< PIDMAP_ENTRIES ; i++)
+		map[i] = (pidmap_t){ ATOMIC_INIT(BITS_PER_PAGE), NULL };
+
+	/*
+	 * Free the pidspace if someone raced with us
+	 * installing it:
+	 */
+
+	spin_lock(&pidmap_lock);
+	if (pid_space->pidmap_array) {
+		kfree(map);
+		rc = -EAGAIN;
+	} else {
+		pid_space->pidmap_array = map;
+		pid_space->last_pid = RESERVED_PIDS_NON_DFLT;
+		rc = 0;
+	}
+	spin_unlock(&pidmap_lock);
+	return rc;
+}
+
+int pidspace_free(int pidspace_id)
+{
+	struct pidspace *pid_space =  &pid_spaces[pidspace_id];
+	pidmap_t *map;
+	int i;
+
+	spin_lock(&pidmap_lock);
+	BUG_ON(pid_space->pidmap_array == NULL);
+	map = pid_space->pidmap_array;
+	pid_space->pidmap_array = NULL;
+	spin_unlock(&pidmap_lock);
+
+	for ( i=0; i<PIDMAP_ENTRIES; i++)
+		free_page((unsigned long)map[i].page);
+	kfree(map);
+	return 0;
+}
+
 fastcall void free_pidmap(int pid)
 {
-	pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
-	int offset = pid & BITS_PER_PAGE_MASK;
+	pidmap_t *map, *pidmap_array;
+	int offset;
+
+	pidmap_array = pid_spaces[pid_to_pidspace(pid)].pidmap_array;
+	pid = pidspace_pid_to_vpid(pid);
+	map = pidmap_array + pid / BITS_PER_PAGE;
+	offset = pid & BITS_PER_PAGE_MASK;
 
 	clear_bit(offset, map->page);
 	atomic_inc(&map->nr_free);
 }
 
-int alloc_pidmap(void)
+int alloc_pidmap(int pidspace_id)
 {
-	int i, offset, max_scan, pid, last = last_pid;
-	pidmap_t *map;
+	int i, offset, max_scan, pid, last;
+	struct pidspace *pid_space;
+	pidmap_t *map, *pidmap_array;
 
+	pid_space = &pid_spaces[pidspace_id];
+	last = pid_space->last_pid;
 	pid = last + 1;
-	if (pid >= pid_max)
-		pid = RESERVED_PIDS;
+	if (pid >= pid_max) {
+		if (pidspace_id == DEFAULT_PIDSPACE)
+			pid = RESERVED_PIDS;
+		else
+			pid = RESERVED_PIDS_NON_DFLT;
+	}
 	offset = pid & BITS_PER_PAGE_MASK;
+	pidmap_array = pid_space->pidmap_array;
 	map = &pidmap_array[pid/BITS_PER_PAGE];
 	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
 	for (i = 0; i <= max_scan; ++i) {
@@ -102,7 +177,12 @@
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
 					atomic_dec(&map->nr_free);
-					last_pid = pid;
+					pid_space->last_pid = pid;
+					if (pidspace_id == 0) {
+						last_pid = pid;
+						return pid;
+					}
+					pid = pidspace_vpid_to_pid(pidspace_id, pid);
 					return pid;
 				}
 				offset = find_next_offset(map, offset);
@@ -122,7 +202,10 @@
 			offset = 0;
 		} else {
 			map = &pidmap_array[0];
-			offset = RESERVED_PIDS;
+			if (pidspace_id == DEFAULT_PIDSPACE)
+				offset = RESERVED_PIDS;
+			else
+				offset = RESERVED_PIDS_NON_DFLT;
 			if (unlikely(last == offset))
 				break;
 		}
@@ -279,6 +362,8 @@
 {
 	int i;
 
+	pidmap_t *pidmap_array = dflt_pidmap_array;
+
 	pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
 	set_bit(0, pidmap_array->page);
 	atomic_dec(&pidmap_array->nr_free);

--


  parent reply	other threads:[~2006-01-17 15:00 UTC|newest]

Thread overview: 136+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-17 14:32 RFC [patch 00/34] PID Virtualization Overview Serge Hallyn
2006-01-17 14:32 ` RFC [patch 01/34] PID Virtualization Change pid accesses: drivers Serge Hallyn
2006-01-17 14:33 ` RFC [patch 02/34] PID Virtualization Change pid accesses: most archs Serge Hallyn
2006-01-17 14:33 ` RFC [patch 03/34] PID Virtualization Change pid accesses: filesystems Serge Hallyn
2006-01-17 14:33 ` RFC [patch 04/34] PID Virtualization Change pid accesses: include/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 05/34] PID Virtualization Change pid accesses: ipc Serge Hallyn
2006-01-17 14:33 ` RFC [patch 06/34] PID Virtualization Change pid accesses: kernel/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 07/34] PID Virtualization Change pid accesses: lib/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 08/34] PID Virtualization Change pid accesses: mm/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 09/34] PID Virtualization Change pid accesses: net/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 10/34] PID Virtualization Change pid accesses: security/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 11/34] PID Virtualization Change pid accesses: sound/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 12/34] PID Virtualization Change pid accesses: ia64 and mips Serge Hallyn
2006-01-17 14:33 ` RFC [patch 13/34] PID Virtualization Define new task_pid api Serge Hallyn
2006-01-17 15:32   ` Arjan van de Ven
2006-01-17 15:56     ` Serge E. Hallyn
2006-01-17 16:02       ` Arjan van de Ven
2006-01-17 16:03       ` Alan Cox
2006-01-17 17:16         ` Kyle Moffett
2006-01-17 17:25         ` Dave Hansen
2006-01-18  4:54           ` Greg KH
2006-01-18  4:55           ` Greg KH
2006-01-18 16:23             ` Dave Hansen
2006-01-20 17:00               ` Eric W. Biederman
2006-01-20 20:18                 ` Hubertus Franke
2006-01-21 10:25                   ` Eric W. Biederman
2006-01-23 18:38                     ` Hubertus Franke
2006-01-23 18:48                       ` Eric W. Biederman
2006-01-21 14:42                   ` Eric W. Biederman
2006-01-22  6:43                     ` Kyle Moffett
2006-01-22 15:48                       ` Eric W. Biederman
2006-01-22 15:55                         ` Arjan van de Ven
2006-01-22 16:24                           ` Eric W. Biederman
2006-01-26 20:01                             ` Herbert Poetzl
2006-01-27  9:04                               ` Eric W. Biederman
2006-01-27 12:27                                 ` Kyle Moffett
2006-01-27 13:15                                   ` Eric W. Biederman
2006-01-23 18:50                     ` Hubertus Franke
2006-01-23 19:28                       ` Eric W. Biederman
2006-01-23 21:11                         ` Alan Cox
2006-01-23 21:30                           ` Eric W. Biederman
2006-01-23 22:15                             ` Hubertus Franke
2006-01-24  6:56                               ` Arjan van de Ven
2006-01-24 19:34                               ` Eric W. Biederman
2006-01-24 21:09                                 ` Hubertus Franke
2006-01-24  0:22                             ` Alan Cox
2006-01-24 19:26                               ` Eric W. Biederman
2006-01-24 21:11                                 ` Alan Cox
2006-01-24 21:15                                   ` Arjan van de Ven
2006-01-25  9:58                                     ` Eric W. Biederman
2006-01-25 15:10                                       ` Trond Myklebust
2006-01-25 18:01                                         ` Eric W. Biederman
2006-01-25 19:30                                           ` Trond Myklebust
2006-01-25 21:59                                             ` Eric W. Biederman
2006-01-25  9:13                                   ` Eric W. Biederman
2006-01-25  9:51                                   ` Eric W. Biederman
2006-01-26 20:23                                     ` Herbert Poetzl
2006-01-27  8:28                                       ` Eric W. Biederman
     [not found]                       ` <m1k6cqlmfe.fsf_-_@ebiederm.dsl.xmission.com>
2006-01-23 21:57                         ` RFC: [PATCH] pids as weak references Dave Hansen
2006-01-31 21:02                   ` RFC [patch 13/34] PID Virtualization Define new task_pid api Linus Torvalds
2006-02-01  0:01                     ` Hubertus Franke
2006-02-01  4:18                     ` Eric W. Biederman
2006-02-01  4:39                       ` Linus Torvalds
2006-02-01  7:14                         ` Eric W. Biederman
2006-02-01 16:41                         ` Dave Hansen
2006-02-02  5:14                         ` Herbert Poetzl
2006-02-01 16:29                       ` Greg
2006-02-01 16:44                         ` Eric W. Biederman
2006-02-02 13:50                           ` Greg
2006-02-02 14:09                             ` Eric W. Biederman
2006-02-02 14:48                     ` Kirill Korotaev
2006-02-02 15:13                       ` Eric W. Biederman
2006-02-02 15:26                         ` Kirill Korotaev
2006-02-02 15:51                           ` Eric W. Biederman
2006-02-02 16:05                             ` Kirill Korotaev
2006-02-02 16:27                               ` Eric W. Biederman
2006-02-02 21:32                                 ` Cedric Le Goater
2006-02-02 21:43                                   ` Hubertus Franke
2006-02-02 21:46                                   ` Eric W. Biederman
2006-02-03 10:07                                     ` Kirill Korotaev
2006-02-03 10:52                                 ` Kirill Korotaev
2006-02-03 11:09                                   ` Eric W. Biederman
2006-02-03 15:45                                   ` Dave Hansen
2006-02-03 16:35                                     ` Kirill Korotaev
2006-02-02 21:10                             ` Cedric Le Goater
2006-02-02 21:24                               ` Eric W. Biederman
2006-02-06 20:15                         ` Pavel Machek
2006-02-06 20:34                           ` Eric W. Biederman
2006-02-06 20:36                           ` Kirill Korotaev
2006-02-06 20:40                             ` Eric W. Biederman
2006-02-02 14:49           ` Kirill Korotaev
2006-01-17 14:33 ` RFC [patch 14/34] PID Virtualization const parameter for process group Serge Hallyn
2006-01-17 14:33 ` RFC [patch 15/34] PID Virtualization task virtual pid access functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 16/34] PID Virtualization return virtual pids where required Serge Hallyn
2006-01-17 14:33 ` RFC [patch 17/34] PID Virtualization return virtual process group ids Serge Hallyn
2006-01-17 14:33 ` RFC [patch 18/34] PID Virtualization code enhancements for virtual pids in /proc Serge Hallyn
2006-01-17 14:33 ` RFC [patch 19/34] PID Virtualization Define pid_to_vpid functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 20/34] PID Virtualization Use pid_to_vpid conversion functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 21/34] PID Virtualization file owner pid virtualization Serge Hallyn
2006-01-17 14:33 ` RFC [patch 22/34] PID Virtualization define vpid_to_pid functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 23/34] PID Virtualization Use " Serge Hallyn
2006-01-17 14:33 ` RFC [patch 24/34] PID Virtualization use vpgid_to_pgid function Serge Hallyn
2006-01-17 14:33 ` RFC [patch 25/34] PID Virtualization Context for pid_to_vpid conversition functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 26/34] PID Virtualization Documentation Serge Hallyn
2006-01-17 14:33 ` Serge Hallyn [this message]
2006-01-17 14:33 ` RFC [patch 28/34] PID Virtualization container object and functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 29/34] PID Virtualization container attach/detach calls Serge Hallyn
2006-01-17 14:33 ` RFC [patch 30/34] PID Virtualization /proc/container filesystem Serge Hallyn
2006-01-17 14:33 ` RFC [patch 31/34] PID Virtualization Implementation of low level virtualization functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 32/34] PID Virtualization Handle special case vpid return cases Serge Hallyn
2006-01-17 14:33 ` RFC [patch 33/34] PID Virtualization per container /proc filesystem Serge Hallyn
2006-01-17 14:33 ` RFC [patch 34/34] PID Virtualization pidspace parent : signal behavior Serge Hallyn
2006-01-17 16:19 ` RFC [patch 00/34] PID Virtualization Overview Suleiman Souhlal
2006-01-17 17:08   ` Dave Hansen
2006-01-17 18:09     ` Suleiman Souhlal
2006-01-17 18:12       ` Dave Hansen
2006-01-17 18:29         ` Alan Cox
2006-01-18 19:01           ` Dave Hansen
2006-01-18 19:28             ` Arjan van de Ven
2006-01-18 19:38               ` Dave Hansen
2006-01-18 19:50                 ` Arjan van de Ven
2006-01-18 22:54                 ` Alan Cox
2006-01-19  7:15                   ` Arjan van de Ven
2006-01-20  5:11                     ` Eric W. Biederman
2006-01-20 20:23                       ` Serge E. Hallyn
2006-01-20 20:33                         ` Hubertus Franke
2006-01-21 10:34                           ` Eric W. Biederman
2006-01-20 19:53                   ` RFC: Multiple instances of kernel namespaces Eric W. Biederman
2006-01-20 20:13                     ` Serge E. Hallyn
2006-01-20 20:22                       ` Hubertus Franke
     [not found]                         ` <20060120203555.GC13265@sergelap.austin.ibm.com>
2006-01-20 21:47                           ` Hubertus Franke
2006-01-21 10:04                       ` Eric W. Biederman
2006-01-26 19:47                         ` Herbert Poetzl
2006-01-26 20:13                           ` Eric W. Biederman
2006-01-26 20:27                             ` Herbert Poetzl
2006-01-21 10:31             ` RFC [patch 00/34] PID Virtualization Overview Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060117143328.694933000@sergelap \
    --to=serue@us.ibm.com \
    --cc=clg@fr.ibm.com \
    --cc=frankeh@watson.ibm.com \
    --cc=haveblue@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).