From: Serge Hallyn <serue@us.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: Hubertus Franke <frankeh@watson.ibm.com>,
Cedric Le Goater <clg@fr.ibm.com>,
Dave Hansen <haveblue@us.ibm.com>,
Serge E Hallyn <serue@us.ibm.com>
Subject: RFC [patch 27/34] PID Virtualization pidspace
Date: Tue, 17 Jan 2006 08:33:25 -0600 [thread overview]
Message-ID: <20060117143328.694933000@sergelap> (raw)
In-Reply-To: 20060117143258.150807000@sergelap
[-- Attachment #1: G1-pidspace.patch --]
[-- Type: text/plain, Size: 7933 bytes --]
This patch introduces pidspaces to provide pid virtualization
capabilities. A pidspace will be allocated for each container
and destroyed (resources freed) when the container is
terminated.
The global pid range (32 bits) is partitioned into
PID_MAX_LIMIT-sized pidspaces. The virtualization
is defined as kernel_pid ::= < pidspace_id, vpid >
In this patch we are utilizing the existing pid management,
i.e. allocation and hashing. We are providing a pidspace, as managed
previously, for each pidspace id.
This patch eliminates the explicit management of vpids and allows
continued usage of the existing pid hashing and lookup functions.
Signed-off-by: Hubertus Franke <frankeh@watson.ibm.com>
---
include/linux/pid.h | 27 +++++++++++-
include/linux/threads.h | 17 +++++--
kernel/fork.c | 2
kernel/pid.c | 105 +++++++++++++++++++++++++++++++++++++++++++-----
4 files changed, 135 insertions(+), 16 deletions(-)
Index: linux-2.6.15/kernel/fork.c
===================================================================
--- linux-2.6.15.orig/kernel/fork.c 2006-01-17 08:37:07.000000000 -0500
+++ linux-2.6.15/kernel/fork.c 2006-01-17 08:37:08.000000000 -0500
@@ -1238,7 +1238,7 @@
{
struct task_struct *p;
int trace = 0;
- long pid = alloc_pidmap();
+ long pid = alloc_pidmap(DEFAULT_PIDSPACE);
long vpid;
if (pid < 0)
Index: linux-2.6.15/include/linux/pid.h
===================================================================
--- linux-2.6.15.orig/include/linux/pid.h 2006-01-17 08:17:29.000000000 -0500
+++ linux-2.6.15/include/linux/pid.h 2006-01-17 08:37:08.000000000 -0500
@@ -36,7 +36,7 @@
*/
extern struct pid *FASTCALL(find_pid(enum pid_type, int));
-extern int alloc_pidmap(void);
+extern int alloc_pidmap(int pidspace_id);
extern void FASTCALL(free_pidmap(int));
extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
@@ -51,5 +51,30 @@
prefetch((task)->pids[type].pid_list.next), \
hlist_unhashed(&(task)->pids[type].pid_chain)); \
} \
+/*
+ * Pidspace related definition for translation real <-> virtual
+ * and initialization functions
+ */
+
+#define DEFAULT_PIDSPACE 0
+
+extern int pidspace_init(int pidspace_id);
+extern int pidspace_free(int pidspace_id);
+
+static inline int pid_to_pidspace(int pid)
+{
+ return (pid >> PID_MAX_LIMIT_SHIFT);
+}
+
+static inline int pidspace_vpid_to_pid(int pidspace_id, pid_t pid)
+{
+ return (pidspace_id << PID_MAX_LIMIT_SHIFT) | pid;
+}
+
+static inline int pidspace_pid_to_vpid(pid_t pid)
+{
+ return (pid & (PID_MAX_LIMIT-1));
+}
+
#endif /* _LINUX_PID_H */
Index: linux-2.6.15/include/linux/threads.h
===================================================================
--- linux-2.6.15.orig/include/linux/threads.h 2006-01-17 08:17:29.000000000 -0500
+++ linux-2.6.15/include/linux/threads.h 2006-01-17 08:37:08.000000000 -0500
@@ -25,12 +25,21 @@
/*
* This controls the default maximum pid allocated to a process
*/
-#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
+#define PID_MAX_DEFAULT_SHIFT (CONFIG_BASE_SMALL ? 12 : 15)
+#define PID_MAX_DEFAULT (1<< PID_MAX_DEFAULT_SHIFT)
/*
- * A maximum of 4 million PIDs should be enough for a while:
+ * The entire global pid range is divided into pidspaces
+ * each able to hold up to PID_MAX_LIMIT pids.
+ * A maximum of 512 pidspaces should be enough for a while
+ * A maximum of 4 million PIDs per pidspace should be enough for a while:
+ * we keep high bit reserved for negative values
*/
-#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
- (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
+#define PID_MAX_LIMIT_SHIFT (CONFIG_BASE_SMALL ? PAGE_SHIFT + 3 /* == PAGE_SIZE * 8 */ : \
+ (sizeof(long) > 4 ? 22 : PID_MAX_DEFAULT_SHIFT))
+#define PID_MAX_LIMIT (1<<PID_MAX_LIMIT_SHIFT)
+
+#define MAX_NR_PIDSPACES (PID_MAX_LIMIT_SHIFT > 22 ? \
+ 1<<(32-PID_MAX_LIMIT_SHIFT-1) : 512)
#endif
Index: linux-2.6.15/kernel/pid.c
===================================================================
--- linux-2.6.15.orig/kernel/pid.c 2006-01-17 08:36:59.000000000 -0500
+++ linux-2.6.15/kernel/pid.c 2006-01-17 08:37:08.000000000 -0500
@@ -35,6 +35,7 @@
int last_pid;
#define RESERVED_PIDS 300
+#define RESERVED_PIDS_NON_DFLT 1
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
@@ -57,29 +58,103 @@
void *page;
} pidmap_t;
-static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
+struct pidspace {
+ int last_pid;
+ pidmap_t *pidmap_array;
+};
+
+static pidmap_t dflt_pidmap_array[PIDMAP_ENTRIES] =
{ [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+static struct pidspace pid_spaces[MAX_NR_PIDSPACES] =
+ { { 0, dflt_pidmap_array } };
+
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
+int pidspace_init(int pidspace_id)
+{
+ pidmap_t *map;
+ struct pidspace *pid_space = &pid_spaces[pidspace_id];
+ int i;
+ int rc;
+
+ if (unlikely(pid_space->pidmap_array))
+ return -EBUSY;
+
+ map = kmalloc(PIDMAP_ENTRIES*sizeof(pidmap_t), GFP_KERNEL);
+ if (!map)
+ return -ENOMEM;
+
+ for (i=0 ; i< PIDMAP_ENTRIES ; i++)
+ map[i] = (pidmap_t){ ATOMIC_INIT(BITS_PER_PAGE), NULL };
+
+ /*
+ * Free the pidspace if someone raced with us
+ * installing it:
+ */
+
+ spin_lock(&pidmap_lock);
+ if (pid_space->pidmap_array) {
+ kfree(map);
+ rc = -EAGAIN;
+ } else {
+ pid_space->pidmap_array = map;
+ pid_space->last_pid = RESERVED_PIDS_NON_DFLT;
+ rc = 0;
+ }
+ spin_unlock(&pidmap_lock);
+ return rc;
+}
+
+int pidspace_free(int pidspace_id)
+{
+ struct pidspace *pid_space = &pid_spaces[pidspace_id];
+ pidmap_t *map;
+ int i;
+
+ spin_lock(&pidmap_lock);
+ BUG_ON(pid_space->pidmap_array == NULL);
+ map = pid_space->pidmap_array;
+ pid_space->pidmap_array = NULL;
+ spin_unlock(&pidmap_lock);
+
+ for ( i=0; i<PIDMAP_ENTRIES; i++)
+ free_page((unsigned long)map[i].page);
+ kfree(map);
+ return 0;
+}
+
fastcall void free_pidmap(int pid)
{
- pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
- int offset = pid & BITS_PER_PAGE_MASK;
+ pidmap_t *map, *pidmap_array;
+ int offset;
+
+ pidmap_array = pid_spaces[pid_to_pidspace(pid)].pidmap_array;
+ pid = pidspace_pid_to_vpid(pid);
+ map = pidmap_array + pid / BITS_PER_PAGE;
+ offset = pid & BITS_PER_PAGE_MASK;
clear_bit(offset, map->page);
atomic_inc(&map->nr_free);
}
-int alloc_pidmap(void)
+int alloc_pidmap(int pidspace_id)
{
- int i, offset, max_scan, pid, last = last_pid;
- pidmap_t *map;
+ int i, offset, max_scan, pid, last;
+ struct pidspace *pid_space;
+ pidmap_t *map, *pidmap_array;
+ pid_space = &pid_spaces[pidspace_id];
+ last = pid_space->last_pid;
pid = last + 1;
- if (pid >= pid_max)
- pid = RESERVED_PIDS;
+ if (pid >= pid_max) {
+ if (pidspace_id == DEFAULT_PIDSPACE)
+ pid = RESERVED_PIDS;
+ else
+ pid = RESERVED_PIDS_NON_DFLT;
+ }
offset = pid & BITS_PER_PAGE_MASK;
+ pidmap_array = pid_space->pidmap_array;
map = &pidmap_array[pid/BITS_PER_PAGE];
max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
for (i = 0; i <= max_scan; ++i) {
@@ -102,7 +177,12 @@
do {
if (!test_and_set_bit(offset, map->page)) {
atomic_dec(&map->nr_free);
- last_pid = pid;
+ pid_space->last_pid = pid;
+ if (pidspace_id == 0) {
+ last_pid = pid;
+ return pid;
+ }
+ pid = pidspace_vpid_to_pid(pidspace_id, pid);
return pid;
}
offset = find_next_offset(map, offset);
@@ -122,7 +202,10 @@
offset = 0;
} else {
map = &pidmap_array[0];
- offset = RESERVED_PIDS;
+ if (pidspace_id == DEFAULT_PIDSPACE)
+ offset = RESERVED_PIDS;
+ else
+ offset = RESERVED_PIDS_NON_DFLT;
if (unlikely(last == offset))
break;
}
@@ -279,6 +362,8 @@
{
int i;
+ pidmap_t *pidmap_array = dflt_pidmap_array;
+
pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
set_bit(0, pidmap_array->page);
atomic_dec(&pidmap_array->nr_free);
--
next prev parent reply other threads:[~2006-01-17 15:00 UTC|newest]
Thread overview: 136+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-01-17 14:32 RFC [patch 00/34] PID Virtualization Overview Serge Hallyn
2006-01-17 14:32 ` RFC [patch 01/34] PID Virtualization Change pid accesses: drivers Serge Hallyn
2006-01-17 14:33 ` RFC [patch 02/34] PID Virtualization Change pid accesses: most archs Serge Hallyn
2006-01-17 14:33 ` RFC [patch 03/34] PID Virtualization Change pid accesses: filesystems Serge Hallyn
2006-01-17 14:33 ` RFC [patch 04/34] PID Virtualization Change pid accesses: include/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 05/34] PID Virtualization Change pid accesses: ipc Serge Hallyn
2006-01-17 14:33 ` RFC [patch 06/34] PID Virtualization Change pid accesses: kernel/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 07/34] PID Virtualization Change pid accesses: lib/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 08/34] PID Virtualization Change pid accesses: mm/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 09/34] PID Virtualization Change pid accesses: net/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 10/34] PID Virtualization Change pid accesses: security/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 11/34] PID Virtualization Change pid accesses: sound/ Serge Hallyn
2006-01-17 14:33 ` RFC [patch 12/34] PID Virtualization Change pid accesses: ia64 and mips Serge Hallyn
2006-01-17 14:33 ` RFC [patch 13/34] PID Virtualization Define new task_pid api Serge Hallyn
2006-01-17 15:32 ` Arjan van de Ven
2006-01-17 15:56 ` Serge E. Hallyn
2006-01-17 16:02 ` Arjan van de Ven
2006-01-17 16:03 ` Alan Cox
2006-01-17 17:16 ` Kyle Moffett
2006-01-17 17:25 ` Dave Hansen
2006-01-18 4:54 ` Greg KH
2006-01-18 4:55 ` Greg KH
2006-01-18 16:23 ` Dave Hansen
2006-01-20 17:00 ` Eric W. Biederman
2006-01-20 20:18 ` Hubertus Franke
2006-01-21 10:25 ` Eric W. Biederman
2006-01-23 18:38 ` Hubertus Franke
2006-01-23 18:48 ` Eric W. Biederman
2006-01-21 14:42 ` Eric W. Biederman
2006-01-22 6:43 ` Kyle Moffett
2006-01-22 15:48 ` Eric W. Biederman
2006-01-22 15:55 ` Arjan van de Ven
2006-01-22 16:24 ` Eric W. Biederman
2006-01-26 20:01 ` Herbert Poetzl
2006-01-27 9:04 ` Eric W. Biederman
2006-01-27 12:27 ` Kyle Moffett
2006-01-27 13:15 ` Eric W. Biederman
2006-01-23 18:50 ` Hubertus Franke
2006-01-23 19:28 ` Eric W. Biederman
2006-01-23 21:11 ` Alan Cox
2006-01-23 21:30 ` Eric W. Biederman
2006-01-23 22:15 ` Hubertus Franke
2006-01-24 6:56 ` Arjan van de Ven
2006-01-24 19:34 ` Eric W. Biederman
2006-01-24 21:09 ` Hubertus Franke
2006-01-24 0:22 ` Alan Cox
2006-01-24 19:26 ` Eric W. Biederman
2006-01-24 21:11 ` Alan Cox
2006-01-24 21:15 ` Arjan van de Ven
2006-01-25 9:58 ` Eric W. Biederman
2006-01-25 15:10 ` Trond Myklebust
2006-01-25 18:01 ` Eric W. Biederman
2006-01-25 19:30 ` Trond Myklebust
2006-01-25 21:59 ` Eric W. Biederman
2006-01-25 9:13 ` Eric W. Biederman
2006-01-25 9:51 ` Eric W. Biederman
2006-01-26 20:23 ` Herbert Poetzl
2006-01-27 8:28 ` Eric W. Biederman
[not found] ` <m1k6cqlmfe.fsf_-_@ebiederm.dsl.xmission.com>
2006-01-23 21:57 ` RFC: [PATCH] pids as weak references Dave Hansen
2006-01-31 21:02 ` RFC [patch 13/34] PID Virtualization Define new task_pid api Linus Torvalds
2006-02-01 0:01 ` Hubertus Franke
2006-02-01 4:18 ` Eric W. Biederman
2006-02-01 4:39 ` Linus Torvalds
2006-02-01 7:14 ` Eric W. Biederman
2006-02-01 16:41 ` Dave Hansen
2006-02-02 5:14 ` Herbert Poetzl
2006-02-01 16:29 ` Greg
2006-02-01 16:44 ` Eric W. Biederman
2006-02-02 13:50 ` Greg
2006-02-02 14:09 ` Eric W. Biederman
2006-02-02 14:48 ` Kirill Korotaev
2006-02-02 15:13 ` Eric W. Biederman
2006-02-02 15:26 ` Kirill Korotaev
2006-02-02 15:51 ` Eric W. Biederman
2006-02-02 16:05 ` Kirill Korotaev
2006-02-02 16:27 ` Eric W. Biederman
2006-02-02 21:32 ` Cedric Le Goater
2006-02-02 21:43 ` Hubertus Franke
2006-02-02 21:46 ` Eric W. Biederman
2006-02-03 10:07 ` Kirill Korotaev
2006-02-03 10:52 ` Kirill Korotaev
2006-02-03 11:09 ` Eric W. Biederman
2006-02-03 15:45 ` Dave Hansen
2006-02-03 16:35 ` Kirill Korotaev
2006-02-02 21:10 ` Cedric Le Goater
2006-02-02 21:24 ` Eric W. Biederman
2006-02-06 20:15 ` Pavel Machek
2006-02-06 20:34 ` Eric W. Biederman
2006-02-06 20:36 ` Kirill Korotaev
2006-02-06 20:40 ` Eric W. Biederman
2006-02-02 14:49 ` Kirill Korotaev
2006-01-17 14:33 ` RFC [patch 14/34] PID Virtualization const parameter for process group Serge Hallyn
2006-01-17 14:33 ` RFC [patch 15/34] PID Virtualization task virtual pid access functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 16/34] PID Virtualization return virtual pids where required Serge Hallyn
2006-01-17 14:33 ` RFC [patch 17/34] PID Virtualization return virtual process group ids Serge Hallyn
2006-01-17 14:33 ` RFC [patch 18/34] PID Virtualization code enhancements for virtual pids in /proc Serge Hallyn
2006-01-17 14:33 ` RFC [patch 19/34] PID Virtualization Define pid_to_vpid functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 20/34] PID Virtualization Use pid_to_vpid conversion functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 21/34] PID Virtualization file owner pid virtualization Serge Hallyn
2006-01-17 14:33 ` RFC [patch 22/34] PID Virtualization define vpid_to_pid functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 23/34] PID Virtualization Use " Serge Hallyn
2006-01-17 14:33 ` RFC [patch 24/34] PID Virtualization use vpgid_to_pgid function Serge Hallyn
2006-01-17 14:33 ` RFC [patch 25/34] PID Virtualization Context for pid_to_vpid conversition functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 26/34] PID Virtualization Documentation Serge Hallyn
2006-01-17 14:33 ` Serge Hallyn [this message]
2006-01-17 14:33 ` RFC [patch 28/34] PID Virtualization container object and functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 29/34] PID Virtualization container attach/detach calls Serge Hallyn
2006-01-17 14:33 ` RFC [patch 30/34] PID Virtualization /proc/container filesystem Serge Hallyn
2006-01-17 14:33 ` RFC [patch 31/34] PID Virtualization Implementation of low level virtualization functions Serge Hallyn
2006-01-17 14:33 ` RFC [patch 32/34] PID Virtualization Handle special case vpid return cases Serge Hallyn
2006-01-17 14:33 ` RFC [patch 33/34] PID Virtualization per container /proc filesystem Serge Hallyn
2006-01-17 14:33 ` RFC [patch 34/34] PID Virtualization pidspace parent : signal behavior Serge Hallyn
2006-01-17 16:19 ` RFC [patch 00/34] PID Virtualization Overview Suleiman Souhlal
2006-01-17 17:08 ` Dave Hansen
2006-01-17 18:09 ` Suleiman Souhlal
2006-01-17 18:12 ` Dave Hansen
2006-01-17 18:29 ` Alan Cox
2006-01-18 19:01 ` Dave Hansen
2006-01-18 19:28 ` Arjan van de Ven
2006-01-18 19:38 ` Dave Hansen
2006-01-18 19:50 ` Arjan van de Ven
2006-01-18 22:54 ` Alan Cox
2006-01-19 7:15 ` Arjan van de Ven
2006-01-20 5:11 ` Eric W. Biederman
2006-01-20 20:23 ` Serge E. Hallyn
2006-01-20 20:33 ` Hubertus Franke
2006-01-21 10:34 ` Eric W. Biederman
2006-01-20 19:53 ` RFC: Multiple instances of kernel namespaces Eric W. Biederman
2006-01-20 20:13 ` Serge E. Hallyn
2006-01-20 20:22 ` Hubertus Franke
[not found] ` <20060120203555.GC13265@sergelap.austin.ibm.com>
2006-01-20 21:47 ` Hubertus Franke
2006-01-21 10:04 ` Eric W. Biederman
2006-01-26 19:47 ` Herbert Poetzl
2006-01-26 20:13 ` Eric W. Biederman
2006-01-26 20:27 ` Herbert Poetzl
2006-01-21 10:31 ` RFC [patch 00/34] PID Virtualization Overview Pavel Machek
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060117143328.694933000@sergelap \
--to=serue@us.ibm.com \
--cc=clg@fr.ibm.com \
--cc=frankeh@watson.ibm.com \
--cc=haveblue@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).