* clone_with_pids() library interface
@ 2009-06-06 2:22 Sukadev Bhattiprolu
[not found] ` <20090606022225.GA19083-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 2+ messages in thread
From: Sukadev Bhattiprolu @ 2009-06-06 2:22 UTC (permalink / raw)
To: Oren Laadan; +Cc: Containers
[-- Attachment #1: Type: text/plain, Size: 268 bytes --]
Attached are two files -
cwp.c - implements clone_with_pids() library interface
cwp-test.c - a simple program to test the interface
There may be more optimal ways of implementing it, though :-)
If it makes sense, will submit as a patch to user-cr tree.
Sukadev
[-- Attachment #2: cwp.c --]
[-- Type: text/x-csrc, Size: 3134 bytes --]
/*
* Copied from
*
* http://lkml.indiana.edu/hypermail/linux/kernel/0104.3/0322.html
*
* and hacked to suit clone_with_pids() (Sukadev Bhattiprolu)
*/
/*
* Implementation of Dijkstra's parbegin/parend using clone()
* Modified from original Linus' clone.c example
* A proof of concept for academic purposes
* (c) Francesc Oller 2001, Linus Torvalds
* Under GPL license
*/
#include <unistd.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sched.h>
#include <errno.h>
#define STACKSIZE 8192
#define __NR_clone_with_pids 335
/*
* clone_with_pids() - raw i386 wrapper around system call number
* __NR_clone_with_pids, issued through "int $0x80".
*
* clone_flags: loaded into %ebx for the system call
* ptid, ctid:  see the NOTE(review) remarks at the register loads below
* setp:        loaded into %ebp as the last system call argument
*
* A fresh STACKSIZE-byte stack is malloc()ed for the child and primed with
* the caller's return address and saved frame pointer, so that the child
* "returns" from this function on its own stack.
*
* Returns the (non-zero) system call result in the parent and 0 in the
* child.  A negative system call result is converted to errno and -1 is
* returned.  -1 is also returned when the stack allocation fails; errno is
* not assigned by this function on that path.
*
* The allocated stack is never freed by this function, including when the
* system call fails.
*/
pid_t clone_with_pids(int clone_flags, long *ptid, long *ctid, void *setp)
{
long retval;
long *childsp;
/* alias for the current frame pointer: motherbp[0] is read as the saved
* bp and motherbp[1] as the return address (see the pushes below) */
register long * motherbp __asm__ ("%ebp");
/*
* allocate new stack for child
*/
childsp = malloc(STACKSIZE);
if (!childsp)
return -1;
/* move to the high end of the allocation; values are pushed downwards */
childsp = (long *)(((char *)childsp) + STACKSIZE);
*--childsp = *(motherbp + 1); /* push return address */
*--childsp = *motherbp; /* push mother's bp */
/*
* Do clone() system call. We need to do the low-level stuff
* entirely in assembly as we're returning with a different
* stack in the child process and we couldn't otherwise guarantee
* that the program doesn't use the old stack incorrectly.
*
* Parameters to clone() system call:
* %eax - __NR_clone_with_pids, system call number
* %ebx - clone_flags, bitmap of cloned data
* %ecx - new stack pointer for cloned child
*
* (The text below is inherited from the original clone.c example.)
* In this example %ebx is CLONE_VM | CLONE_FS | CLONE_FILES |
* CLONE_SIGHAND which shares as much as possible between parent
* and child. (We or in the signal to be sent on child termination
* into clone_flags: SIGCHLD makes the cloned process work like
* a "normal" unix child process)
*
* The clone() system call returns (in %eax) the pid of the newly
* cloned process to the mother, and 0 to the cloned process. If
* an error occurs, the return value will be the negative errno.
*
* Prior to the creation of the child process, we have stored
* return address and caller's bp in child's stack. Child will
* restore caller's bp and jmp to the post-clone address. The
* "_exit()" system call at the child's body end will terminate
* the child.
*/
/*
* The last (sixth) parameter goes into ebp but ebp is needed to
* reference local variables. So push values from local variables
* into registers before pushing the pid_set into ebp
*
* NOTE(review): each load below is a separate asm statement with no
* outputs or clobbers, so the compiler is not told that these registers
* are modified or must survive until the "int $0x80" statement - confirm
* the generated code.
*/
__asm__ (
"mov %0, %%ebx"
:
: "r" (clone_flags)
);
__asm__ (
"mov %0, %%ecx"
:
: "r" (childsp)
);
/*
* NOTE(review): this passes the address of the local parameter 'ptid',
* not the pointer value supplied by the caller - confirm whether 'ptid'
* was intended.
*/
__asm__ (
"mov %0, %%edx"
:
: "r" (&ptid)
);
/* NOTE(review): same remark as for 'ptid' - '&ctid' is the address of
* the parameter itself; confirm whether 'ctid' was intended. */
__asm__ (
"mov %0, %%edi"
:
: "r" (&ctid)
);
/* from here on %ebp holds setp instead of the frame pointer */
__asm__ (
"mov %0, %%ebp"
:
: "r" (setp)
);
/*
* %eax carries __NR_clone_with_pids in and the result out.  A non-zero
* result jumps to label 1 and continues in C below; a zero result (the
* child) pops the saved bp pushed above and returns through the pushed
* return address.
*/
__asm__ __volatile__(
"int $0x80\n\t" /* Linux/i386 system call */
"testl %0,%0\n\t" /* check return value */
"jne 1f\n\t" /* jump if mother */
"popl %%ebp\n\t" /* restore caller's bp */
"ret\n" /* jmp to return address */
"1:\t"
:"=a" (retval)
:"0" (__NR_clone_with_pids)
: "%ebx", "%ecx", "%edx", "%edi", "%ebp"
);
/* negative result is -errno: convert to the errno / -1 convention */
if (retval < 0) {
errno = -retval;
retval = -1;
}
return retval;
}
[-- Attachment #3: cwp-test.c --]
[-- Type: text/x-csrc, Size: 1171 bytes --]
#include <stdio.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>
#include <errno.h>
#define CLONE_NEWPID 0x20000000
#define __NR_gettid 224
/*
* TODO: getpid() in the child returns the pid of the parent for some reason;
* gettid() returns the correct pid (i.e. 1 if CLONE_NEWPID, or 19799 otherwise)
*/
/*
 * Issue the raw gettid system call (number __NR_gettid) and return its
 * result.  When the call reports a negative value, the value and errno
 * are printed on stdout (and flushed) before the value is returned.
 */
int gettid()
{
	int tid = syscall(__NR_gettid, 0, 0, 0);

	/* common case: the call succeeded, nothing to report */
	if (tid >= 0)
		return tid;

	printf("rc %d, errno %d\n", tid, errno);
	fflush(stdout);
	return tid;
}
/*
 * Set of pids handed to clone_with_pids() through its last argument.
 * The test program fills it with a single entry.
 */
struct target_pid_set {
int num_pids; /* number of entries in target_pids */
pid_t *target_pids; /* points at the first of num_pids requested pids */
};
/*
 * Prototype of the clone_with_pids() wrapper implemented in cwp.c.
 *
 * The return type is spelled out (implicit int is not valid since C99) and
 * the tid pointers are declared as 'long *' so the declaration agrees with
 * the definition.  Returns the child's pid in the parent, 0 in the child,
 * and -1 with errno set when the system call fails.
 */
extern pid_t clone_with_pids(int clone_flags, long *ptid, long *ctid, void *pid_set);
/*
 * Test driver: request a child with pid 19799 through clone_with_pids()
 * and report what parent and child observe.
 *
 * stdout is flushed before the clone so that buffered text is not
 * inherited by the child, and before every _exit() because _exit() does
 * not flush stdio buffers (output would be lost when stdout is a pipe or
 * a file).
 *
 * Exit status: 0 from both parent and child on success, 1 when the clone
 * call fails.
 */
int main(void)
{
	struct target_pid_set pid_set;
	pid_t pids[1] = { 19799 };
	int clone_flags;
	int rc;

	pid_set.num_pids = 1;
	pid_set.target_pids = &pids[0];

	clone_flags = (CLONE_NEWPID | CLONE_FS | CLONE_FILES | SIGCHLD);

	/* %p requires a 'void *' argument */
	printf("Parent: Call clone_with_pids() for &pid_set %p\n",
	       (void *)&pid_set);
	fflush(stdout);

	rc = clone_with_pids(clone_flags, NULL, NULL, &pid_set);
	if (rc == 0) {
		/* child: leave through _exit() after flushing our own output */
		printf("Child: tid %d\n", gettid());
		fflush(stdout);
		_exit(0);
	} else if (rc > 0) {
		printf("Parent: child pid %d\n", rc);
		fflush(stdout);
		_exit(0);
	}

	printf("clone_with_pids() failed, rc %d, errno %d\n", rc, errno);
	return 1;
}
[-- Attachment #4: Type: text/plain, Size: 206 bytes --]
_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: clone_with_pids() library interface
[not found] ` <20090606022225.GA19083-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-06-08 8:25 ` Oren Laadan
0 siblings, 0 replies; 2+ messages in thread
From: Oren Laadan @ 2009-06-08 8:25 UTC (permalink / raw)
To: Sukadev Bhattiprolu; +Cc: Containers
Coincidentally, I also got it to run. The prototype is in agreement
with the usual clone() prototype.
#if defined(__i386__) && defined(__NR_clone_with_pids)
/*
* libc doesn't support clone_with_pids() yet...
* (see: http://lkml.indiana.edu/hypermail/linux/kernel/9604.3/0204.html)
*/
/*
* clone_with_pids() - i386 wrapper around system call number
* __NR_clone_with_pids, with a prototype modelled on clone().
*
* fn:          function the child calls after the system call returns 0
* child_stack: high end of the child's stack; 'arg' and 'fn' are pushed
*              below this address before the call
* flags:       loaded into %ebx
* target_pids: loaded into %ebp
* arg:         left on the child stack so that fn finds it as its argument
*
* In the parent, returns the system call result, or -1 with errno set when
* that result is negative.  The child never returns from this function:
* after fn returns it issues the __NR_exit system call.
*/
static int clone_with_pids(int (*fn)(void *), void *child_stack, int flags,
struct target_pid_set *target_pids, void *arg)
{
long retval;
void **newstack;
/*
* Set up the stack for child:
* - the (void *) arg will be the argument for the child function
* - the fn pointer will be loaded into ebx after the clone
*/
newstack = (void **) child_stack;
*--newstack = arg;
*--newstack = fn;
/*
* Load the system call argument registers.  The parent's %ebp is saved
* on the parent's stack here and popped again at label 1 of the next asm
* statement.
*
* NOTE(review): %edx, %edi and %ebp are modified without being listed as
* clobbers, %esi is not loaded at all, and the register state is expected
* to survive into the following, separate asm statement - confirm the
* generated code.
*/
__asm__ __volatile__(
"movl %0, %%ebx\n\t" /* flags -> 1st (ebx) */
"movl %1, %%ecx\n\t" /* newstack -> 2nd (ecx)*/
"xorl %%edi, %%edi\n\t" /* 0 -> edi */
"xorl %%edx, %%edx\n\t" /* 0 -> edx */
"pushl %%ebp\n\t" /* save value of ebp */
"movl %2, %%ebp\n\t" /* target_pids -> 6th (ebp) */
:
:"b" (flags),
"c" (newstack),
"r" (target_pids)
);
/*
* Issue the system call.  A non-zero result (the parent) jumps to label 1
* and restores %ebp.  A zero result (the child) pops fn off the new
* stack, calls it with arg now on top of the stack, then loads __NR_exit
* into %eax and issues a second system call.
*
* NOTE(review): nothing loads %ebx before the exit system call, so the
* exit status is whatever %ebx holds after fn returns - confirm.
*/
__asm__ __volatile__(
"int $0x80\n\t" /* Linux/i386 system call */
"testl %0,%0\n\t" /* check return value */
"jne 1f\n\t" /* jump if parent */
"popl %%ebx\n\t" /* get subthread function */
"call *%%ebx\n\t" /* start subthread function */
"movl %2,%0\n\t" /* __NR_exit -> eax */
"int $0x80\n" /* exit system call: exit subthread */
"1:\n\t"
"popl %%ebp\t" /* restore parent's ebp */
:"=a" (retval)
:"0" (__NR_clone_with_pids), "i" (__NR_exit)
:"ebx", "ecx"
);
/* negative result is -errno: convert to the errno / -1 convention */
if (retval < 0) {
errno = -retval;
retval = -1;
}
return retval;
}
#endif
I added it to mktree.c and committed to user-cr.git :: ckpt-v16-dev,
I also added the clone_with_pids patches to linux-cr.git :: ckpt-v16-dev
It works well when not in a new namespace (mktree --pids < ckpt.image),
assuming that the pids are available.
It doesn't work yet when in a new namespace, as the kernel code needs
to be adjusted (for the restarting tasks to find the coordinator in an
ancestor namespace).
Oren.
On Fri, 5 Jun 2009, Sukadev Bhattiprolu wrote:
>
> Attached are two files -
>
> cwp.c - implements clone_with_pids() library interface
> cwp-test.c - a simple program to test the interface
>
> There maybe more optimal ways of implementing it though :-)
>
> If it makes sense, will submit as a patch to user-cr tree.
>
> Sukadev
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2009-06-08 8:25 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-06 2:22 clone_with_pids() library interface Sukadev Bhattiprolu
[not found] ` <20090606022225.GA19083-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-08 8:25 ` Oren Laadan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.