All of lore.kernel.org
 help / color / mirror / Atom feed
* clone_with_pids() library interface
@ 2009-06-06  2:22 Sukadev Bhattiprolu
       [not found] ` <20090606022225.GA19083-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 2+ messages in thread
From: Sukadev Bhattiprolu @ 2009-06-06  2:22 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Containers

[-- Attachment #1: Type: text/plain, Size: 268 bytes --]


Attached are two files -

	cwp.c 		- implements clone_with_pids() library interface
	cwp-test.c	- a simple program to test the interface

There may be more optimal ways of implementing it though :-)

If it makes sense, will submit as a patch to user-cr tree.

Sukadev

[-- Attachment #2: cwp.c --]
[-- Type: text/x-csrc, Size: 3134 bytes --]


/*
 * Copied from
 *
 * 	http://lkml.indiana.edu/hypermail/linux/kernel/0104.3/0322.html
 *
 * and hacked to suit clone_with_pids() (Sukadev Bhattiprolu)
 */

/*
 * Implementation of Dijkstra's parbegin/parend using clone()
 * Modified from original Linus' clone.c example
 * A proof of concept for academic purposes
 * (c) Francesc Oller 2001, Linus Torvalds
 * Under GPL license
 */

#include <unistd.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sched.h>
#include <errno.h>

#define STACKSIZE 8192
#define __NR_clone_with_pids	335

/*
 * clone_with_pids - raw i386 wrapper around the clone_with_pids() syscall.
 *
 * @clone_flags: clone flags (with the exit signal or'ed in), passed in %ebx
 * @ptid:        parent-tid pointer, passed in %edx
 * @ctid:        child-tid pointer, passed in %edi
 * @setp:        pointer to the pid set, passed in %ebp
 *
 * Returns the pid of the new process in the parent and 0 in the child.
 * If the syscall fails, returns -1 with errno set to the negated syscall
 * result.  If the child stack cannot be allocated, returns -1 without
 * issuing the syscall.
 *
 * The child runs on a freshly malloc()ed stack of STACKSIZE bytes.  That
 * stack is seeded with the caller's saved frame pointer and return address
 * so the child "returns" directly into the caller of this function.
 *
 * NOTE(review): the stack allocated here is never freed, not even when the
 * syscall fails.
 * NOTE(review): the argument registers are loaded by separate asm
 * statements that declare no outputs or clobbers; nothing forces the
 * compiler to keep those registers intact until the final asm statement.
 * Verify the generated code, or merge everything into one asm statement.
 */
pid_t clone_with_pids(int clone_flags, long *ptid, long *ctid, void *setp)
{
	long retval;
	long *childsp;
	register long * motherbp __asm__ ("%ebp");

	/*
	 * allocate new stack for child
	 */
	childsp = malloc(STACKSIZE);
	if (!childsp)
		return -1;

	/* Stacks grow down: start at the top of the allocation. */
	childsp = (long *)(((char *)childsp) + STACKSIZE);
	*--childsp = *(motherbp + 1); /* push return address */
	*--childsp = *motherbp; /* push mother's bp */

	/*
	 * Do the clone_with_pids() system call. We need to do the low-level
	 * stuff entirely in assembly as we're returning with a different
	 * stack in the child process and we couldn't otherwise guarantee
	 * that the program doesn't use the old stack incorrectly.
	 *
	 * Parameters to the system call:
	 * %eax - __NR_clone_with_pids, system call number
	 * %ebx - clone_flags, bitmap of cloned data
	 * %ecx - new stack pointer for cloned child
	 * %edx - ptid
	 * %edi - ctid
	 * %ebp - setp
	 *
	 * The system call returns (in %eax) the pid of the newly
	 * cloned process to the mother, and 0 to the cloned process. If
	 * an error occurs, the return value will be the negative errno.
	 *
	 * Prior to the creation of the child process, we have stored the
	 * return address and caller's bp in the child's stack. The child
	 * will restore the caller's bp and jump to the post-clone address.
	 */

	/*
	 * The last (sixth) parameter goes into ebp but ebp is needed to
	 * reference local variables. So push values from local variables
	 * into registers before pushing the pid_set into ebp
	 */
	__asm__ (
			"mov %0, %%ebx"
			:
			: "r" (clone_flags)
	  );

	__asm__ (
			"mov %0, %%ecx"
			:
			: "r" (childsp)
	  );

	/*
	 * Pass the caller's tid pointers themselves; the addresses of the
	 * local parameter slots (&ptid / &ctid) are not what was asked for.
	 */
	__asm__ (
			"mov %0, %%edx"
			:
			: "r" (ptid)
	  );

	__asm__ (
			"mov %0, %%edi"
			:
			: "r" (ctid)
	  );

	__asm__ (
			"mov %0, %%ebp"
			:
			: "r" (setp)
	  );

	__asm__ __volatile__(
			"int $0x80\n\t" /* Linux/i386 system call */
			"testl %0,%0\n\t" /* check return value */
			"jne 1f\n\t" /* jump if mother */
			"popl %%ebp\n\t" /* restore caller's bp */
			"ret\n" /* jmp to return address */
			"1:\t"
			:"=a" (retval)
			:"0" (__NR_clone_with_pids)
			: "%ebx", "%ecx", "%edx", "%edi", "%ebp"
	);

	/* Only the mother (or a failed call) gets here. */
	if (retval < 0) {
		errno = -retval;
		retval = -1;
	}

	return retval;
}

[-- Attachment #3: cwp-test.c --]
[-- Type: text/x-csrc, Size: 1171 bytes --]

#include <stdio.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>
#include <errno.h>

#define CLONE_NEWPID		0x20000000
#define __NR_gettid		224

/*
 * TODO: getpid() in the child returns the pid of the parent for some reason;
 * 	 gettid() returns the correct pid (i.e. 1 if CLONE_NEWPID, or 19799
 * 	 otherwise)
 */
/*
 * Fetch the caller's thread id with a raw __NR_gettid system call.
 *
 * Returns whatever syscall() returns.  A negative result is reported on
 * stdout (together with errno) before being handed back to the caller.
 */
int gettid()
{
	int tid = syscall(__NR_gettid, 0, 0, 0);

	if (tid >= 0)
		return tid;

	/* Failure: report it, then still return the raw result. */
	printf("rc %d, errno %d\n", tid, errno);
	fflush(stdout);
	return tid;
}

/*
 * Pid set handed to clone_with_pids() as its last argument (see main()).
 */
struct target_pid_set {
	int num_pids;		/* number of entries in target_pids */
	pid_t *target_pids;	/* presumably the pid(s) requested for the child */
};

/*
 * Implemented in cwp.c.  The return type is spelled out (no implicit int)
 * and the tid pointers are declared as long * to match that definition.
 */
extern pid_t clone_with_pids(int clone_flags, long *ptid, long *ctid, void *pid_set);

/*
 * Test driver for clone_with_pids(): asks for a child in a new pid
 * namespace with pid 19799 and prints what the parent and child observe.
 *
 * Returns 1 if clone_with_pids() fails.  On success both parent and child
 * leave through _exit(0).
 */
int main(void)
{
	int rc;
	int clone_flags;
	struct target_pid_set pid_set;
	pid_t pids[1] = { 19799 };

	pid_set.num_pids = 1;
	pid_set.target_pids = &pids[0];

	clone_flags = (CLONE_NEWPID | CLONE_FS | CLONE_FILES | SIGCHLD);

	printf("Parent: Call clone_with_pids() for &pid_set %p\n",
	       (void *)&pid_set);
	/*
	 * Flush before cloning so the child does not start out with a copy
	 * of still-buffered parent output.
	 */
	fflush(stdout);

	rc = clone_with_pids(clone_flags, NULL, NULL, &pid_set);
	if (rc == 0) {
		printf("Child: tid %d\n", gettid());
		/* _exit() does not flush stdio buffers; do it by hand. */
		fflush(stdout);
		_exit(0);
	} else if (rc > 0) {
		printf("Parent: child pid %d\n", rc);
		fflush(stdout);
		_exit(0);
	}

	printf("clone_with_pids() failed, rc %d, errno %d\n", rc, errno);
	return 1;
}

[-- Attachment #4: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: clone_with_pids() library interface
       [not found] ` <20090606022225.GA19083-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-06-08  8:25   ` Oren Laadan
  0 siblings, 0 replies; 2+ messages in thread
From: Oren Laadan @ 2009-06-08  8:25 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers


Coincidentally I also got it to run. The prototype is in agreement
with the usual clone() prototype.

#if defined(__i386__) && defined(__NR_clone_with_pids)
/*
 * libc doesn't support clone_with_pids() yet...
 * (see: http://lkml.indiana.edu/hypermail/linux/kernel/9604.3/0204.html)
 */
/*
 * clone_with_pids - clone()-style wrapper for the clone_with_pids syscall.
 *
 * @fn:          function the child runs
 * @child_stack: top of the child's stack; two words are pushed below it
 * @flags:       clone flags, passed in %ebx
 * @target_pids: pid set, passed in %ebp (sixth syscall argument)
 * @arg:         argument handed to @fn
 *
 * In the parent, returns the syscall result, or -1 with errno set if that
 * result is negative.  The child never returns from here: it calls
 * fn(arg) on the new stack and then issues the exit system call.
 *
 * NOTE(review): "r" (target_pids) lets the compiler choose the register;
 * if it picks %edx or %edi the value is zeroed by the xorl instructions
 * before it is copied into %ebp -- confirm the generated code.
 * NOTE(review): %ebp is pushed in the first asm statement and popped in the
 * second, and neither statement declares %edx/%edi/%ebp as clobbered.  This
 * relies on the compiler emitting nothing in between that disturbs the
 * stack or those registers.
 * NOTE(review): %ebx is not reloaded before the exit system call, so the
 * exit status is whatever fn leaves in %ebx rather than fn's return value
 * -- confirm this is intended.
 */
static int clone_with_pids(int (*fn)(void *), void *child_stack, int flags,
			   struct target_pid_set *target_pids, void *arg)
{
	long retval;
	void **newstack;

	/*
	 * Set up the stack for child:
	 *  - the (void *) arg will be the argument for the child function
	 *  - the fn pointer will be loaded into ebx after the clone
	 */
	newstack = (void **) child_stack;
	*--newstack = arg;
	*--newstack = fn;

	/* Load the syscall argument registers; %ebp is saved on the stack. */
	__asm__  __volatile__(
		 "movl %0, %%ebx\n\t"		/* flags -> 1st (ebx) */
		 "movl %1, %%ecx\n\t"		/* newstack -> 2nd (ecx)*/
		 "xorl %%edi, %%edi\n\t"	/* 0 -> edi */
		 "xorl %%edx, %%edx\n\t"	/* 0 -> edx */
		 "pushl %%ebp\n\t"		/* save value of ebp */
		 "movl %2, %%ebp\n\t"		/* target_pids -> 6th (ebp) */
		:
		:"b" (flags),
		 "c" (newstack),
		 "r" (target_pids)
		);

	/* Issue the syscall; the child branch runs fn and then exits. */
	__asm__ __volatile__(
		 "int $0x80\n\t"	/* Linux/i386 system call */
		 "testl %0,%0\n\t"	/* check return value */
		 "jne 1f\n\t"		/* jump if parent */
		 "popl %%ebx\n\t"	/* get subthread function */
		 "call *%%ebx\n\t"	/* start subthread function */
		 "movl %2,%0\n\t"	/* __NR_exit -> eax */
		 "int $0x80\n"		/* exit system call: exit subthread */
		 "1:\n\t"
		 "popl %%ebp\t"		/* restore parent's ebp */
		:"=a" (retval)
		:"0" (__NR_clone_with_pids), "i" (__NR_exit)
		:"ebx", "ecx"
		);

	/* Convert a negative syscall result into the -1/errno convention. */
	if (retval < 0) {
		errno = -retval;
		retval = -1;
	}
	return retval;
}
#endif

I added it to mktree.c and committed to user-cr.git :: ckpt-v16-dev,
I also added the clone_with_pids patches to linux-cr.git :: ckpt-v16-dev

It works well when not in a new namespace (mktree --pids < ckpt.image)
assuming the pids are available.

It doesn't work yet when in a new namespace, as the kernel code needs
to be adjusted (for the restarting tasks to find the coordinator in an
ancestor namespace).

Oren.


On Fri, 5 Jun 2009, Sukadev Bhattiprolu wrote:

> 
> Attached are two files -
> 
> 	cwp.c 		- implements clone_with_pids() library interface
> 	cwp-test.c	- a simple program to test the interface
> 
> There maybe more optimal ways of implementing it though :-)
> 
> If it makes sense, will submit as a patch to user-cr tree.
> 
> Sukadev
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2009-06-08  8:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-06  2:22 clone_with_pids() library interface Sukadev Bhattiprolu
     [not found] ` <20090606022225.GA19083-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-08  8:25   ` Oren Laadan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.