Ying Han wrote: > Hi Serge: > I made a patch based on Oren's tree recently which implement a new > syscall clone_with_pid. I tested with checkpoint/restart process tree > and it works as expected. > This patch has some hack in it which i made a copy of libc's clone and > made modifications of passing one more argument(pid number). I will > try to clean up the code and do more testing. ok. 2 patches would also be interesting. one for the syscall and one for the kernel support (which might be acceptable) > New syscall clone_with_pid > Implement a new syscall which clone a thread with a preselected pid number. yes this definitely needed to restart a task/thread. we maintain an ugly hack which stores a pid in the current task that will be used by the next clone() call. > clone_with_pid(child_func, child_stack + CHILD_STACK - 16, > CLONE_WITH_PID|SIGCHLD, pid, NULL); since you're introducing a new syscall, I don't see why you need a CLONE_WITH_PID flag ? (FYI, attached is my old attempt of clone_with_pid but that's too old) [ ... ] > +#define DEBUG > #include > #include > #include > @@ -959,10 +959,19 @@ static struct task_struct *copy_process(unsigned long cl > int retval; > struct task_struct *p; > int cgroup_callbacks_done = 0; > + pid_t clone_pid = stack_size; > > if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) > return ERR_PTR(-EINVAL); > > + /* We only allow the clone_with_pid when a new pid namespace is > + * created. FIXME: how to restrict it. > + */ > + if ((clone_flags & CLONE_NEWPID) && (clone_flags & CLONE_WITH_PID)) > + return ERR_PTR(-EINVAL); > + if ((clone_flags & CLONE_WITH_PID) && (clone_pid <= 1)) > + return ERR_PTR(-EINVAL); I would let alloc_pid() handle the error. > /* > * Thread groups must share signals as well, and detached threads > * can only be started up within the thread group. > @@ -1135,7 +1144,10 @@ static struct task_struct *copy_process(unsigned long c > > if (pid != &init_struct_pid) { > retval = -ENOMEM; > - pid = alloc_pid(task_active_pid_ns(p)); > + if (clone_flags & CLONE_WITH_PID) > + pid = alloc_pid(task_active_pid_ns(p), clone_pid); > + else > + pid = alloc_pid(task_active_pid_ns(p), 0); this is overkill IMO. [ ... ] > -static int alloc_pidmap(struct pid_namespace *pid_ns) > +static int alloc_pidmap(struct pid_namespace *pid_ns, pid_t pid_nr) > { > int i, offset, max_scan, pid, last = pid_ns->last_pid; > struct pidmap *map; > > - pid = last + 1; > + if (pid_nr) > + pid = pid_nr; > + else > + pid = last + 1; > > if (pid >= pid_max) > pid = RESERVED_PIDS; > offset = pid & BITS_PER_PAGE_MASK; > @@ -153,9 +156,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) > do { > if (!test_and_set_bit(offset, map->page)) { > atomic_dec(&map->nr_free); > - pid_ns->last_pid = pid; > + if (!pid_nr) > + pid_ns->last_pid = pid; > return pid; > } > + if (pid_nr) > + return -1; > offset = find_next_offset(map, offset); > pid = mk_pid(pid_ns, map, offset); > /* > @@ -239,21 +245,25 @@ void free_pid(struct pid *pid) > call_rcu(&pid->rcu, delayed_put_pid); > } > > -struct pid *alloc_pid(struct pid_namespace *ns) > +struct pid *alloc_pid(struct pid_namespace *ns, pid_t pid_nr) > { > struct pid *pid; > enum pid_type type; > int i, nr; > struct pid_namespace *tmp; > struct upid *upid; > + int level = ns->level; > + > + if (pid_nr >= pid_max) > + return NULL; let alloc_pidmap() handle it ? > > pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); > if (!pid) > goto out; > > - tmp = ns; > - for (i = ns->level; i >= 0; i--) { > - nr = alloc_pidmap(tmp); > + tmp = ns->parent; > + for (i = level-1; i >= 0; i--) { > + nr = alloc_pidmap(tmp, 0); > if (nr < 0) > goto out_free; > > @@ -262,6 +272,14 @@ struct pid *alloc_pid(struct pid_namespace *ns) > tmp = tmp->parent; > } > > + nr = alloc_pidmap(ns, pid_nr); > + if (nr < 0) > + goto out_free; > + pid->numbers[level].nr = nr; > + pid->numbers[level].ns = ns; > + if (nr == pid_nr) > + pr_debug("nr == pid_nr == %d\n", nr); > + > get_pid_ns(ns); > pid->level = ns->level; > atomic_set(&pid->count, 1); > > > > > > > > > On Thu, Mar 12, 2009 at 2:21 PM, Serge E. Hallyn wrote: >> Quoting Greg Kurz (gkurz-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org): >>> On Thu, 2009-03-12 at 09:53 -0500, Serge E. Hallyn wrote: >>>> Or are you suggesting that you'll do a dummy clone of (5594,2) so that >>>> the next clone(CLONE_NEWPID) will be expected to be (5594,3,1)? >>>> >>> Of course not >> Ok - someone *did* argue that at some point I think... >> >>> but one should be able to tell clone() to pick a specific >>> pid. >> Can you explain exactly how? I must be missing something clever. >> >> -serge >> >> -- >> To unsubscribe, send a message with 'unsubscribe linux-mm' in >> the body to majordomo-Bw31MaZKKs0EbZ0PF+XxCw@public.gmane.org For more info on Linux MM, >> see: http://www.linux-mm.org/ . >> Don't email: email-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org