All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
@ 2011-11-28 12:08 Cyrill Gorcunov
  2011-11-28 13:10 ` Andrey Vagin
  2011-11-28 16:08 ` Tejun Heo
  0 siblings, 2 replies; 11+ messages in thread
From: Cyrill Gorcunov @ 2011-11-28 12:08 UTC (permalink / raw)
  To: LKML; +Cc: Li Zefan, Matt Helsley, Andrey Vagin, Pavel Emelyanov, Tejun Heo

In checkpoint/restore we need an ability to attach pids to
a frozen cgroup. Thus once pid reaches a frozen cgroup it is
not rejected, but the task get frozen immediately.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---

I would really appreciate complains and comments.

 include/linux/cgroup.h  |    2 
 kernel/cgroup.c         |    6 +-
 kernel/cgroup_freezer.c |  119 +++++++++++++++++++++++++++++++++---------------
 mm/memcontrol.c         |    2 
 4 files changed, 90 insertions(+), 39 deletions(-)

Index: linux-2.6.git/include/linux/cgroup.h
===================================================================
--- linux-2.6.git.orig/include/linux/cgroup.h
+++ linux-2.6.git/include/linux/cgroup.h
@@ -470,7 +470,7 @@ struct cgroup_subsys {
 			  struct task_struct *tsk);
 	int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
 	void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-			      struct task_struct *tsk);
+			      struct cgroup *old_cgrp, struct task_struct *tsk);
 	void (*pre_attach)(struct cgroup *cgrp);
 	void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
 	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
Index: linux-2.6.git/kernel/cgroup.c
===================================================================
--- linux-2.6.git.orig/kernel/cgroup.c
+++ linux-2.6.git/kernel/cgroup.c
@@ -1884,7 +1884,7 @@ out:
 				 */
 				break;
 			if (ss->cancel_attach)
-				ss->cancel_attach(ss, cgrp, tsk);
+				ss->cancel_attach(ss, cgrp, oldcgrp, tsk);
 		}
 	}
 	return retval;
@@ -2178,11 +2178,11 @@ out_cancel_attach:
 		for_each_subsys(root, ss) {
 			if (ss == failed_ss) {
 				if (cancel_failed_ss && ss->cancel_attach)
-					ss->cancel_attach(ss, cgrp, leader);
+					ss->cancel_attach(ss, cgrp, oldcgrp, leader);
 				break;
 			}
 			if (ss->cancel_attach)
-				ss->cancel_attach(ss, cgrp, leader);
+				ss->cancel_attach(ss, cgrp, oldcgrp, leader);
 		}
 	}
 	/* clean up the array of referenced threads in the group. */
Index: linux-2.6.git/kernel/cgroup_freezer.c
===================================================================
--- linux-2.6.git.orig/kernel/cgroup_freezer.c
+++ linux-2.6.git/kernel/cgroup_freezer.c
@@ -153,39 +153,6 @@ static void freezer_destroy(struct cgrou
 	kfree(cgroup_freezer(cgroup));
 }
 
-/*
- * The call to cgroup_lock() in the freezer.state write method prevents
- * a write to that file racing against an attach, and hence the
- * can_attach() result will remain valid until the attach completes.
- */
-static int freezer_can_attach(struct cgroup_subsys *ss,
-			      struct cgroup *new_cgroup,
-			      struct task_struct *task)
-{
-	struct freezer *freezer;
-
-	/*
-	 * Anything frozen can't move or be moved to/from.
-	 */
-
-	freezer = cgroup_freezer(new_cgroup);
-	if (freezer->state != CGROUP_THAWED)
-		return -EBUSY;
-
-	return 0;
-}
-
-static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
-{
-	rcu_read_lock();
-	if (__cgroup_freezing_or_frozen(tsk)) {
-		rcu_read_unlock();
-		return -EBUSY;
-	}
-	rcu_read_unlock();
-	return 0;
-}
-
 static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 {
 	struct freezer *freezer;
@@ -374,14 +341,96 @@ static int freezer_populate(struct cgrou
 	return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
 }
 
+static int freezer_task_transition(struct task_struct *task, int state_to)
+{
+	int retval = 0;
+
+	switch (state_to) {
+	case CGROUP_THAWED:
+		thaw_process(task);
+		break;
+	case CGROUP_FROZEN:
+	case CGROUP_FREEZING:
+		if (freeze_task(task, true)) {
+			if (!frozen(task)) {
+				if (!freezing(task) &&
+				    !freezer_should_skip(task))
+					retval = -EBUSY;
+			}
+		}
+		break;
+	}
+
+	return retval;
+}
+
+static int freezer_can_attach_task(struct cgroup *cgroup, struct task_struct *task)
+{
+	struct freezer *old_freezer;
+	struct freezer *freezer;
+
+	int goal_state, orig_state;
+	int retval = 0;
+
+	old_freezer = task_freezer(task);
+	freezer = cgroup_freezer(cgroup);
+
+	spin_lock_irq(&freezer->lock);
+
+	if (!spin_trylock_irq(&old_freezer->lock)) {
+		retval = -EBUSY;
+		goto out_unlock_freezer;
+	}
+
+	goal_state = orig_state = CGROUP_THAWED;
+
+	if (freezer->state == CGROUP_FREEZING ||
+	    freezer->state == CGROUP_FROZEN)
+		goal_state = CGROUP_FREEZING;
+
+	if (old_freezer->state == CGROUP_FREEZING ||
+	    old_freezer->state == CGROUP_FROZEN)
+		orig_state = CGROUP_FREEZING;
+
+	/* Nothing to change */
+	if (orig_state == goal_state)
+		goto done;
+
+	retval = freezer_task_transition(task, goal_state);
+done:
+	spin_unlock_irq(&old_freezer->lock);
+out_unlock_freezer:
+	spin_unlock_irq(&freezer->lock);
+
+	return retval;
+}
+
+static void freezer_cancel_attach(struct cgroup_subsys *ss,
+				  struct cgroup *cgroup,
+				  struct cgroup *old_cgroup,
+				  struct task_struct *task)
+{
+	struct freezer *freezer = cgroup_freezer(old_cgroup);
+	int retval = 0;
+
+	spin_lock_irq(&freezer->lock);
+	retval = freezer_task_transition(task, freezer->state);
+	if (retval)
+		pr_warning("freezer: Can't move task (pid %d) to %s state\n",
+			   task_pid_nr(task),
+			   freezer_state_strs[freezer->state]);
+	spin_unlock_irq(&freezer->lock);
+}
+
 struct cgroup_subsys freezer_subsys = {
 	.name		= "freezer",
 	.create		= freezer_create,
 	.destroy	= freezer_destroy,
 	.populate	= freezer_populate,
 	.subsys_id	= freezer_subsys_id,
-	.can_attach	= freezer_can_attach,
-	.can_attach_task = freezer_can_attach_task,
+	.can_attach	= NULL,
+	.can_attach_task= freezer_can_attach_task,
+	.cancel_attach	= freezer_cancel_attach,
 	.pre_attach	= NULL,
 	.attach_task	= NULL,
 	.attach		= NULL,
Index: linux-2.6.git/mm/memcontrol.c
===================================================================
--- linux-2.6.git.orig/mm/memcontrol.c
+++ linux-2.6.git/mm/memcontrol.c
@@ -5337,6 +5337,7 @@ static int mem_cgroup_can_attach(struct
 
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
+				struct cgroup *old_cgroup,
 				struct task_struct *p)
 {
 	mem_cgroup_clear_mc();
@@ -5477,6 +5478,7 @@ static int mem_cgroup_can_attach(struct
 }
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
+				struct cgroup *old_cgroup,
 				struct task_struct *p)
 {
 }

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 12:08 [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup Cyrill Gorcunov
@ 2011-11-28 13:10 ` Andrey Vagin
  2011-11-28 13:38   ` Cyrill Gorcunov
  2011-11-28 16:08 ` Tejun Heo
  1 sibling, 1 reply; 11+ messages in thread
From: Andrey Vagin @ 2011-11-28 13:10 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: LKML, Li Zefan, Matt Helsley, Pavel Emelyanov, Tejun Heo

>        void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
> -                             struct task_struct *tsk);
> +                             struct cgroup *old_cgrp, struct task_struct *tsk);

I'm not shure, that we need old_cgrp, because when cancel_attach is
executed, a task is in old cgroup and old_cgrp = task_cgroup(tsk);

...

>
> +static int freezer_task_transition(struct task_struct *task, int state_to)
> +{
> +       int retval = 0;
> +
> +       switch (state_to) {
> +       case CGROUP_THAWED:
> +               thaw_process(task);
> +               break;
> +       case CGROUP_FROZEN:
> +       case CGROUP_FREEZING:
> +               if (freeze_task(task, true)) {
> +                       if (!frozen(task)) {
> +                               if (!freezing(task) &&
> +                                   !freezer_should_skip(task))
> +                                       retval = -EBUSY;
> +                       }
> +               }
> +               break;
> +       }
> +
> +       return retval;
> +}
> +
> +static int freezer_can_attach_task(struct cgroup *cgroup, struct task_struct *task)
> +{
> +       struct freezer *old_freezer;
> +       struct freezer *freezer;
> +
> +       int goal_state, orig_state;
> +       int retval = 0;
> +
> +       old_freezer = task_freezer(task);
> +       freezer = cgroup_freezer(cgroup);
> +
> +       spin_lock_irq(&freezer->lock);
> +
> +       if (!spin_trylock_irq(&old_freezer->lock)) {
> +               retval = -EBUSY;

I think EBUSY is not a good idea in this place.  We can do something
like double_rq_lock.

.....

> +
> +static void freezer_cancel_attach(struct cgroup_subsys *ss,
> +                                 struct cgroup *cgroup,
> +                                 struct cgroup *old_cgroup,
> +                                 struct task_struct *task)
> +{
> +       struct freezer *freezer = cgroup_freezer(old_cgroup);
> +       int retval = 0;
> +
> +       spin_lock_irq(&freezer->lock);
> +       retval = freezer_task_transition(task, freezer->state);
> +       if (retval)
> +               pr_warning("freezer: Can't move task (pid %d) to %s state\n",
> +                          task_pid_nr(task),
> +                          freezer_state_strs[freezer->state]);

It's strange. A rollback can't fail. We have three situations:

frozen -> frozen
thawed -> frozen
frozen -> thawed

In first and second cases cancel_request can't fail.
In the third we have a problem, which may be solved if we will call
thaw_process(task) from attach_task(), we can do that, because
thaw_process() can't fail. It solves a problem, because
freezer_cancel_attach will be executed for the first and second cases
only.

If my suggestion is correct, we can replace pr_warning on BUG_ON

> +       spin_unlock_irq(&freezer->lock);
> +}
> +

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 13:10 ` Andrey Vagin
@ 2011-11-28 13:38   ` Cyrill Gorcunov
  2011-11-28 14:03     ` Andrew Vagin
  0 siblings, 1 reply; 11+ messages in thread
From: Cyrill Gorcunov @ 2011-11-28 13:38 UTC (permalink / raw)
  To: Andrey Vagin; +Cc: LKML, Li Zefan, Matt Helsley, Pavel Emelyanov, Tejun Heo

On Mon, Nov 28, 2011 at 05:10:00PM +0400, Andrey Vagin wrote:
> >        void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
> > -                             struct task_struct *tsk);
> > +                             struct cgroup *old_cgrp, struct task_struct *tsk);
> 
> I'm not shure, that we need old_cgrp, because when cancel_attach is
> executed, a task is in old cgroup and old_cgrp = task_cgroup(tsk);
> 
> ...
> 

Yup, thanks for the point. Indeed old_cgrp is redundant and task_cgroup
helper will provide all additional information we need.

> > +
> > +static int freezer_can_attach_task(struct cgroup *cgroup, struct task_struct *task)
> > +{
> > +       struct freezer *old_freezer;
> > +       struct freezer *freezer;
> > +
> > +       int goal_state, orig_state;
> > +       int retval = 0;
> > +
> > +       old_freezer = task_freezer(task);
> > +       freezer = cgroup_freezer(cgroup);
> > +
> > +       spin_lock_irq(&freezer->lock);
> > +
> > +       if (!spin_trylock_irq(&old_freezer->lock)) {
> > +               retval = -EBUSY;
> 
> I think EBUSY is not a good idea in this place.  We can do something
> like double_rq_lock.
> 

Could you please elaborate? freezers are guarded with spinlocks so I think
we should stick with them instead of poking rq (or whatever) directly.

> 
> > +
> > +static void freezer_cancel_attach(struct cgroup_subsys *ss,
> > +                                 struct cgroup *cgroup,
> > +                                 struct cgroup *old_cgroup,
> > +                                 struct task_struct *task)
> > +{
> > +       struct freezer *freezer = cgroup_freezer(old_cgroup);
> > +       int retval = 0;
> > +
> > +       spin_lock_irq(&freezer->lock);
> > +       retval = freezer_task_transition(task, freezer->state);
> > +       if (retval)
> > +               pr_warning("freezer: Can't move task (pid %d) to %s state\n",
> > +                          task_pid_nr(task),
> > +                          freezer_state_strs[freezer->state]);
> 
> It's strange. A rollback can't fail. We have three situations:
> 
> frozen -> frozen
> thawed -> frozen
> frozen -> thawed
> 
> In first and second cases cancel_request can't fail.
> In the third we have a problem, which may be solved if we will call
> thaw_process(task) from attach_task(), we can do that, because
> thaw_process() can't fail. It solves a problem, because
> freezer_cancel_attach will be executed for the first and second cases
> only.
> 
> If my suggestion is correct, we can replace pr_warning on BUG_ON
> 

Yes, the case which can fail is

   frozen->(can_attach_task)->thawed
     (cgroup_task_migrate failure)
   thawed->(cancel_attach)->frozen

and we should never fail here since otherwise we would not have
a "frozen" state before. But I think placing BUG_ON is too severe
here, maybe WARN_ON_ONCE(1) would fit better?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 13:38   ` Cyrill Gorcunov
@ 2011-11-28 14:03     ` Andrew Vagin
  2011-11-28 15:00       ` Cyrill Gorcunov
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Vagin @ 2011-11-28 14:03 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Andrey Vagin, LKML, Li Zefan, Matt Helsley, Pavel Emelyanov, Tejun Heo

> > > +static int freezer_can_attach_task(struct cgroup *cgroup, struct task_struct *task)
> > > +{
> > > +       struct freezer *old_freezer;
> > > +       struct freezer *freezer;
> > > +
> > > +       int goal_state, orig_state;
> > > +       int retval = 0;
> > > +
> > > +       old_freezer = task_freezer(task);
> > > +       freezer = cgroup_freezer(cgroup);
> > > +
> > > +       spin_lock_irq(&freezer->lock);
> > > +
> > > +       if (!spin_trylock_irq(&old_freezer->lock)) {
> > > +               retval = -EBUSY;
> > 
> > I think EBUSY is not a good idea in this place.  We can do something
> > like double_rq_lock.
> > 
> 
> Could you please elaborate? freezers are guarded with spinlocks so I think
> we should stick with them instead of poking rq (or whatever) directly.

It's misunderstanding. I want to say that we can avoid dead lock if we
will take a lock with a smaller address at first.

if (&freezer->lock > &old_freezer->lock) {
	spin_lock_irq(&old_freezer->lock)
	spin_lock_irq(&freezer->lock);
} else {
	spin_lock_irq(&freezer->lock);
	spin_lock_irq(&old_freezer->lock)
}

> 
> > 
> > > +
> > > +static void freezer_cancel_attach(struct cgroup_subsys *ss,
> > > +                                 struct cgroup *cgroup,
> > > +                                 struct cgroup *old_cgroup,
> > > +                                 struct task_struct *task)
> > > +{
> > > +       struct freezer *freezer = cgroup_freezer(old_cgroup);
> > > +       int retval = 0;
> > > +
> > > +       spin_lock_irq(&freezer->lock);
> > > +       retval = freezer_task_transition(task, freezer->state);
> > > +       if (retval)
> > > +               pr_warning("freezer: Can't move task (pid %d) to %s state\n",
> > > +                          task_pid_nr(task),
> > > +                          freezer_state_strs[freezer->state]);
> > 
> > It's strange. A rollback can't fail. We have three situations:
> > 
> > frozen -> frozen
> > thawed -> frozen
> > frozen -> thawed
> > 
> > In first and second cases cancel_request can't fail.
> > In the third we have a problem, which may be solved if we will call
> > thaw_process(task) from attach_task(), we can do that, because
> > thaw_process() can't fail. It solves a problem, because
> > freezer_cancel_attach will be executed for the first and second cases
> > only.
> > 
> > If my suggestion is correct, we can replace pr_warning on BUG_ON
> > 
> 
> Yes, the case which can fail is
> 
>    frozen->(can_attach_task)->thawed
>      (cgroup_task_migrate failure)
>    thawed->(cancel_attach)->frozen
> 
> and we should never fail here since otherwise we would not have
> a "frozen" state before. But I think placing BUG_ON is too severe
> here, maybe WARN_ON_ONCE(1) would fit better?

It's true, if a task is not being executed between thaw_process() and
freeze_task().

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 14:03     ` Andrew Vagin
@ 2011-11-28 15:00       ` Cyrill Gorcunov
  2011-11-28 15:43         ` Andrew Wagin
  0 siblings, 1 reply; 11+ messages in thread
From: Cyrill Gorcunov @ 2011-11-28 15:00 UTC (permalink / raw)
  To: Andrew Vagin; +Cc: LKML, Li Zefan, Matt Helsley, Pavel Emelyanov, Tejun Heo

On Mon, Nov 28, 2011 at 06:03:56PM +0400, Andrew Vagin wrote:
> > > > +static int freezer_can_attach_task(struct cgroup *cgroup, struct task_struct *task)
> > > > +{
> > > > +       struct freezer *old_freezer;
> > > > +       struct freezer *freezer;
> > > > +
> > > > +       int goal_state, orig_state;
> > > > +       int retval = 0;
> > > > +
> > > > +       old_freezer = task_freezer(task);
> > > > +       freezer = cgroup_freezer(cgroup);
> > > > +
> > > > +       spin_lock_irq(&freezer->lock);
> > > > +
> > > > +       if (!spin_trylock_irq(&old_freezer->lock)) {
> > > > +               retval = -EBUSY;
> > > 
> > > I think EBUSY is not a good idea in this place.  We can do something
> > > like double_rq_lock.
> > > 
> > 
> > Could you please elaborate? freezers are guarded with spinlocks so I think
> > we should stick with them instead of poking rq (or whatever) directly.
> 
> It's misunderstanding. I want to say that we can avoid dead lock if we
> will take a lock with a smaller address at first.
> 
> if (&freezer->lock > &old_freezer->lock) {
> 	spin_lock_irq(&old_freezer->lock)
> 	spin_lock_irq(&freezer->lock);
> } else {
> 	spin_lock_irq(&freezer->lock);
> 	spin_lock_irq(&old_freezer->lock)
> }
> 

This is not applicable here as far as I see. It works for rq because of
per-cpu address allocation, but not for freezers which are allocated via
kzalloc. The second try_lock (note I've overdid with irq disabling, plain
spin_trylock would be enough) is not for escaping deadlock but rather for
not waiting much if target freezer is handling state transition for all
task it has.

I think the better approach would to make this code even less lock contended,
ie something like

	local_irq_disable
	spin_trylock(new_freezer)
	spin_trylock(old_freezer)
	...
	local_irq_enable

so if both freezers are not handling anything we attach the task then.
Or I miss something obvious?

> > > 
> > > It's strange. A rollback can't fail. We have three situations:
> > > 
> > > frozen -> frozen
> > > thawed -> frozen
> > > frozen -> thawed
> > > 
> > > In first and second cases cancel_request can't fail.
> > > In the third we have a problem, which may be solved if we will call
> > > thaw_process(task) from attach_task(), we can do that, because
> > > thaw_process() can't fail. It solves a problem, because
> > > freezer_cancel_attach will be executed for the first and second cases
> > > only.
> > > 
> > > If my suggestion is correct, we can replace pr_warning on BUG_ON
> > > 
> > 
> > Yes, the case which can fail is
> > 
> >    frozen->(can_attach_task)->thawed
> >      (cgroup_task_migrate failure)
> >    thawed->(cancel_attach)->frozen
> > 
> > and we should never fail here since otherwise we would not have
> > a "frozen" state before. But I think placing BUG_ON is too severe
> > here, maybe WARN_ON_ONCE(1) would fit better?
> 
> It's true, if a task is not being executed between thaw_process() and
> freeze_task().

Hmm... But what the problem it might be if a task get executed between
those stages even for some time?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 15:00       ` Cyrill Gorcunov
@ 2011-11-28 15:43         ` Andrew Wagin
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Wagin @ 2011-11-28 15:43 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: LKML, Li Zefan, Matt Helsley, Pavel Emelyanov, Tejun Heo

>
>> > >
>> > > It's strange. A rollback can't fail. We have three situations:
>> > >
>> > > frozen -> frozen
>> > > thawed -> frozen
>> > > frozen -> thawed
>> > >
>> > > In first and second cases cancel_request can't fail.
>> > > In the third we have a problem, which may be solved if we will call
>> > > thaw_process(task) from attach_task(), we can do that, because
>> > > thaw_process() can't fail. It solves a problem, because
>> > > freezer_cancel_attach will be executed for the first and second cases
>> > > only.
>> > >
>> > > If my suggestion is correct, we can replace pr_warning on BUG_ON
>> > >
>> >
>> > Yes, the case which can fail is
>> >
>> >    frozen->(can_attach_task)->thawed
>> >      (cgroup_task_migrate failure)
>> >    thawed->(cancel_attach)->frozen
>> >
>> > and we should never fail here since otherwise we would not have
>> > a "frozen" state before. But I think placing BUG_ON is too severe
>> > here, maybe WARN_ON_ONCE(1) would fit better?
>>
>> It's true, if a task is not being executed between thaw_process() and
>> freeze_task().
>
> Hmm... But what the problem it might be if a task get executed between
> those stages even for some time?

It may become unfreezable...

>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 12:08 [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup Cyrill Gorcunov
  2011-11-28 13:10 ` Andrey Vagin
@ 2011-11-28 16:08 ` Tejun Heo
  2011-11-28 16:31   ` Cyrill Gorcunov
  2011-11-29 22:58   ` Matt Helsley
  1 sibling, 2 replies; 11+ messages in thread
From: Tejun Heo @ 2011-11-28 16:08 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: LKML, Li Zefan, Matt Helsley, Andrey Vagin, Pavel Emelyanov

On Mon, Nov 28, 2011 at 04:08:13PM +0400, Cyrill Gorcunov wrote:
> In checkpoint/restore we need an ability to attach pids to
> a frozen cgroup. Thus once pid reaches a frozen cgroup it is
> not rejected, but the task get frozen immediately.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---
> 
> I would really appreciate complains and comments.

First of all, both freezer and cgroup have non-trivial pending
patchsets (e.g. ->can_attach_task() is scheduled for removal) and I
have changes which basically try to achieve about the same thing, so
let's slow down a bit.  I think the problem is a bit more complex.

Some thoughts I have on cgroup freezer ATM,

* Currently, FROZEN -> FREEZING transition isn't possible.  That's why
  event transition detection by polling is acceptable.  ie. once the
  state is polled to be FROZEN, it stays frozen.  Allowing FREEZING ->
  FROZEN transition would probably require improvements to state
  transition notification too, at the very least, clarification of
  rules.

* There are some unclear corner cases and bugs the current cgroup
  freezer has.  e.g. behavior w.r.t. kthreads is outright buggy.  It
  would be great to figure out how to deal with them with or before
  this change (ie. what happens when you transfer unfreezable
  kthreads).

* A pending feature request is making the freezing action atomically
  recursive.  We'll probably need to add a param to allow choosing
  which behavior to take.

* Another improvement that I want to have is allowing cgroup frozen
  tasks to be killed.  I don't think this matters for system freezer
  but for cgroup freezer both oom killer and systemd want it.

So, while I agree with the direction of this patch, I think this
definitely needs a lot more work to go in.  I don't think we can do
much until the freezer and cgroup changes are settled.  The freezer
part is now in Rafael's tree, the cgroup part is going under Linus'
review.  Once they're complete, I'll provide a merged branch that
further cgroup works can be based on.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 16:08 ` Tejun Heo
@ 2011-11-28 16:31   ` Cyrill Gorcunov
  2011-11-29 22:58   ` Matt Helsley
  1 sibling, 0 replies; 11+ messages in thread
From: Cyrill Gorcunov @ 2011-11-28 16:31 UTC (permalink / raw)
  To: Tejun Heo; +Cc: LKML, Li Zefan, Matt Helsley, Andrey Vagin, Pavel Emelyanov

On Mon, Nov 28, 2011 at 08:08:44AM -0800, Tejun Heo wrote:
> On Mon, Nov 28, 2011 at 04:08:13PM +0400, Cyrill Gorcunov wrote:
> > In checkpoint/restore we need an ability to attach pids to
> > a frozen cgroup. Thus once pid reaches a frozen cgroup it is
> > not rejected, but the task get frozen immediately.
> > 
> > Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> > ---
> > 
> > I would really appreciate complains and comments.
> 
> First of all, both freezer and cgroup have non-trivial pending
> patchsets (e.g. ->can_attach_task() is scheduled for removal) and I
> have changes which basically try to achieve about the same thing, so
> let's slow down a bit.  I think the problem is a bit more complex.
> 
> Some thoughts I have on cgroup freezer ATM,
> 
...
> 
> * Another improvement that I want to have is allowing cgroup frozen
>   tasks to be killed.  I don't think this matters for system freezer
>   but for cgroup freezer both oom killer and systemd want it.
>

  * Run ptrace over frozen tasks ;)

> So, while I agree with the direction of this patch, I think this
> definitely needs a lot more work to go in.  I don't think we can do
> much until the freezer and cgroup changes are settled.  The freezer
> part is now in Rafael's tree, the cgroup part is going under Linus'
> review.  Once they're complete, I'll provide a merged branch that
> further cgroup works can be based on.
> 

Thanks a lot for comments, Tejun! For own needs I'll make a stub
in our user-space tool, once things are settled down we can move on.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-28 16:08 ` Tejun Heo
  2011-11-28 16:31   ` Cyrill Gorcunov
@ 2011-11-29 22:58   ` Matt Helsley
  2011-11-29 23:19     ` Tejun Heo
  2011-11-30  6:48     ` Cyrill Gorcunov
  1 sibling, 2 replies; 11+ messages in thread
From: Matt Helsley @ 2011-11-29 22:58 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Cyrill Gorcunov, LKML, Li Zefan, Matt Helsley, Andrey Vagin,
	Pavel Emelyanov

On Mon, Nov 28, 2011 at 08:08:44AM -0800, Tejun Heo wrote:
> On Mon, Nov 28, 2011 at 04:08:13PM +0400, Cyrill Gorcunov wrote:
> > In checkpoint/restore we need an ability to attach pids to
> > a frozen cgroup. Thus once pid reaches a frozen cgroup it is
> > not rejected, but the task get frozen immediately.

Frankly, I don't see the need for this. I think you should explain
it with more detail at the very least.

Why can't they be frozen before being checkpointed? Assuming you've
used SEIZE and spawned a new parasite thread, just move that thread to a 
cgroup for the parasitic threads then freeze the old cgroup.

> > 
> > Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> > ---
> > 
> > I would really appreciate complains and comments.
> 
> First of all, both freezer and cgroup have non-trivial pending
> patchsets (e.g. ->can_attach_task() is scheduled for removal) and I
> have changes which basically try to achieve about the same thing, so
> let's slow down a bit.  I think the problem is a bit more complex.
> 
> Some thoughts I have on cgroup freezer ATM,
> 
> * Currently, FROZEN -> FREEZING transition isn't possible.  That's why

No, IMO that would consitute breaking the interface. Scripts, tools,
applications, and tests have been written to rely on the fact that
the cgroup cannot go from FROZEN to FREEZING -- only from FROZEN to
THAWED.

>   event transition detection by polling is acceptable.  ie. once the
>   state is polled to be FROZEN, it stays frozen.  Allowing FREEZING ->
>   FROZEN transition would probably require improvements to state

(I'm assuming this last sentence has a thinko in it...)

>   transition notification too, at the very least, clarification of

Notification is a fine idea. However, not everything that's already
written expects them so correct usage of the cgroup freezer should not
require them -- IOW allowing the FROZEN -> FREEZING transition
isn't made OK just by adding notifications.

>   rules.
> 
> * There are some unclear corner cases and bugs the current cgroup
>   freezer has.  e.g. behavior w.r.t. kthreads is outright buggy.  It
>   would be great to figure out how to deal with them with or before
>   this change (ie. what happens when you transfer unfreezable
>   kthreads).

Huh? Shouldn't we just disable moving kthreads between cgroups? Allowing
userspace to freeze kthreads via cgroups seems like a *very* bad idea
(perhaps it's a thread critical for IO, or some driver, etc.).

Cheers
	-Matt Helsley


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-29 22:58   ` Matt Helsley
@ 2011-11-29 23:19     ` Tejun Heo
  2011-11-30  6:48     ` Cyrill Gorcunov
  1 sibling, 0 replies; 11+ messages in thread
From: Tejun Heo @ 2011-11-29 23:19 UTC (permalink / raw)
  To: Matt Helsley
  Cc: Cyrill Gorcunov, LKML, Li Zefan, Andrey Vagin, Pavel Emelyanov

Hello, Matt.

On Tue, Nov 29, 2011 at 02:58:53PM -0800, Matt Helsley wrote:
> >   transition notification too, at the very least, clarification of
> 
> Notification is a fine idea. However, not everything that's already
> written expects them so correct usage of the cgroup freezer should not
> require them -- IOW allowing the FROZEN -> FREEZING transition
> isn't made OK just by adding notifications.

The transition model doesn't have to be modified.  ie. we can try to
freeze the task/process on migration request and actually moves it
only when it actually reaches the target state.  Not entirely sure how
well that can be implemented with the current cgroup callbacks tho.
We can also make the new behavior selectable per-cgroup if all else
fail.

> > * There are some unclear corner cases and bugs the current cgroup
> >   freezer has.  e.g. behavior w.r.t. kthreads is outright buggy.  It
> >   would be great to figure out how to deal with them with or before
> >   this change (ie. what happens when you transfer unfreezable
> >   kthreads).
> 
> Huh? Shouldn't we just disable moving kthreads between cgroups? Allowing
> userspace to freeze kthreads via cgroups seems like a *very* bad idea
> (perhaps it's a thread critical for IO, or some driver, etc.).

That probably is the easiest solution.  Different cgroups can be
grouped together so we need to be a bit careful but if none of the
cgroups makes sense with kthread, we may just ban kthreads from any
cgroup.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup
  2011-11-29 22:58   ` Matt Helsley
  2011-11-29 23:19     ` Tejun Heo
@ 2011-11-30  6:48     ` Cyrill Gorcunov
  1 sibling, 0 replies; 11+ messages in thread
From: Cyrill Gorcunov @ 2011-11-30  6:48 UTC (permalink / raw)
  To: Matt Helsley; +Cc: Tejun Heo, LKML, Li Zefan, Andrey Vagin, Pavel Emelyanov

On Tue, Nov 29, 2011 at 02:58:53PM -0800, Matt Helsley wrote:
> On Mon, Nov 28, 2011 at 08:08:44AM -0800, Tejun Heo wrote:
> > On Mon, Nov 28, 2011 at 04:08:13PM +0400, Cyrill Gorcunov wrote:
> > > In checkpoint/restore we need an ability to attach pids to
> > > a frozen cgroup. Thus once pid reaches a frozen cgroup it is
> > > not rejected, but the task get frozen immediately.
> 
> Frankly, I don't see the need for this. I think you should explain
> it with more detail at the very least.
> 
> Why can't they be frozen before being checkpointed? Assuming you've
> used SEIZE and spawned a new parasite thread, just move that thread to a 
> cgroup for the parasitic threads then freeze the old cgroup.
> 

Hi Matt,

At moment there is no way to use ptrace with frozen task (Tejun
will take a look into such ability once he get time for, as far
as I know).

Ideally we would like to move checkpointing tasks to cgroup,
freeze it there and _then_ run ptrace and inject parasite code
into it. Finally the parasite code would dump tasks contents,
detach itself and unfreeze tasks.

At restore time we would like to move the tasks being restored
into frozen cgroup before final jump to the original pt_regs the
task had at checkpoint time (it actually takes a few stages, ie once
memory and other resources are restored but task is not yet
switched to the former pt_regs, we move it into frozen cgroup,
and then the task get immediately frozen, finally once all processes
being restored reaches this point we unfreeze the cgroup and
they all switch to original registers contents and continue
execution).

	Cyrill

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2011-11-30  6:49 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-28 12:08 [RFC] cgroups: freezer -- Allow to attach a task to a frozen cgroup Cyrill Gorcunov
2011-11-28 13:10 ` Andrey Vagin
2011-11-28 13:38   ` Cyrill Gorcunov
2011-11-28 14:03     ` Andrew Vagin
2011-11-28 15:00       ` Cyrill Gorcunov
2011-11-28 15:43         ` Andrew Wagin
2011-11-28 16:08 ` Tejun Heo
2011-11-28 16:31   ` Cyrill Gorcunov
2011-11-29 22:58   ` Matt Helsley
2011-11-29 23:19     ` Tejun Heo
2011-11-30  6:48     ` Cyrill Gorcunov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.