linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork
       [not found] <4ff7f030-b797-6711-fb6a-fe39bb02075a@oracle.com>
@ 2020-11-09 23:23 ` chris hyser
  2020-11-10 14:15   ` Joel Fernandes
  2020-11-17  0:45   ` Joel Fernandes
  0 siblings, 2 replies; 7+ messages in thread
From: chris hyser @ 2020-11-09 23:23 UTC (permalink / raw)
  To: Joel Fernandes (Google), linux-kernel

> On 10/19/20 9:43 PM, Joel Fernandes (Google) wrote:
>> In order to prevent interference and clearly support both per-task and CGroup
>> APIs, split the cookie into 2 and allow it to be set from either per-task, or
>> CGroup API. The final cookie is the combined value of both and is computed when
>> the stop-machine executes during a change of cookie.
>>
>> Also, for the per-task cookie, it will get weird if we use pointers of any
>> emphemeral objects. For this reason, introduce a refcounted object who's sole
>> purpose is to assign unique cookie value by way of the object's pointer.
>>
>> While at it, refactor the CGroup code a bit. Future patches will introduce more
>> APIs and support.
>>
>> Tested-by: Julien Desfossez <jdesfossez@digitalocean.com>
>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>> ---
>>   include/linux/sched.h |   2 +
>>   kernel/sched/core.c   | 241 ++++++++++++++++++++++++++++++++++++++++--
>>   kernel/sched/debug.c  |   4 +
>>   3 files changed, 236 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index fe6f225bfbf9..c6034c00846a 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -688,6 +688,8 @@ struct task_struct {
>>   #ifdef CONFIG_SCHED_CORE
>>       struct rb_node            core_node;
>>       unsigned long            core_cookie;
>> +    unsigned long            core_task_cookie;
>> +    unsigned long            core_group_cookie;
>>       unsigned int            core_occupation;
>>   #endif
>>   diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index bab4ea2f5cd8..30a9e4cb5ce1 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -346,11 +346,14 @@ void sched_core_put(void)
>>       mutex_unlock(&sched_core_mutex);
>>   }
>>   +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2);
>> +
>>   #else /* !CONFIG_SCHED_CORE */
>>     static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
>>   static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
>>   static bool sched_core_enqueued(struct task_struct *task) { return false; }
>> +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2) { }
>>     #endif /* CONFIG_SCHED_CORE */
>>   @@ -3583,6 +3586,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
>>   #endif
>>   #ifdef CONFIG_SCHED_CORE
>>       RB_CLEAR_NODE(&p->core_node);
>> +
>> +    /*
>> +     * Tag child via per-task cookie only if parent is tagged via per-task
>> +     * cookie. This is independent of, but can be additive to the CGroup tagging.
>> +     */
>> +    if (current->core_task_cookie) {
>> +
>> +        /* If it is not CLONE_THREAD fork, assign a unique per-task tag. */
>> +        if (!(clone_flags & CLONE_THREAD)) {
>> +            return sched_core_share_tasks(p, p);
>> +               }
>> +        /* Otherwise share the parent's per-task tag. */
>> +        return sched_core_share_tasks(p, current);
>> +    }
>>   #endif
>>       return 0;
>>   } 

sched_core_share_tasks() looks at the value of the new tasks core_task_cookie which is non-zero on a
process or thread clone and causes underflow for both the enable flag itself and for cookie ref counts.

So just zero it in __sched_fork().

-chris


PATCH] sched: zero out the core scheduling cookie on clone

From: chris hyser <chris.hyser@oracle.com>

As the cookie is reference counted, even if inherited, zero this and allow
explicit sharing.

Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
---
  kernel/sched/core.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd3cc03..2af0ea6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3378,6 +3378,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
  	p->capture_control = NULL;
  #endif
  	init_numa_balancing(clone_flags, p);
+#ifdef CONFIG_SCHED_CORE
+	p->core_task_cookie = 0;
+#endif
  #ifdef CONFIG_SMP
  	p->wake_entry.u_flags = CSD_TYPE_TTWU;
  #endif
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork
  2020-11-09 23:23 ` [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork chris hyser
@ 2020-11-10 14:15   ` Joel Fernandes
  2020-11-17  0:45   ` Joel Fernandes
  1 sibling, 0 replies; 7+ messages in thread
From: Joel Fernandes @ 2020-11-10 14:15 UTC (permalink / raw)
  To: chris hyser; +Cc: linux-kernel

On Mon, Nov 09, 2020 at 06:23:13PM -0500, chris hyser wrote:
[..]
> > 
> > > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > > index fe6f225bfbf9..c6034c00846a 100644
> > > --- a/include/linux/sched.h
> > > +++ b/include/linux/sched.h
> > > @@ -688,6 +688,8 @@ struct task_struct {
> > >   #ifdef CONFIG_SCHED_CORE
> > >       struct rb_node            core_node;
> > >       unsigned long            core_cookie;
> > > +    unsigned long            core_task_cookie;
> > > +    unsigned long            core_group_cookie;
> > >       unsigned int            core_occupation;
> > >   #endif
> > >   diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > index bab4ea2f5cd8..30a9e4cb5ce1 100644
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -346,11 +346,14 @@ void sched_core_put(void)
> > >       mutex_unlock(&sched_core_mutex);
> > >   }
> > >   +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2);
> > > +
> > >   #else /* !CONFIG_SCHED_CORE */
> > >     static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
> > >   static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
> > >   static bool sched_core_enqueued(struct task_struct *task) { return false; }
> > > +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2) { }
> > >     #endif /* CONFIG_SCHED_CORE */
> > >   @@ -3583,6 +3586,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
> > >   #endif
> > >   #ifdef CONFIG_SCHED_CORE
> > >       RB_CLEAR_NODE(&p->core_node);
> > > +
> > > +    /*
> > > +     * Tag child via per-task cookie only if parent is tagged via per-task
> > > +     * cookie. This is independent of, but can be additive to the CGroup tagging.
> > > +     */
> > > +    if (current->core_task_cookie) {
> > > +
> > > +        /* If it is not CLONE_THREAD fork, assign a unique per-task tag. */
> > > +        if (!(clone_flags & CLONE_THREAD)) {
> > > +            return sched_core_share_tasks(p, p);
> > > +               }
> > > +        /* Otherwise share the parent's per-task tag. */
> > > +        return sched_core_share_tasks(p, current);
> > > +    }
> > >   #endif
> > >       return 0;
> > >   }
> 
> sched_core_share_tasks() looks at the value of the new tasks core_task_cookie which is non-zero on a
> process or thread clone and causes underflow for both the enable flag itself and for cookie ref counts.
> 
> So just zero it in __sched_fork().

Nice catch, applying to my v9 staging tree. Thanks!

thanks,

 - Joel


> 
> -chris
> 
> 
> PATCH] sched: zero out the core scheduling cookie on clone
> 
> From: chris hyser <chris.hyser@oracle.com>
> 
> As the cookie is reference counted, even if inherited, zero this and allow
> explicit sharing.
> 
> Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
> ---
>  kernel/sched/core.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index fd3cc03..2af0ea6 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3378,6 +3378,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
>  	p->capture_control = NULL;
>  #endif
>  	init_numa_balancing(clone_flags, p);
> +#ifdef CONFIG_SCHED_CORE
> +	p->core_task_cookie = 0;
> +#endif
>  #ifdef CONFIG_SMP
>  	p->wake_entry.u_flags = CSD_TYPE_TTWU;
>  #endif
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork
  2020-11-09 23:23 ` [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork chris hyser
  2020-11-10 14:15   ` Joel Fernandes
@ 2020-11-17  0:45   ` Joel Fernandes
  1 sibling, 0 replies; 7+ messages in thread
From: Joel Fernandes @ 2020-11-17  0:45 UTC (permalink / raw)
  To: chris hyser; +Cc: linux-kernel

Hi Chris,

On Mon, Nov 09, 2020 at 06:23:13PM -0500, chris hyser wrote:
> > On 10/19/20 9:43 PM, Joel Fernandes (Google) wrote:
> > > In order to prevent interference and clearly support both per-task and CGroup
> > > APIs, split the cookie into 2 and allow it to be set from either per-task, or
> > > CGroup API. The final cookie is the combined value of both and is computed when
> > > the stop-machine executes during a change of cookie.
> > > 
> > > Also, for the per-task cookie, it will get weird if we use pointers of any
> > > emphemeral objects. For this reason, introduce a refcounted object who's sole
> > > purpose is to assign unique cookie value by way of the object's pointer.
> > > 
> > > While at it, refactor the CGroup code a bit. Future patches will introduce more
> > > APIs and support.
> > > 
> > > Tested-by: Julien Desfossez <jdesfossez@digitalocean.com>
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > ---
> > >   include/linux/sched.h |   2 +
> > >   kernel/sched/core.c   | 241 ++++++++++++++++++++++++++++++++++++++++--
> > >   kernel/sched/debug.c  |   4 +
> > >   3 files changed, 236 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > > index fe6f225bfbf9..c6034c00846a 100644
> > > --- a/include/linux/sched.h
> > > +++ b/include/linux/sched.h
> > > @@ -688,6 +688,8 @@ struct task_struct {
> > >   #ifdef CONFIG_SCHED_CORE
> > >       struct rb_node            core_node;
> > >       unsigned long            core_cookie;
> > > +    unsigned long            core_task_cookie;
> > > +    unsigned long            core_group_cookie;
> > >       unsigned int            core_occupation;
> > >   #endif
> > >   diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > index bab4ea2f5cd8..30a9e4cb5ce1 100644
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -346,11 +346,14 @@ void sched_core_put(void)
> > >       mutex_unlock(&sched_core_mutex);
> > >   }
> > >   +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2);
> > > +
> > >   #else /* !CONFIG_SCHED_CORE */
> > >     static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
> > >   static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
> > >   static bool sched_core_enqueued(struct task_struct *task) { return false; }
> > > +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2) { }
> > >     #endif /* CONFIG_SCHED_CORE */
> > >   @@ -3583,6 +3586,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
> > >   #endif
> > >   #ifdef CONFIG_SCHED_CORE
> > >       RB_CLEAR_NODE(&p->core_node);
> > > +
> > > +    /*
> > > +     * Tag child via per-task cookie only if parent is tagged via per-task
> > > +     * cookie. This is independent of, but can be additive to the CGroup tagging.
> > > +     */
> > > +    if (current->core_task_cookie) {
> > > +
> > > +        /* If it is not CLONE_THREAD fork, assign a unique per-task tag. */
> > > +        if (!(clone_flags & CLONE_THREAD)) {
> > > +            return sched_core_share_tasks(p, p);
> > > +               }
> > > +        /* Otherwise share the parent's per-task tag. */
> > > +        return sched_core_share_tasks(p, current);
> > > +    }
> > >   #endif
> > >       return 0;
> > >   }
> 
> sched_core_share_tasks() looks at the value of the new tasks core_task_cookie which is non-zero on a
> process or thread clone and causes underflow for both the enable flag itself and for cookie ref counts.
> 
> So just zero it in __sched_fork().

Since I am going to split this into a new patch, could you please send me a
detailed commit message explaining the issue? I feel the below one liner is
unclear / insufficient as a commit message.

thanks,

 - Joel


> -chris
> 
> 
> PATCH] sched: zero out the core scheduling cookie on clone
> 
> From: chris hyser <chris.hyser@oracle.com>
> 
> As the cookie is reference counted, even if inherited, zero this and allow
> explicit sharing.
> 
> Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
> ---
>  kernel/sched/core.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index fd3cc03..2af0ea6 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3378,6 +3378,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
>  	p->capture_control = NULL;
>  #endif
>  	init_numa_balancing(clone_flags, p);
> +#ifdef CONFIG_SCHED_CORE
> +	p->core_task_cookie = 0;
> +#endif
>  #ifdef CONFIG_SMP
>  	p->wake_entry.u_flags = CSD_TYPE_TTWU;
>  #endif
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork
  2020-11-04 22:30   ` chris hyser
  2020-11-05 14:49     ` Joel Fernandes
@ 2020-11-09 23:30     ` chris hyser
  1 sibling, 0 replies; 7+ messages in thread
From: chris hyser @ 2020-11-09 23:30 UTC (permalink / raw)
  To: Joel Fernandes (Google),
	Nishanth Aravamudan, Julien Desfossez, Peter Zijlstra, Tim Chen,
	Vineeth Pillai, Aaron Lu, Aubrey Li, tglx, linux-kernel
  Cc: mingo, torvalds, fweisbec, keescook, kerrnel, Phil Auld,
	Valentin Schneider, Mel Gorman, Pawan Gupta, Paolo Bonzini,
	vineeth, Chen Yu, Christian Brauner, Agata Gruza,
	Antonio Gomez Iglesias, graf, konrad.wilk, dfaggioli, pjt,
	rostedt, derkling, benbjiang, Alexandre Chartre, James.Bottomley,
	OWeisse, Dhaval Giani, Junaid Shahid, jsbarnes, Aubrey Li,
	Paul E. McKenney, Tim Chen

On 11/4/20 5:30 PM, chris hyser wrote:
> On 10/19/20 9:43 PM, Joel Fernandes (Google) wrote:
>> In order to prevent interference and clearly support both per-task and CGroup
>> APIs, split the cookie into 2 and allow it to be set from either per-task, or
>> CGroup API. The final cookie is the combined value of both and is computed when
>> the stop-machine executes during a change of cookie.
>>
>> Also, for the per-task cookie, it will get weird if we use pointers of any
>> emphemeral objects. For this reason, introduce a refcounted object who's sole
>> purpose is to assign unique cookie value by way of the object's pointer.
>>
>> While at it, refactor the CGroup code a bit. Future patches will introduce more
>> APIs and support.
>>
>> Tested-by: Julien Desfossez <jdesfossez@digitalocean.com>
>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
>> ---
>>   include/linux/sched.h |   2 +
>>   kernel/sched/core.c   | 241 ++++++++++++++++++++++++++++++++++++++++--
>>   kernel/sched/debug.c  |   4 +
>>   3 files changed, 236 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index fe6f225bfbf9..c6034c00846a 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -688,6 +688,8 @@ struct task_struct {
>>   #ifdef CONFIG_SCHED_CORE
>>       struct rb_node            core_node;
>>       unsigned long            core_cookie;
>> +    unsigned long            core_task_cookie;
>> +    unsigned long            core_group_cookie;
>>       unsigned int            core_occupation;
>>   #endif
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index bab4ea2f5cd8..30a9e4cb5ce1 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -346,11 +346,14 @@ void sched_core_put(void)
>>       mutex_unlock(&sched_core_mutex);
>>   }
>> +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2);
>> +
>>   #else /* !CONFIG_SCHED_CORE */
>>   static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
>>   static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
>>   static bool sched_core_enqueued(struct task_struct *task) { return false; }
>> +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2) { }
>>   #endif /* CONFIG_SCHED_CORE */
>> @@ -3583,6 +3586,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
>>   #endif
>>   #ifdef CONFIG_SCHED_CORE
>>       RB_CLEAR_NODE(&p->core_node);
>> +
>> +    /*
>> +     * Tag child via per-task cookie only if parent is tagged via per-task
>> +     * cookie. This is independent of, but can be additive to the CGroup tagging.
>> +     */
>> +    if (current->core_task_cookie) {
>> +
>> +        /* If it is not CLONE_THREAD fork, assign a unique per-task tag. */
>> +        if (!(clone_flags & CLONE_THREAD)) {
>> +            return sched_core_share_tasks(p, p);
>> +               }
>> +        /* Otherwise share the parent's per-task tag. */
>> +        return sched_core_share_tasks(p, current);
>> +    }
>>   #endif
>>       return 0;
>>   }

sched_core_share_tasks() looks at the value of the new tasks core_task_cookie which is non-zero on a
process or thread clone and causes underflow for both the enable flag itself and for cookie ref counts.

So just zero it in __sched_fork().

-chrish

------

sched: zero out the core scheduling cookie on clone

As the cookie is reference counted, even if inherited, zero this and allow
explicit sharing.

Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
---
  kernel/sched/core.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd3cc03..2af0ea6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3378,6 +3378,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
      p->capture_control = NULL;
  #endif
      init_numa_balancing(clone_flags, p);
+#ifdef CONFIG_SCHED_CORE
+    p->core_task_cookie = 0;
+#endif
  #ifdef CONFIG_SMP
      p->wake_entry.u_flags = CSD_TYPE_TTWU;
  #endif

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork
  2020-11-04 22:30   ` chris hyser
@ 2020-11-05 14:49     ` Joel Fernandes
  2020-11-09 23:30     ` chris hyser
  1 sibling, 0 replies; 7+ messages in thread
From: Joel Fernandes @ 2020-11-05 14:49 UTC (permalink / raw)
  To: chris hyser
  Cc: Nishanth Aravamudan, Julien Desfossez, Peter Zijlstra, Tim Chen,
	Vineeth Pillai, Aaron Lu, Aubrey Li, tglx, linux-kernel, mingo,
	torvalds, fweisbec, keescook, kerrnel, Phil Auld,
	Valentin Schneider, Mel Gorman, Pawan Gupta, Paolo Bonzini,
	vineeth, Chen Yu, Christian Brauner, Agata Gruza,
	Antonio Gomez Iglesias, graf, konrad.wilk, dfaggioli, pjt,
	rostedt, derkling, benbjiang, Alexandre Chartre, James.Bottomley,
	OWeisse, Dhaval Giani, Junaid Shahid, jsbarnes, Aubrey Li,
	Paul E. McKenney, Tim Chen

On Wed, Nov 04, 2020 at 05:30:24PM -0500, chris hyser wrote:
[..]
> +		wr.cookies[0] = cookie;
> > +	} else
> > +	/*
> > +	 * 		t1		joining		t2
> > +	 * CASE 1:
> > +	 * before	0				0
> > +	 * after	new cookie			new cookie
> > +	 *
> > +	 * CASE 2:
> > +	 * before	X (non-zero)			0
> > +	 * after	0				0
> > +	 *
> > +	 * CASE 3:
> > +	 * before	0				X (non-zero)
> > +	 * after	X				X
> > +	 *
> > +	 * CASE 4:
> > +	 * before	Y (non-zero)			X (non-zero)
> > +	 * after	X				X
> > +	 */
> > +	if (!t1->core_task_cookie && !t2->core_task_cookie) {
> > +		/* CASE 1. */
> > +		cookie = sched_core_alloc_task_cookie();
> > +		if (!cookie)
> > +			goto out_unlock;
> > +
> > +		/* Add another reference for the other task. */
> > +		if (!sched_core_get_task_cookie(cookie)) {
> > +			return -EINVAL;
> 
> should be:              ret = -EINVAL;

Fixed now, thanks.

thanks,

 - Joel


> 
> > +			goto out_unlock;
> > +		}
> > +
> > +		wr.tasks[0] = t1;
> > +		wr.tasks[1] = t2;
> > +		wr.cookies[0] = wr.cookies[1] = cookie;
> > +
> > +	} else if (t1->core_task_cookie && !t2->core_task_cookie) {
> > +		/* CASE 2. */
> > +		sched_core_put_task_cookie(t1->core_task_cookie);
> > +		sched_core_put_after_stopper = true;
> > +
> > +		wr.tasks[0] = t1; /* Reset cookie for t1. */
> > +
> > +	} else if (!t1->core_task_cookie && t2->core_task_cookie) {
> > +		/* CASE 3. */
> > +		if (!sched_core_get_task_cookie(t2->core_task_cookie)) {
> > +			ret = -EINVAL;
> > +			goto out_unlock;
> > +		}
> > +
> > +		wr.tasks[0] = t1;
> > +		wr.cookies[0] = t2->core_task_cookie;
> > +
> > +	} else {
> > +		/* CASE 4. */
> > +		if (!sched_core_get_task_cookie(t2->core_task_cookie)) {
> > +			ret = -EINVAL;
> > +			goto out_unlock;
> > +		}
> > +		sched_core_put_task_cookie(t1->core_task_cookie);
> > +		sched_core_put_after_stopper = true;
> > +
> > +		wr.tasks[0] = t1;
> > +		wr.cookies[0] = t2->core_task_cookie;
> > +	}
> > +
> > +	stop_machine(sched_core_task_join_stopper, (void *)&wr, NULL);
> > +
> > +	if (sched_core_put_after_stopper)
> > +		sched_core_put();
> > +
> > +	ret = 0;
> > +out_unlock:
> > +	mutex_unlock(&sched_core_mutex);
> > +	return ret;
> > +}
> > +
> > +/* CGroup interface */
> >   static u64 cpu_core_tag_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
> >   {
> >   	struct task_group *tg = css_tg(css);
> > @@ -9207,18 +9435,9 @@ static int __sched_write_tag(void *data)
> >   	 * when we set cgroup tag to 0 when the loop is done below.
> >   	 */
> >   	while ((p = css_task_iter_next(&it))) {
> > -		p->core_cookie = !!val ? (unsigned long)tg : 0UL;
> > -
> > -		if (sched_core_enqueued(p)) {
> > -			sched_core_dequeue(task_rq(p), p);
> > -			if (!p->core_cookie)
> > -				continue;
> > -		}
> > -
> > -		if (sched_core_enabled(task_rq(p)) &&
> > -		    p->core_cookie && task_on_rq_queued(p))
> > -			sched_core_enqueue(task_rq(p), p);
> > +		unsigned long cookie = !!val ? (unsigned long)tg : 0UL;
> > +		sched_core_tag_requeue(p, cookie, true /* group */);
> >   	}
> >   	css_task_iter_end(&it);
> > diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> > index c8fee8d9dfd4..88bf45267672 100644
> > --- a/kernel/sched/debug.c
> > +++ b/kernel/sched/debug.c
> > @@ -1024,6 +1024,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
> >   		__PS("clock-delta", t1-t0);
> >   	}
> > +#ifdef CONFIG_SCHED_CORE
> > +	__PS("core_cookie", p->core_cookie);
> > +#endif
> > +
> >   	sched_show_numa(p, m);
> >   }
> > 
> 
> -chrish

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork
  2020-10-20  1:43 ` [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork Joel Fernandes (Google)
@ 2020-11-04 22:30   ` chris hyser
  2020-11-05 14:49     ` Joel Fernandes
  2020-11-09 23:30     ` chris hyser
  0 siblings, 2 replies; 7+ messages in thread
From: chris hyser @ 2020-11-04 22:30 UTC (permalink / raw)
  To: Joel Fernandes (Google),
	Nishanth Aravamudan, Julien Desfossez, Peter Zijlstra, Tim Chen,
	Vineeth Pillai, Aaron Lu, Aubrey Li, tglx, linux-kernel
  Cc: mingo, torvalds, fweisbec, keescook, kerrnel, Phil Auld,
	Valentin Schneider, Mel Gorman, Pawan Gupta, Paolo Bonzini,
	vineeth, Chen Yu, Christian Brauner, Agata Gruza,
	Antonio Gomez Iglesias, graf, konrad.wilk, dfaggioli, pjt,
	rostedt, derkling, benbjiang, Alexandre Chartre, James.Bottomley,
	OWeisse, Dhaval Giani, Junaid Shahid, jsbarnes, Aubrey Li,
	Paul E. McKenney, Tim Chen

On 10/19/20 9:43 PM, Joel Fernandes (Google) wrote:
> In order to prevent interference and clearly support both per-task and CGroup
> APIs, split the cookie into 2 and allow it to be set from either per-task, or
> CGroup API. The final cookie is the combined value of both and is computed when
> the stop-machine executes during a change of cookie.
> 
> Also, for the per-task cookie, it will get weird if we use pointers of any
> emphemeral objects. For this reason, introduce a refcounted object who's sole
> purpose is to assign unique cookie value by way of the object's pointer.
> 
> While at it, refactor the CGroup code a bit. Future patches will introduce more
> APIs and support.
> 
> Tested-by: Julien Desfossez <jdesfossez@digitalocean.com>
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>   include/linux/sched.h |   2 +
>   kernel/sched/core.c   | 241 ++++++++++++++++++++++++++++++++++++++++--
>   kernel/sched/debug.c  |   4 +
>   3 files changed, 236 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index fe6f225bfbf9..c6034c00846a 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -688,6 +688,8 @@ struct task_struct {
>   #ifdef CONFIG_SCHED_CORE
>   	struct rb_node			core_node;
>   	unsigned long			core_cookie;
> +	unsigned long			core_task_cookie;
> +	unsigned long			core_group_cookie;
>   	unsigned int			core_occupation;
>   #endif
>   
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index bab4ea2f5cd8..30a9e4cb5ce1 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -346,11 +346,14 @@ void sched_core_put(void)
>   	mutex_unlock(&sched_core_mutex);
>   }
>   
> +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2);
> +
>   #else /* !CONFIG_SCHED_CORE */
>   
>   static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
>   static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
>   static bool sched_core_enqueued(struct task_struct *task) { return false; }
> +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2) { }
>   
>   #endif /* CONFIG_SCHED_CORE */
>   
> @@ -3583,6 +3586,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
>   #endif
>   #ifdef CONFIG_SCHED_CORE
>   	RB_CLEAR_NODE(&p->core_node);
> +
> +	/*
> +	 * Tag child via per-task cookie only if parent is tagged via per-task
> +	 * cookie. This is independent of, but can be additive to the CGroup tagging.
> +	 */
> +	if (current->core_task_cookie) {
> +
> +		/* If it is not CLONE_THREAD fork, assign a unique per-task tag. */
> +		if (!(clone_flags & CLONE_THREAD)) {
> +			return sched_core_share_tasks(p, p);
> +               }
> +		/* Otherwise share the parent's per-task tag. */
> +		return sched_core_share_tasks(p, current);
> +	}
>   #endif
>   	return 0;
>   }
> @@ -9177,6 +9194,217 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
>   #endif /* CONFIG_RT_GROUP_SCHED */
>   
>   #ifdef CONFIG_SCHED_CORE
> +/*
> + * A simple wrapper around refcount. An allocated sched_core_cookie's
> + * address is used to compute the cookie of the task.
> + */
> +struct sched_core_cookie {
> +	refcount_t refcnt;
> +};
> +
> +/*
> + * sched_core_tag_requeue - Common helper for all interfaces to set a cookie.
> + * @p: The task to assign a cookie to.
> + * @cookie: The cookie to assign.
> + * @group: is it a group interface or a per-task interface.
> + *
> + * This function is typically called from a stop-machine handler.
> + */
> +void sched_core_tag_requeue(struct task_struct *p, unsigned long cookie, bool group)
> +{
> +	if (!p)
> +		return;
> +
> +	if (group)
> +		p->core_group_cookie = cookie;
> +	else
> +		p->core_task_cookie = cookie;
> +
> +	/* Use up half of the cookie's bits for task cookie and remaining for group cookie. */
> +	p->core_cookie = (p->core_task_cookie <<
> +				(sizeof(unsigned long) * 4)) + p->core_group_cookie;
> +
> +	if (sched_core_enqueued(p)) {
> +		sched_core_dequeue(task_rq(p), p);
> +		if (!p->core_task_cookie)
> +			return;
> +	}
> +
> +	if (sched_core_enabled(task_rq(p)) &&
> +			p->core_cookie && task_on_rq_queued(p))
> +		sched_core_enqueue(task_rq(p), p);
> +}
> +
> +/* Per-task interface */
> +static unsigned long sched_core_alloc_task_cookie(void)
> +{
> +	struct sched_core_cookie *ptr =
> +		kmalloc(sizeof(struct sched_core_cookie), GFP_KERNEL);
> +
> +	if (!ptr)
> +		return 0;
> +	refcount_set(&ptr->refcnt, 1);
> +
> +	/*
> +	 * NOTE: sched_core_put() is not done by put_task_cookie(). Instead, it
> +	 * is done after the stopper runs.
> +	 */
> +	sched_core_get();
> +	return (unsigned long)ptr;
> +}
> +
> +static bool sched_core_get_task_cookie(unsigned long cookie)
> +{
> +	struct sched_core_cookie *ptr = (struct sched_core_cookie *)cookie;
> +
> +	/*
> +	 * NOTE: sched_core_put() is not done by put_task_cookie(). Instead, it
> +	 * is done after the stopper runs.
> +	 */
> +	sched_core_get();
> +	return refcount_inc_not_zero(&ptr->refcnt);
> +}
> +
> +static void sched_core_put_task_cookie(unsigned long cookie)
> +{
> +	struct sched_core_cookie *ptr = (struct sched_core_cookie *)cookie;
> +
> +	if (refcount_dec_and_test(&ptr->refcnt))
> +		kfree(ptr);
> +}
> +
> +struct sched_core_task_write_tag {
> +	struct task_struct *tasks[2];
> +	unsigned long cookies[2];
> +};
> +
> +/*
> + * Ensure that the task has been requeued. The stopper ensures that the task cannot
> + * be migrated to a different CPU while its core scheduler queue state is being updated.
> + * It also makes sure to requeue a task if it was running actively on another CPU.
> + */
> +static int sched_core_task_join_stopper(void *data)
> +{
> +	struct sched_core_task_write_tag *tag = (struct sched_core_task_write_tag *)data;
> +	int i;
> +
> +	for (i = 0; i < 2; i++)
> +		sched_core_tag_requeue(tag->tasks[i], tag->cookies[i], false /* !group */);
> +
> +	return 0;
> +}
> +
> +static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2)
> +{
> +	struct sched_core_task_write_tag wr = {}; /* for stop machine. */
> +	bool sched_core_put_after_stopper = false;
> +	unsigned long cookie;
> +	int ret = -ENOMEM;
> +
> +	mutex_lock(&sched_core_mutex);
> +
> +	/*
> +	 * NOTE: sched_core_get() is done by sched_core_alloc_task_cookie() or
> +	 *       sched_core_put_task_cookie(). However, sched_core_put() is done
> +	 *       by this function *after* the stopper removes the tasks from the
> +	 *       core queue, and not before. This is just to play it safe.
> +	 */
> +	if (t2 == NULL) {
> +		if (t1->core_task_cookie) {
> +			sched_core_put_task_cookie(t1->core_task_cookie);
> +			sched_core_put_after_stopper = true;
> +			wr.tasks[0] = t1; /* Keep wr.cookies[0] reset for t1. */
> +		}
> +	} else if (t1 == t2) {
> +		/* Assign a unique per-task cookie solely for t1. */
> +
> +		cookie = sched_core_alloc_task_cookie();
> +		if (!cookie)
> +			goto out_unlock;
> +
> +		if (t1->core_task_cookie) {
> +			sched_core_put_task_cookie(t1->core_task_cookie);
> +			sched_core_put_after_stopper = true;
> +		}
> +		wr.tasks[0] = t1;
> +		wr.cookies[0] = cookie;
> +	} else
> +	/*
> +	 * 		t1		joining		t2
> +	 * CASE 1:
> +	 * before	0				0
> +	 * after	new cookie			new cookie
> +	 *
> +	 * CASE 2:
> +	 * before	X (non-zero)			0
> +	 * after	0				0
> +	 *
> +	 * CASE 3:
> +	 * before	0				X (non-zero)
> +	 * after	X				X
> +	 *
> +	 * CASE 4:
> +	 * before	Y (non-zero)			X (non-zero)
> +	 * after	X				X
> +	 */
> +	if (!t1->core_task_cookie && !t2->core_task_cookie) {
> +		/* CASE 1. */
> +		cookie = sched_core_alloc_task_cookie();
> +		if (!cookie)
> +			goto out_unlock;
> +
> +		/* Add another reference for the other task. */
> +		if (!sched_core_get_task_cookie(cookie)) {
> +			return -EINVAL;

should be:              ret = -EINVAL;

> +			goto out_unlock;
> +		}
> +
> +		wr.tasks[0] = t1;
> +		wr.tasks[1] = t2;
> +		wr.cookies[0] = wr.cookies[1] = cookie;
> +
> +	} else if (t1->core_task_cookie && !t2->core_task_cookie) {
> +		/* CASE 2. */
> +		sched_core_put_task_cookie(t1->core_task_cookie);
> +		sched_core_put_after_stopper = true;
> +
> +		wr.tasks[0] = t1; /* Reset cookie for t1. */
> +
> +	} else if (!t1->core_task_cookie && t2->core_task_cookie) {
> +		/* CASE 3. */
> +		if (!sched_core_get_task_cookie(t2->core_task_cookie)) {
> +			ret = -EINVAL;
> +			goto out_unlock;
> +		}
> +
> +		wr.tasks[0] = t1;
> +		wr.cookies[0] = t2->core_task_cookie;
> +
> +	} else {
> +		/* CASE 4. */
> +		if (!sched_core_get_task_cookie(t2->core_task_cookie)) {
> +			ret = -EINVAL;
> +			goto out_unlock;
> +		}
> +		sched_core_put_task_cookie(t1->core_task_cookie);
> +		sched_core_put_after_stopper = true;
> +
> +		wr.tasks[0] = t1;
> +		wr.cookies[0] = t2->core_task_cookie;
> +	}
> +
> +	stop_machine(sched_core_task_join_stopper, (void *)&wr, NULL);
> +
> +	if (sched_core_put_after_stopper)
> +		sched_core_put();
> +
> +	ret = 0;
> +out_unlock:
> +	mutex_unlock(&sched_core_mutex);
> +	return ret;
> +}
> +
> +/* CGroup interface */
>   static u64 cpu_core_tag_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
>   {
>   	struct task_group *tg = css_tg(css);
> @@ -9207,18 +9435,9 @@ static int __sched_write_tag(void *data)
>   	 * when we set cgroup tag to 0 when the loop is done below.
>   	 */
>   	while ((p = css_task_iter_next(&it))) {
> -		p->core_cookie = !!val ? (unsigned long)tg : 0UL;
> -
> -		if (sched_core_enqueued(p)) {
> -			sched_core_dequeue(task_rq(p), p);
> -			if (!p->core_cookie)
> -				continue;
> -		}
> -
> -		if (sched_core_enabled(task_rq(p)) &&
> -		    p->core_cookie && task_on_rq_queued(p))
> -			sched_core_enqueue(task_rq(p), p);
> +		unsigned long cookie = !!val ? (unsigned long)tg : 0UL;
>   
> +		sched_core_tag_requeue(p, cookie, true /* group */);
>   	}
>   	css_task_iter_end(&it);
>   
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index c8fee8d9dfd4..88bf45267672 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -1024,6 +1024,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
>   		__PS("clock-delta", t1-t0);
>   	}
>   
> +#ifdef CONFIG_SCHED_CORE
> +	__PS("core_cookie", p->core_cookie);
> +#endif
> +
>   	sched_show_numa(p, m);
>   }
>   
> 

-chrish

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork
  2020-10-20  1:43 [PATCH v8 -tip 00/26] Core scheduling Joel Fernandes (Google)
@ 2020-10-20  1:43 ` Joel Fernandes (Google)
  2020-11-04 22:30   ` chris hyser
  0 siblings, 1 reply; 7+ messages in thread
From: Joel Fernandes (Google) @ 2020-10-20  1:43 UTC (permalink / raw)
  To: Nishanth Aravamudan, Julien Desfossez, Peter Zijlstra, Tim Chen,
	Vineeth Pillai, Aaron Lu, Aubrey Li, tglx, linux-kernel
  Cc: mingo, torvalds, fweisbec, keescook, kerrnel, Phil Auld,
	Valentin Schneider, Mel Gorman, Pawan Gupta, Paolo Bonzini, joel,
	vineeth, Chen Yu, Christian Brauner, Agata Gruza,
	Antonio Gomez Iglesias, graf, konrad.wilk, dfaggioli, pjt,
	rostedt, derkling, benbjiang, Alexandre Chartre, James.Bottomley,
	OWeisse, Dhaval Giani, Junaid Shahid, jsbarnes, chris.hyser,
	Aubrey Li, Paul E. McKenney, Tim Chen

In order to prevent interference and clearly support both per-task and CGroup
APIs, split the cookie into 2 and allow it to be set from either per-task, or
CGroup API. The final cookie is the combined value of both and is computed when
the stop-machine executes during a change of cookie.

Also, for the per-task cookie, it will get weird if we use pointers of any
emphemeral objects. For this reason, introduce a refcounted object who's sole
purpose is to assign unique cookie value by way of the object's pointer.

While at it, refactor the CGroup code a bit. Future patches will introduce more
APIs and support.

Tested-by: Julien Desfossez <jdesfossez@digitalocean.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/linux/sched.h |   2 +
 kernel/sched/core.c   | 241 ++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/debug.c  |   4 +
 3 files changed, 236 insertions(+), 11 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe6f225bfbf9..c6034c00846a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -688,6 +688,8 @@ struct task_struct {
 #ifdef CONFIG_SCHED_CORE
 	struct rb_node			core_node;
 	unsigned long			core_cookie;
+	unsigned long			core_task_cookie;
+	unsigned long			core_group_cookie;
 	unsigned int			core_occupation;
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bab4ea2f5cd8..30a9e4cb5ce1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -346,11 +346,14 @@ void sched_core_put(void)
 	mutex_unlock(&sched_core_mutex);
 }
 
+static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2);
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
 static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
 static bool sched_core_enqueued(struct task_struct *task) { return false; }
+static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2) { }
 
 #endif /* CONFIG_SCHED_CORE */
 
@@ -3583,6 +3586,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 #ifdef CONFIG_SCHED_CORE
 	RB_CLEAR_NODE(&p->core_node);
+
+	/*
+	 * Tag child via per-task cookie only if parent is tagged via per-task
+	 * cookie. This is independent of, but can be additive to the CGroup tagging.
+	 */
+	if (current->core_task_cookie) {
+
+		/* If it is not CLONE_THREAD fork, assign a unique per-task tag. */
+		if (!(clone_flags & CLONE_THREAD)) {
+			return sched_core_share_tasks(p, p);
+               }
+		/* Otherwise share the parent's per-task tag. */
+		return sched_core_share_tasks(p, current);
+	}
 #endif
 	return 0;
 }
@@ -9177,6 +9194,217 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SCHED_CORE
+/*
+ * A simple wrapper around refcount. An allocated sched_core_cookie's
+ * address is used to compute the cookie of the task.
+ */
+struct sched_core_cookie {
+	refcount_t refcnt;
+};
+
+/*
+ * sched_core_tag_requeue - Common helper for all interfaces to set a cookie.
+ * @p: The task to assign a cookie to.
+ * @cookie: The cookie to assign.
+ * @group: is it a group interface or a per-task interface.
+ *
+ * This function is typically called from a stop-machine handler.
+ */
+void sched_core_tag_requeue(struct task_struct *p, unsigned long cookie, bool group)
+{
+	if (!p)
+		return;
+
+	if (group)
+		p->core_group_cookie = cookie;
+	else
+		p->core_task_cookie = cookie;
+
+	/* Use up half of the cookie's bits for task cookie and remaining for group cookie. */
+	p->core_cookie = (p->core_task_cookie <<
+				(sizeof(unsigned long) * 4)) + p->core_group_cookie;
+
+	if (sched_core_enqueued(p)) {
+		sched_core_dequeue(task_rq(p), p);
+		if (!p->core_task_cookie)
+			return;
+	}
+
+	if (sched_core_enabled(task_rq(p)) &&
+			p->core_cookie && task_on_rq_queued(p))
+		sched_core_enqueue(task_rq(p), p);
+}
+
+/* Per-task interface */
+static unsigned long sched_core_alloc_task_cookie(void)
+{
+	struct sched_core_cookie *ptr =
+		kmalloc(sizeof(struct sched_core_cookie), GFP_KERNEL);
+
+	if (!ptr)
+		return 0;
+	refcount_set(&ptr->refcnt, 1);
+
+	/*
+	 * NOTE: sched_core_put() is not done by put_task_cookie(). Instead, it
+	 * is done after the stopper runs.
+	 */
+	sched_core_get();
+	return (unsigned long)ptr;
+}
+
+static bool sched_core_get_task_cookie(unsigned long cookie)
+{
+	struct sched_core_cookie *ptr = (struct sched_core_cookie *)cookie;
+
+	/*
+	 * NOTE: sched_core_put() is not done by put_task_cookie(). Instead, it
+	 * is done after the stopper runs.
+	 */
+	sched_core_get();
+	return refcount_inc_not_zero(&ptr->refcnt);
+}
+
+static void sched_core_put_task_cookie(unsigned long cookie)
+{
+	struct sched_core_cookie *ptr = (struct sched_core_cookie *)cookie;
+
+	if (refcount_dec_and_test(&ptr->refcnt))
+		kfree(ptr);
+}
+
+struct sched_core_task_write_tag {
+	struct task_struct *tasks[2];
+	unsigned long cookies[2];
+};
+
+/*
+ * Ensure that the task has been requeued. The stopper ensures that the task cannot
+ * be migrated to a different CPU while its core scheduler queue state is being updated.
+ * It also makes sure to requeue a task if it was running actively on another CPU.
+ */
+static int sched_core_task_join_stopper(void *data)
+{
+	struct sched_core_task_write_tag *tag = (struct sched_core_task_write_tag *)data;
+	int i;
+
+	for (i = 0; i < 2; i++)
+		sched_core_tag_requeue(tag->tasks[i], tag->cookies[i], false /* !group */);
+
+	return 0;
+}
+
+static int sched_core_share_tasks(struct task_struct *t1, struct task_struct *t2)
+{
+	struct sched_core_task_write_tag wr = {}; /* for stop machine. */
+	bool sched_core_put_after_stopper = false;
+	unsigned long cookie;
+	int ret = -ENOMEM;
+
+	mutex_lock(&sched_core_mutex);
+
+	/*
+	 * NOTE: sched_core_get() is done by sched_core_alloc_task_cookie() or
+	 *       sched_core_put_task_cookie(). However, sched_core_put() is done
+	 *       by this function *after* the stopper removes the tasks from the
+	 *       core queue, and not before. This is just to play it safe.
+	 */
+	if (t2 == NULL) {
+		if (t1->core_task_cookie) {
+			sched_core_put_task_cookie(t1->core_task_cookie);
+			sched_core_put_after_stopper = true;
+			wr.tasks[0] = t1; /* Keep wr.cookies[0] reset for t1. */
+		}
+	} else if (t1 == t2) {
+		/* Assign a unique per-task cookie solely for t1. */
+
+		cookie = sched_core_alloc_task_cookie();
+		if (!cookie)
+			goto out_unlock;
+
+		if (t1->core_task_cookie) {
+			sched_core_put_task_cookie(t1->core_task_cookie);
+			sched_core_put_after_stopper = true;
+		}
+		wr.tasks[0] = t1;
+		wr.cookies[0] = cookie;
+	} else
+	/*
+	 * 		t1		joining		t2
+	 * CASE 1:
+	 * before	0				0
+	 * after	new cookie			new cookie
+	 *
+	 * CASE 2:
+	 * before	X (non-zero)			0
+	 * after	0				0
+	 *
+	 * CASE 3:
+	 * before	0				X (non-zero)
+	 * after	X				X
+	 *
+	 * CASE 4:
+	 * before	Y (non-zero)			X (non-zero)
+	 * after	X				X
+	 */
+	if (!t1->core_task_cookie && !t2->core_task_cookie) {
+		/* CASE 1. */
+		cookie = sched_core_alloc_task_cookie();
+		if (!cookie)
+			goto out_unlock;
+
+		/* Add another reference for the other task. */
+		if (!sched_core_get_task_cookie(cookie)) {
+			return -EINVAL;
+			goto out_unlock;
+		}
+
+		wr.tasks[0] = t1;
+		wr.tasks[1] = t2;
+		wr.cookies[0] = wr.cookies[1] = cookie;
+
+	} else if (t1->core_task_cookie && !t2->core_task_cookie) {
+		/* CASE 2. */
+		sched_core_put_task_cookie(t1->core_task_cookie);
+		sched_core_put_after_stopper = true;
+
+		wr.tasks[0] = t1; /* Reset cookie for t1. */
+
+	} else if (!t1->core_task_cookie && t2->core_task_cookie) {
+		/* CASE 3. */
+		if (!sched_core_get_task_cookie(t2->core_task_cookie)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		wr.tasks[0] = t1;
+		wr.cookies[0] = t2->core_task_cookie;
+
+	} else {
+		/* CASE 4. */
+		if (!sched_core_get_task_cookie(t2->core_task_cookie)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		sched_core_put_task_cookie(t1->core_task_cookie);
+		sched_core_put_after_stopper = true;
+
+		wr.tasks[0] = t1;
+		wr.cookies[0] = t2->core_task_cookie;
+	}
+
+	stop_machine(sched_core_task_join_stopper, (void *)&wr, NULL);
+
+	if (sched_core_put_after_stopper)
+		sched_core_put();
+
+	ret = 0;
+out_unlock:
+	mutex_unlock(&sched_core_mutex);
+	return ret;
+}
+
+/* CGroup interface */
 static u64 cpu_core_tag_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
 {
 	struct task_group *tg = css_tg(css);
@@ -9207,18 +9435,9 @@ static int __sched_write_tag(void *data)
 	 * when we set cgroup tag to 0 when the loop is done below.
 	 */
 	while ((p = css_task_iter_next(&it))) {
-		p->core_cookie = !!val ? (unsigned long)tg : 0UL;
-
-		if (sched_core_enqueued(p)) {
-			sched_core_dequeue(task_rq(p), p);
-			if (!p->core_cookie)
-				continue;
-		}
-
-		if (sched_core_enabled(task_rq(p)) &&
-		    p->core_cookie && task_on_rq_queued(p))
-			sched_core_enqueue(task_rq(p), p);
+		unsigned long cookie = !!val ? (unsigned long)tg : 0UL;
 
+		sched_core_tag_requeue(p, cookie, true /* group */);
 	}
 	css_task_iter_end(&it);
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c8fee8d9dfd4..88bf45267672 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1024,6 +1024,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 		__PS("clock-delta", t1-t0);
 	}
 
+#ifdef CONFIG_SCHED_CORE
+	__PS("core_cookie", p->core_cookie);
+#endif
+
 	sched_show_numa(p, m);
 }
 
-- 
2.29.0.rc1.297.gfa9743e501-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-11-17  0:45 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <4ff7f030-b797-6711-fb6a-fe39bb02075a@oracle.com>
2020-11-09 23:23 ` [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork chris hyser
2020-11-10 14:15   ` Joel Fernandes
2020-11-17  0:45   ` Joel Fernandes
2020-10-20  1:43 [PATCH v8 -tip 00/26] Core scheduling Joel Fernandes (Google)
2020-10-20  1:43 ` [PATCH v8 -tip 17/26] sched: Split the cookie and setup per-task cookie on fork Joel Fernandes (Google)
2020-11-04 22:30   ` chris hyser
2020-11-05 14:49     ` Joel Fernandes
2020-11-09 23:30     ` chris hyser

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).