* [PATCH] sched: rt_rq runtime leakage bug fix
From: Cheng Xu @ 2011-05-11  7:34 UTC
  To: Ingo Molnar; +Cc: Peter Zijlstra, Paul McKenney, LKML

This patch fixes the bug reported at https://lkml.org/lkml/2011/4/26/13.

Function __disable_runtime() reports a leakage of rt_rq runtime. The root
cause is that __disable_runtime() assumes it iterates through all existing
rt_rq's while walking rq->leaf_rt_rq_list, which in fact contains only the
runnable rt_rq's. The same problem applies to __enable_runtime() and
print_rt_stats().
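
For reference, for_each_leaf_rt_rq() is defined along these lines (a
rough sketch of that version of kernel/sched_rt.c), so it only ever
visits rt_rq's that are currently queued on rq->leaf_rt_rq_list:

/* walks only the rt_rq's on the leaf list, i.e. the runnable ones */
#define for_each_leaf_rt_rq(rt_rq, rq) \
	list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)

An rt_rq with no runnable tasks is not on that list, so, for example,
__disable_runtime() never pulls such an rt_rq's runtime back, which
appears to be the reported leak.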

The patch is based on the above analysis. It appears to fix the problem,
but has only been lightly tested.


Signed-off-by: Cheng Xu <chengxu@linux.vnet.ibm.com>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

---
 kernel/sched_rt.c |   31 ++++++++++++++++++++++++-------
 1 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index e7cebdc..7f478ff 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+#define rt_rq_of_rq_decls(name) struct task_group *name
+
+#define list_for_rt_rq_of_rq(iterator, rq) \
+	list_for_each_entry_rcu(iterator, &task_groups, list)
+
+#define rt_rq_of_rq_deref(iterator, rq) (iterator->rt_rq[cpu_of(rq)])
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 	list_add_rcu(&rt_rq->leaf_rt_rq_list,
@@ -288,6 +295,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+#define rt_rq_of_rq_decls(name) struct rt_rq *name
+
+#define list_for_rt_rq_of_rq(iterator, rq) \
+	for (iterator = &rq->rt; iterator; iterator = NULL)
+
+#define rt_rq_of_rq_deref(iterator, rq) (iterator)
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 }
@@ -402,12 +416,13 @@ next:
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
-	struct rt_rq *rt_rq;
+	rt_rq_of_rq_decls(iterator);
 
 	if (unlikely(!scheduler_running))
 		return;
 
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	list_for_rt_rq_of_rq(iterator, rq) {
+		struct rt_rq *rt_rq = rt_rq_of_rq_deref(iterator, rq);
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 		s64 want;
 		int i;
@@ -487,7 +502,7 @@ static void disable_runtime(struct rq *rq)
 
 static void __enable_runtime(struct rq *rq)
 {
-	struct rt_rq *rt_rq;
+	rt_rq_of_rq_decls(iterator);
 
 	if (unlikely(!scheduler_running))
 		return;
@@ -495,7 +510,8 @@ static void __enable_runtime(struct rq *rq)
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	list_for_rt_rq_of_rq(iterator, rq) {
+		struct rt_rq *rt_rq = rt_rq_of_rq_deref(iterator, rq);
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		raw_spin_lock(&rt_b->rt_runtime_lock);
@@ -1796,11 +1812,12 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 static void print_rt_stats(struct seq_file *m, int cpu)
 {
-	struct rt_rq *rt_rq;
+	rt_rq_of_rq_decls(iterator);
+	struct rq *rq = cpu_rq(cpu);
 
 	rcu_read_lock();
-	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
-		print_rt_rq(m, cpu, rt_rq);
+	list_for_rt_rq_of_rq(iterator, rq)
+		print_rt_rq(m, cpu, rt_rq_of_rq_deref(iterator, rq));
 	rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
-- 
1.7.1

* Re: [PATCH] sched: rt_rq runtime leakage bug fix
From: Peter Zijlstra @ 2011-05-11  9:21 UTC
  To: Cheng Xu; +Cc: Ingo Molnar, Paul McKenney, LKML

On Wed, 2011-05-11 at 15:34 +0800, Cheng Xu wrote:
> This patch fixes the bug reported at https://lkml.org/lkml/2011/4/26/13.

This really doesn't tell me anything; please restate the relevant
information.

> Function __disable_runtime() reports a leakage of rt_rq runtime. The
> root cause is that __disable_runtime() assumes it iterates through all
> existing rt_rq's while walking rq->leaf_rt_rq_list, which in fact
> contains only the runnable rt_rq's. The same problem applies to
> __enable_runtime() and print_rt_stats().

Teach your mailer to wrap at 78 characters for changelogs.

> The patch is based on the above analysis. It appears to fix the
> problem, but has only been lightly tested.
> 
> 
> Signed-off-by: Cheng Xu <chengxu@linux.vnet.ibm.com>
> Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 

Don't leave whitespace between the tags and the triple-dash. Also, I
suspect you're missing a Reported-by: paulmck tag.

> ---
>  kernel/sched_rt.c |   31 ++++++++++++++++++++++++-------
>  1 files changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
> index e7cebdc..7f478ff 100644
> --- a/kernel/sched_rt.c
> +++ b/kernel/sched_rt.c
> @@ -183,6 +183,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
>  	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
>  }
>  
> +#define rt_rq_of_rq_decls(name) struct task_group *name
> +
> +#define list_for_rt_rq_of_rq(iterator, rq) \
> +	list_for_each_entry_rcu(iterator, &task_groups, list)
> +
> +#define rt_rq_of_rq_deref(iterator, rq) (iterator->rt_rq[cpu_of(rq)])
> +
>  static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
>  {
>  	list_add_rcu(&rt_rq->leaf_rt_rq_list,
> @@ -288,6 +295,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
>  	return ktime_to_ns(def_rt_bandwidth.rt_period);
>  }
>  
> +#define rt_rq_of_rq_decls(name) struct rt_rq *name
> +
> +#define list_for_rt_rq_of_rq(iterator, rq) \
> +	for (iterator = &rq->rt; iterator; iterator = NULL)
> +
> +#define rt_rq_of_rq_deref(iterator, rq) (iterator)
> +
>  static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
>  {
>  }

So I see why you did that, I just don't much like it, especially the
decls macros; C has typedef to deal with that problem. You can also get
rid of the deref macros. (If we were allowed C99, we could avoid the
whole iter thing and declare a for-scope variable.)

How about something like:

typedef struct task_group *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq)                                     \
	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list),  \
	     rt_rq = iter->rt_rq[cpu_of(rq)]; &iter->list != &task_groups;  \
	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list),   \
	     rt_rq = iter->rt_rq[cpu_of(rq)])

which is then used like:

	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	for_each_rt_rq(rt_rq, iter, rq) {
		/* do something with rt_rq */
	}




* Re: [PATCH] sched: rt_rq runtime leakage bug fix
From: Cheng Xu @ 2011-05-11 17:30 UTC
  To: Peter Zijlstra; +Cc: Ingo Molnar, Paul McKenney, LKML

Hi Peter,

I tried it but hit a boot-time error, "Unable to handle kernel paging
request for data at address 0x100000008", so I would like to propose an
alternative patch:

#define for_each_rt_rq(rt_rq, iter, rq) \
        for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
             (&iter->list != &task_groups) && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
             iter = list_entry_rcu(iter->list.next, typeof(*iter), list))

This works and seems to pass the tests. Is it correct from a scheduler
perspective?

For the !CONFIG_RT_GROUP_SCHED case, I used

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
	(void) iter; \
	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

An alternative is:
#define for_each_rt_rq(rt_rq, iter, rq) \
	for (rt_rq = iter = &rq->rt; iter; rt_rq = iter = NULL)

The patch is attached below. Could you check whether it works? Thank you.

---
 kernel/sched_rt.c |   22 +++++++++++++++++++---
 1 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index e7cebdc..f9e621a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+typedef struct task_group *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
+	     (&iter->list != &task_groups) && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 	list_add_rcu(&rt_rq->leaf_rt_rq_list,
@@ -288,6 +295,12 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	(void) iter; \
+	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 }
@@ -402,12 +415,13 @@ next:
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
 		return;
 
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 		s64 want;
 		int i;
@@ -487,6 +501,7 @@ static void disable_runtime(struct rq *rq)
 
 static void __enable_runtime(struct rq *rq)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
@@ -495,7 +510,7 @@ static void __enable_runtime(struct rq *rq)
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		raw_spin_lock(&rt_b->rt_runtime_lock);
@@ -1796,10 +1811,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 static void print_rt_stats(struct seq_file *m, int cpu)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	rcu_read_lock();
-	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
 		print_rt_rq(m, cpu, rt_rq);
 	rcu_read_unlock();
 }
-- 
1.7.1

* Re: [PATCH] sched: rt_rq runtime leakage bug fix
From: Peter Zijlstra @ 2011-05-12 10:12 UTC
  To: Cheng Xu; +Cc: Ingo Molnar, Paul McKenney, LKML

On Thu, 2011-05-12 at 01:30 +0800, Cheng Xu wrote:
> 
> I tried it but hit a boot-time error, "Unable to handle kernel paging
> request for data at address 0x100000008", so I would like to propose
> an alternative patch:
> 
I probably made a silly mistake somewhere; it was, after all, something
quickly typed into an email :-)

> #define for_each_rt_rq(rt_rq, iter, rq) \
>         for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
>              (&iter->list != &task_groups) && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
>              iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
> 
> This works and seems to pass the tests. Is it correct from a scheduler
> perspective?

Creative ;-), it would be nice to know why the ','-operator version
doesn't work though, since that looks to be the more conventional way
to write it.

That said, I don't see a problem with using your existing one.

> For the !CONFIG_RT_GROUP_SCHED case, I used
> 
> typedef struct rt_rq *rt_rq_iter_t;
> 
> #define for_each_rt_rq(rt_rq, iter, rq) \
>         (void) iter; \
>         for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
> 
> An alternative is:
> #define for_each_rt_rq(rt_rq, iter, rq) \
>         for (rt_rq = iter = &rq->rt; iter; rt_rq = iter = NULL)

Tough call, that. The first has a multi-statement macro, which is
generally discouraged because then:

  for()
   for_each_rt_rq() {
   }

will not work as expected, so I think we want the second version.
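
To spell out the pitfall (a hypothetical expansion sketch, not code from
the patch): the "(void) iter;" statement alone becomes the body of an
unbraced outer loop, and the for-loop itself runs only once, afterwards:

	for (i = 0; i < n; i++)
		for_each_rt_rq(rt_rq, iter, rq) {
			/* ... */
		}

	/* expands to: */
	for (i = 0; i < n; i++)
		(void) iter;		/* the entire outer loop body */
	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) {
		/* ... */		/* runs once, outside the outer loop */
	}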

> The patch is attached below. Could you check whether it works? Thank you.

Yes, given how things are I can't really see it getting any better,
thanks!



* Re: [PATCH] sched: rt_rq runtime leakage bug fix
From: Cheng Xu @ 2011-05-12 10:55 UTC
  To: Peter Zijlstra; +Cc: Ingo Molnar, Paul McKenney, LKML

On 5/12/2011 18:12, Peter Zijlstra wrote:
> On Thu, 2011-05-12 at 01:30 +0800, Cheng Xu wrote:
>>
>> I tried it but hit a boot-time error, "Unable to handle kernel paging
>> request for data at address 0x100000008", so I would like to propose
>> an alternative patch:
>>
> I probably made a silly mistake somewhere; it was, after all, something
> quickly typed into an email :-)
> 
>> #define for_each_rt_rq(rt_rq, iter, rq) \
>>         for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
>>              (&iter->list != &task_groups) && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
>>              iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
>>
>> This works and seems to pass the tests. Is it correct from a scheduler
>> perspective?
> 
> Creative ;-), it would be nice to know why the ','-operator version
> doesn't work though, since that looks to be the more conventional way
> to write it.

Yes, I am also wondering why it doesn't work. I will look into it and
get back to you later.

> 
> That said, I don't see a problem with using your existing one.
> 
>> For the !CONFIG_RT_GROUP_SCHED case, I used
>>
>> typedef struct rt_rq *rt_rq_iter_t;
>>
>> #define for_each_rt_rq(rt_rq, iter, rq) \
>>         (void) iter; \
>>         for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
>>
>> An alternative is:
>> #define for_each_rt_rq(rt_rq, iter, rq) \
>>         for (rt_rq = iter = &rq->rt; iter; rt_rq = iter = NULL)
> 
> Tough call, that. The first has a multi-statement macro, which is
> generally discouraged because then:
> 
>   for()
>    for_each_rt_rq() {
>    }
> 
> will not work as expected, so I think we want the second version.

Agreed, I realized this problem soon after sending out the email
yesterday :) and improved it to:

#define for_each_rt_rq(rt_rq, iter, rq) \
	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

Maybe we can still use it?
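
Folding the "(void) iter" into the for-initializer makes the macro a
single statement, so, sketching the same nesting as in your example,
the expansion now behaves as expected:

	for (i = 0; i < n; i++)
		for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) {
			/* ... */	/* correctly nested in the outer loop */
		}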

> 
>> The patch is attached below. Could you check whether it works? Thank you.
> 
> Yes, given how things are I can't really see it getting any better,
> thanks!
> 

I have updated the patch according to the comments and have done part
of the testing. I will send out the complete second version for your
review soon.

Thank you very much!





* Re: [PATCH] sched: rt_rq runtime leakage bug fix
From: Peter Zijlstra @ 2011-05-12 11:27 UTC
  To: Cheng Xu; +Cc: Ingo Molnar, Paul McKenney, LKML

On Thu, 2011-05-12 at 18:55 +0800, Cheng Xu wrote:
> #define for_each_rt_rq(rt_rq, iter, rq) \
>         for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
> 
> Maybe we can still use it?

Sure, that looks fine to me; looking forward to your final patch!



* Re: [PATCH] sched: rt_rq runtime leakage bug fix
From: Cheng Xu @ 2011-05-14  5:48 UTC
  To: Peter Zijlstra; +Cc: Ingo Molnar, Paul McKenney, LKML

On 2011-5-12 18:12, Peter Zijlstra wrote:
> 
> it would be nice to know why the ','-operator version doesn't work
> though, since that looks to be the more conventional way to write it.
> 

I did some investigation; it looks like the problem is the following:

1 #define for_each_rt_rq(rt_rq, iter, rq)                                   \
2	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list),  \
3	     rt_rq = iter->rt_rq[cpu_of(rq)]; &iter->list != &task_groups;  \
4	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list),   \
5	     rt_rq = iter->rt_rq[cpu_of(rq)])

In the for loop, when task_groups (the sentinel node of the circular
doubly linked list) is reached after the final iteration, a fake iter
(of type struct task_group *) is computed at line 4 via
container_of(&task_groups, struct task_group, list). By "fake" I mean
that it is just an address for which &iter->list == &task_groups holds;
it does not point to a real struct task_group object. Accessing other
members of the fake iter is what causes the page fault.

On my JS22 blade, cpu_of(rq) == 1, and the fake iter->rt_rq happens to
be 0x100000000, the value of another global variable near task_groups.
The kernel takes that value plus 8 as the address of iter->rt_rq[1],
and the page fault then occurs at address 0x100000008.
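
Sketching the increment expression of that final trip through the loop:

	iter = list_entry_rcu(iter->list.next, typeof(*iter), list);
		/* now &iter->list == &task_groups: iter is fake */
	rt_rq = iter->rt_rq[cpu_of(rq)];
		/* evaluated before the loop condition is re-checked:
		 * loads a garbage pointer (here 0x100000000) from the
		 * memory near task_groups, then faults dereferencing
		 * 0x100000000 + 8 */

This would also explain why the '&&' version survives: there the
sentinel test short-circuits before rt_rq is assigned, so the fake
iter is never dereferenced.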

