All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chengming Zhou <zhouchengming@bytedance.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	namhyung@kernel.org, eranian@google.com,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	duanxiongchun@bytedance.com, songmuchun@bytedance.com
Subject: Re: [External] Re: [PATCH v2 2/6] perf/core: Introduce percpu perf_cgroup
Date: Wed, 23 Mar 2022 21:07:01 +0800	[thread overview]
Message-ID: <f6a46509-a373-5c7a-8694-8eaf0ebc69ab@bytedance.com> (raw)
In-Reply-To: <20220323125116.GX8939@worktop.programming.kicks-ass.net>

On 2022/3/23 8:51 下午, Peter Zijlstra wrote:
> On Tue, Mar 22, 2022 at 08:08:30PM +0800, Chengming Zhou wrote:
> 
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 8b5cf2aedfe6..848a3bfa9513 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
> 
>> @@ -843,11 +845,21 @@ static void perf_cgroup_switch(struct task_struct *task)
>>  	 */
>>  	local_irq_save(flags);
>>  
>> +	cgrp = perf_cgroup_from_task(task, NULL);
>> +	if (cgrp == __this_cpu_read(cpu_perf_cgroup))
>> +		goto out;
>> +
>> +	__this_cpu_write(cpu_perf_cgroup, cgrp);
>> +
>>  	list = this_cpu_ptr(&cgrp_cpuctx_list);
>>  	list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
>>  		WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
>>  
>>  		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>> +
>> +		if (cpuctx->cgrp == cgrp)
>> +			continue;
>> +
>>  		perf_pmu_disable(cpuctx->ctx.pmu);
>>  
>>  		cpu_ctx_sched_out(cpuctx, EVENT_ALL);
> 
> This is just straight up broken.. you can't continue after taking a
> lock, that'll miss unlock.

Yes, Namhyung has pointed it out, I will fix it next version.

> 
> Also, I really don't see the point of cpu_perf_cgroup, cpuctx->cgrp
> should be able to do this.

But the problem is that we have two cpuctx on the percpu list, do you
mean we should use perf_cgroup of the first cpuctx to compare with
the next task's perf_cgroup ?

Or we should delete the cgrp in cpuctx, and use this new percpu cpu_perf_cgroup?

> 
> Also, perhaps merge this in the previous patch, I'm not sure it makes
> sense to split this.

Ok, will do. I put it in one patch in v1 RFC, then split it for easier review.
I will put it together in the next version.

Thanks.

> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -833,6 +833,7 @@ static DEFINE_PER_CPU(struct list_head,
>   */
>  static void perf_cgroup_switch(struct task_struct *task)
>  {
> +	struct perf_cgroup *cgrp;
>  	struct perf_cpu_context *cpuctx, *tmp;
>  	struct list_head *list;
>  	unsigned long flags;
> @@ -843,11 +844,20 @@ static void perf_cgroup_switch(struct ta
>  	 */
>  	local_irq_save(flags);
>  
> +	cgrp = perf_cgroup_from_task(task, NULL);
> +
>  	list = this_cpu_ptr(&cgrp_cpuctx_list);
>  	list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
>  		WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
>  
> +		if (READ_ONCE(cpuctx->cgrp == cgrp))
> +			continue
> +
>  		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> +
> +		if (cpuctx->cgrp == cgrp)
> +			goto next;
> +
>  		perf_pmu_disable(cpuctx->ctx.pmu);
>  
>  		cpu_ctx_sched_out(cpuctx, EVENT_ALL);
> @@ -855,50 +865,22 @@ static void perf_cgroup_switch(struct ta
>  		 * must not be done before ctxswout due
>  		 * to event_filter_match() in event_sched_out()
>  		 */
> -		cpuctx->cgrp = perf_cgroup_from_task(task,
> -						     &cpuctx->ctx);
> +		WRITE_ONCE(cpuctx->cgrp, cgrp);
>  		/*
>  		 * set cgrp before ctxsw in to allow
>  		 * event_filter_match() to not have to pass
>  		 * task around
> -		 * we pass the cpuctx->ctx to perf_cgroup_from_task()
> -		 * because cgroup events are only per-cpu
>  		 */
>  		cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
>  
>  		perf_pmu_enable(cpuctx->ctx.pmu);
> +next:
>  		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
>  	}
>  
>  	local_irq_restore(flags);
>  }
>  
> -static inline void perf_cgroup_sched_switch(struct task_struct *task,
> -					    struct task_struct *next)
> -{
> -	struct perf_cgroup *cgrp1;
> -	struct perf_cgroup *cgrp2 = NULL;
> -
> -	rcu_read_lock();
> -	/*
> -	 * we come here when we know perf_cgroup_events > 0
> -	 * we do not need to pass the ctx here because we know
> -	 * we are holding the rcu lock
> -	 */
> -	cgrp1 = perf_cgroup_from_task(task, NULL);
> -	cgrp2 = perf_cgroup_from_task(next, NULL);
> -
> -	/*
> -	 * only schedule out current cgroup events if we know
> -	 * that we are switching to a different cgroup. Otherwise,
> -	 * do no touch the cgroup events.
> -	 */
> -	if (cgrp1 != cgrp2)
> -		perf_cgroup_switch(task);
> -
> -	rcu_read_unlock();
> -}
> -
>  static int perf_cgroup_ensure_storage(struct perf_event *event,
>  				struct cgroup_subsys_state *css)
>  {
> @@ -1062,11 +1044,6 @@ static inline void update_cgrp_time_from
>  {
>  }
>  
> -static inline void perf_cgroup_sched_switch(struct task_struct *task,
> -					    struct task_struct *next)
> -{
> -}
> -
>  static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
>  				      struct perf_event_attr *attr,
>  				      struct perf_event *group_leader)
> @@ -1080,11 +1057,6 @@ perf_cgroup_set_timestamp(struct task_st
>  {
>  }
>  
> -static inline void
> -perf_cgroup_sched_switch(struct task_struct *task, struct task_struct *next)
> -{
> -}
> -
>  static inline u64 perf_cgroup_event_time(struct perf_event *event)
>  {
>  	return 0;
> @@ -1104,6 +1076,10 @@ static inline void
>  perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
>  {
>  }
> +
> +static void perf_cgroup_switch(struct task_struct *task)
> +{
> +}
>  #endif
>  
>  /*
> @@ -3625,7 +3601,7 @@ void __perf_event_task_sched_out(struct
>  	 * cgroup event are system-wide mode only
>  	 */
>  	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
> -		perf_cgroup_sched_switch(task, next);
> +		perf_cgroup_switch(next);
>  }
>  
>  /*

  reply	other threads:[~2022-03-23 13:07 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-22 12:08 [PATCH v2 0/6] perf/core: Fixes and cleanup for cgroup events Chengming Zhou
2022-03-22 12:08 ` [PATCH v2 1/6] perf/core: Fix incosistency between cgroup sched_out and sched_in Chengming Zhou
2022-03-22 12:59   ` Peter Zijlstra
2022-03-22 13:38     ` [External] " Chengming Zhou
2022-03-22 14:54       ` Peter Zijlstra
2022-03-22 15:16         ` Chengming Zhou
2022-03-22 15:28           ` Chengming Zhou
2022-03-22 22:06             ` Namhyung Kim
2022-03-23  8:11             ` Peter Zijlstra
2022-03-22 12:08 ` [PATCH v2 2/6] perf/core: Introduce percpu perf_cgroup Chengming Zhou
2022-03-22 13:01   ` Peter Zijlstra
2022-03-22 16:33     ` [External] " Chengming Zhou
2022-03-23  8:13       ` Peter Zijlstra
2022-03-23 12:58         ` Chengming Zhou
2022-03-22 22:21     ` Namhyung Kim
2022-03-22 22:18   ` Namhyung Kim
2022-03-23  1:27     ` [Phishing Risk] [External] " Chengming Zhou
2022-03-23 12:51   ` Peter Zijlstra
2022-03-23 13:07     ` Chengming Zhou [this message]
2022-03-23 13:17       ` [External] " Peter Zijlstra
2022-03-23 13:37         ` Chengming Zhou
2022-03-23 14:05           ` Peter Zijlstra
2022-03-23 15:44             ` Chengming Zhou
2022-03-22 12:08 ` [PATCH v2 3/6] perf/core: Don't pass task around when ctx sched in Chengming Zhou
2022-03-22 13:01   ` Peter Zijlstra
2022-03-22 12:08 ` [PATCH v2 4/6] perf/core: Use stable cpuctx->cgrp when update perf cgroup time Chengming Zhou
2022-03-22 13:03   ` Peter Zijlstra
2022-03-22 12:08 ` [PATCH v2 5/6] perf/core: Always set cpuctx cgrp when enable cgroup event Chengming Zhou
2022-03-22 12:08 ` [PATCH v2 6/6] perf/core: Don't need event_filter_match when merge_sched_in() Chengming Zhou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f6a46509-a373-5c7a-8694-8eaf0ebc69ab@bytedance.com \
    --to=zhouchengming@bytedance.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=duanxiongchun@bytedance.com \
    --cc=eranian@google.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=songmuchun@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.