From: Christophe Leroy <christophe.leroy@csgroup.eu>
To: Marco Elver <elver@google.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>,
	"linux-sh@vger.kernel.org" <linux-sh@vger.kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	"x86@kernel.org" <x86@kernel.org>,
	"linuxppc-dev@lists.ozlabs.org" <linuxppc-dev@lists.ozlabs.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-perf-users@vger.kernel.org"
	<linux-perf-users@vger.kernel.org>,
	"kasan-dev@googlegroups.com" <kasan-dev@googlegroups.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Jiri Olsa <jolsa@redhat.com>, Dmitry Vyukov <dvyukov@google.com>
Subject: Re: [PATCH v2 08/13] powerpc/hw_breakpoint: Avoid relying on caller synchronization
Date: Fri, 1 Jul 2022 08:54:50 +0000
Message-ID: <045a825c-cd7d-5878-d655-3d55fffb9ac2@csgroup.eu>
In-Reply-To: <20220628095833.2579903-9-elver@google.com>

Hi Marco,

On 28/06/2022 at 11:58, Marco Elver wrote:
> Internal data structures (cpu_bps, task_bps) of powerpc's hw_breakpoint
> implementation have relied on nr_bp_mutex serializing access to them.
> 
> Before overhauling synchronization of kernel/events/hw_breakpoint.c,
> introduce 2 spinlocks to synchronize cpu_bps and task_bps respectively,
> thus avoiding reliance on callers synchronizing powerpc's hw_breakpoint.
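
(As I read it, the pattern the patch applies throughout is to bracket every
access to the shared structures with the corresponding new lock. A minimal
sketch of that pattern, reusing names from the patch; the helper function
itself is hypothetical, not code from the file:

	static DEFINE_SPINLOCK(task_bps_lock);
	static LIST_HEAD(task_bps);

	/* Hypothetical helper illustrating the locking pattern. */
	static bool any_task_bp_conflict(struct perf_event *bp)
	{
		struct breakpoint *tmp;
		bool ret = false;

		/* Serialize against concurrent add/remove on task_bps. */
		spin_lock(&task_bps_lock);
		list_for_each_entry(tmp, &task_bps, list) {
			if (!can_co_exist(tmp, bp)) {
				ret = true;
				break;	/* must not return with the lock held */
			}
		}
		spin_unlock(&task_bps_lock);
		return ret;
	}

Using two separate locks, one per structure, keeps the task_bps and cpu_bps
paths independent of each other.)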

We have an old, still-open issue related to hw_breakpoint in our issue
database. I was wondering whether it could be linked to the changes you
are making, and whether you could handle it at the same time.

https://github.com/linuxppc/issues/issues/38

Maybe it is completely unrelated, but since your series only modifies
powerpc, and since the issue says that powerpc is the only architecture
doing that, I thought it was worth a heads-up.

Thanks
Christophe

> 
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Marco Elver <elver@google.com>
> ---
> v2:
> * New patch.
> ---
>   arch/powerpc/kernel/hw_breakpoint.c | 53 ++++++++++++++++++++++-------
>   1 file changed, 40 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
> index 2669f80b3a49..8db1a15d7acb 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -15,6 +15,7 @@
>   #include <linux/kernel.h>
>   #include <linux/sched.h>
>   #include <linux/smp.h>
> +#include <linux/spinlock.h>
>   #include <linux/debugfs.h>
>   #include <linux/init.h>
>   
> @@ -129,7 +130,14 @@ struct breakpoint {
>   	bool ptrace_bp;
>   };
>   
> +/*
> + * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
> + * rely on it safely synchronizing internals here; however, we can rely on it
> + * not requesting more breakpoints than available.
> + */
> +static DEFINE_SPINLOCK(cpu_bps_lock);
>   static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
> +static DEFINE_SPINLOCK(task_bps_lock);
>   static LIST_HEAD(task_bps);
>   
>   static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
> @@ -174,7 +182,9 @@ static int task_bps_add(struct perf_event *bp)
>   	if (IS_ERR(tmp))
>   		return PTR_ERR(tmp);
>   
> +	spin_lock(&task_bps_lock);
>   	list_add(&tmp->list, &task_bps);
> +	spin_unlock(&task_bps_lock);
>   	return 0;
>   }
>   
> @@ -182,6 +192,7 @@ static void task_bps_remove(struct perf_event *bp)
>   {
>   	struct list_head *pos, *q;
>   
> +	spin_lock(&task_bps_lock);
>   	list_for_each_safe(pos, q, &task_bps) {
>   		struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
>   
> @@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp)
>   			break;
>   		}
>   	}
> +	spin_unlock(&task_bps_lock);
>   }
>   
>   /*
> @@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp)
>   static bool all_task_bps_check(struct perf_event *bp)
>   {
>   	struct breakpoint *tmp;
> +	bool ret = false;
>   
> +	spin_lock(&task_bps_lock);
>   	list_for_each_entry(tmp, &task_bps, list) {
> -		if (!can_co_exist(tmp, bp))
> -			return true;
> +		if (!can_co_exist(tmp, bp)) {
> +			ret = true;
> +			break;
> +		}
>   	}
> -	return false;
> +	spin_unlock(&task_bps_lock);
> +	return ret;
>   }
>   
>   /*
> @@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp)
>   static bool same_task_bps_check(struct perf_event *bp)
>   {
>   	struct breakpoint *tmp;
> +	bool ret = false;
>   
> +	spin_lock(&task_bps_lock);
>   	list_for_each_entry(tmp, &task_bps, list) {
>   		if (tmp->bp->hw.target == bp->hw.target &&
> -		    !can_co_exist(tmp, bp))
> -			return true;
> +		    !can_co_exist(tmp, bp)) {
> +			ret = true;
> +			break;
> +		}
>   	}
> -	return false;
> +	spin_unlock(&task_bps_lock);
> +	return ret;
>   }
>   
>   static int cpu_bps_add(struct perf_event *bp)
> @@ -234,6 +256,7 @@ static int cpu_bps_add(struct perf_event *bp)
>   	if (IS_ERR(tmp))
>   		return PTR_ERR(tmp);
>   
> +	spin_lock(&cpu_bps_lock);
>   	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
>   	for (i = 0; i < nr_wp_slots(); i++) {
>   		if (!cpu_bp[i]) {
> @@ -241,6 +264,7 @@ static int cpu_bps_add(struct perf_event *bp)
>   			break;
>   		}
>   	}
> +	spin_unlock(&cpu_bps_lock);
>   	return 0;
>   }
>   
> @@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp)
>   	struct breakpoint **cpu_bp;
>   	int i = 0;
>   
> +	spin_lock(&cpu_bps_lock);
>   	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
>   	for (i = 0; i < nr_wp_slots(); i++) {
>   		if (!cpu_bp[i])
> @@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp)
>   			break;
>   		}
>   	}
> +	spin_unlock(&cpu_bps_lock);
>   }
>   
>   static bool cpu_bps_check(int cpu, struct perf_event *bp)
>   {
>   	struct breakpoint **cpu_bp;
> +	bool ret = false;
>   	int i;
>   
> +	spin_lock(&cpu_bps_lock);
>   	cpu_bp = per_cpu_ptr(cpu_bps, cpu);
>   	for (i = 0; i < nr_wp_slots(); i++) {
> -		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp))
> -			return true;
> +		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
> +			ret = true;
> +			break;
> +		}
>   	}
> -	return false;
> +	spin_unlock(&cpu_bps_lock);
> +	return ret;
>   }
>   
>   static bool all_cpu_bps_check(struct perf_event *bp)
> @@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp)
>   	return false;
>   }
>   
> -/*
> - * We don't use any locks to serialize accesses to cpu_bps or task_bps
> - * because are already inside nr_bp_mutex.
> - */
>   int arch_reserve_bp_slot(struct perf_event *bp)
>   {
>   	int ret;

Thread overview: 64+ messages
2022-06-28  9:58 [PATCH v2 00/13] perf/hw_breakpoint: Optimize for thousands of tasks Marco Elver
2022-06-28  9:58 ` [PATCH v2 01/13] perf/hw_breakpoint: Add KUnit test for constraints accounting Marco Elver
2022-06-28 12:53   ` Dmitry Vyukov
2022-06-28 13:26     ` Marco Elver
2022-06-28  9:58 ` [PATCH v2 02/13] perf/hw_breakpoint: Clean up headers Marco Elver
2022-06-28  9:58 ` [PATCH v2 03/13] perf/hw_breakpoint: Optimize list of per-task breakpoints Marco Elver
2022-06-28 13:08   ` Dmitry Vyukov
2022-06-28 14:53     ` Marco Elver
2022-06-28 15:27       ` Dmitry Vyukov
2022-06-28  9:58 ` [PATCH v2 04/13] perf/hw_breakpoint: Mark data __ro_after_init Marco Elver
2022-06-28  9:58 ` [PATCH v2 05/13] perf/hw_breakpoint: Optimize constant number of breakpoint slots Marco Elver
2022-06-28  9:58 ` [PATCH v2 06/13] perf/hw_breakpoint: Make hw_breakpoint_weight() inlinable Marco Elver
2022-06-28 13:16   ` Dmitry Vyukov
2022-06-28  9:58 ` [PATCH v2 07/13] perf/hw_breakpoint: Remove useless code related to flexible breakpoints Marco Elver
2022-06-28 13:18   ` Dmitry Vyukov
2022-06-28  9:58 ` [PATCH v2 08/13] powerpc/hw_breakpoint: Avoid relying on caller synchronization Marco Elver
2022-06-28 13:21   ` Dmitry Vyukov
2022-07-01  8:54   ` Christophe Leroy [this message]
2022-07-01  9:41     ` Marco Elver
2022-07-01 10:15       ` Christophe Leroy
2022-06-28  9:58 ` [PATCH v2 09/13] locking/percpu-rwsem: Add percpu_is_write_locked() and percpu_is_read_locked() Marco Elver
2022-06-28 14:44   ` Dmitry Vyukov
2022-06-28  9:58 ` [PATCH v2 10/13] perf/hw_breakpoint: Reduce contention with large number of tasks Marco Elver
2022-06-28 14:45   ` Dmitry Vyukov
2022-06-28  9:58 ` [PATCH v2 11/13] perf/hw_breakpoint: Introduce bp_slots_histogram Marco Elver
2022-06-28 14:52   ` Dmitry Vyukov
2022-06-28  9:58 ` [PATCH v2 12/13] perf/hw_breakpoint: Optimize max_bp_pinned_slots() for CPU-independent task targets Marco Elver
2022-06-28 15:41   ` Dmitry Vyukov
2022-06-28  9:58 ` [PATCH v2 13/13] perf/hw_breakpoint: Optimize toggle_bp_slot() " Marco Elver
2022-06-28 10:54   ` Marco Elver
2022-06-28 15:45   ` Dmitry Vyukov
2022-06-28 16:00     ` Marco Elver
