linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Marco Elver <elver@google.com>
To: elver@google.com, Peter Zijlstra <peterz@infradead.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Dmitry Vyukov <dvyukov@google.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	linuxppc-dev@lists.ozlabs.org, linux-perf-users@vger.kernel.org,
	x86@kernel.org, linux-sh@vger.kernel.org,
	kasan-dev@googlegroups.com, linux-kernel@vger.kernel.org
Subject: [PATCH v3 06/14] perf/hw_breakpoint: Optimize constant number of breakpoint slots
Date: Mon,  4 Jul 2022 17:05:06 +0200	[thread overview]
Message-ID: <20220704150514.48816-7-elver@google.com> (raw)
In-Reply-To: <20220704150514.48816-1-elver@google.com>

Optimize internal hw_breakpoint state if the architecture's number of
breakpoint slots is constant. This avoids several kmalloc() calls and
potentially unnecessary failures if the allocations fail, as well as
subtly improves code generation and cache locality.

The protocol is that if an architecture defines hw_breakpoint_slots via
the preprocessor, it must be constant and the same for all types.

Signed-off-by: Marco Elver <elver@google.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
---
 arch/sh/include/asm/hw_breakpoint.h  |  5 +-
 arch/x86/include/asm/hw_breakpoint.h |  5 +-
 kernel/events/hw_breakpoint.c        | 94 ++++++++++++++++++----------
 3 files changed, 63 insertions(+), 41 deletions(-)

diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h
index 199d17b765f2..361a0f57bdeb 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -48,10 +48,7 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM		2
 
-static inline int hw_breakpoint_slots(int type)
-{
-	return HBP_NUM;
-}
+#define hw_breakpoint_slots(type) (HBP_NUM)
 
 /* arch/sh/kernel/hw_breakpoint.c */
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index a1f0e90d0818..0bc931cd0698 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
-static inline int hw_breakpoint_slots(int type)
-{
-	return HBP_NUM;
-}
+#define hw_breakpoint_slots(type) (HBP_NUM)
 
 struct perf_event_attr;
 struct perf_event;
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 7df46b276452..9fb66d358d81 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -40,13 +40,16 @@ struct bp_cpuinfo {
 	/* Number of pinned cpu breakpoints in a cpu */
 	unsigned int	cpu_pinned;
 	/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
+#ifdef hw_breakpoint_slots
+	unsigned int	tsk_pinned[hw_breakpoint_slots(0)];
+#else
 	unsigned int	*tsk_pinned;
+#endif
 	/* Number of non-pinned cpu/task breakpoints in a cpu */
 	unsigned int	flexible; /* XXX: placeholder, see fetch_this_slot() */
 };
 
 static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
-static int nr_slots[TYPE_MAX] __ro_after_init;
 
 static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
 {
@@ -73,6 +76,54 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+#ifdef hw_breakpoint_slots
+/*
+ * Number of breakpoint slots is constant, and the same for all types.
+ */
+static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
+static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
+static inline int init_breakpoint_slots(void)		{ return 0; }
+#else
+/*
+ * Dynamic number of breakpoint slots.
+ */
+static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
+
+static inline int hw_breakpoint_slots_cached(int type)
+{
+	return __nr_bp_slots[type];
+}
+
+static __init int init_breakpoint_slots(void)
+{
+	int i, cpu, err_cpu;
+
+	for (i = 0; i < TYPE_MAX; i++)
+		__nr_bp_slots[i] = hw_breakpoint_slots(i);
+
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < TYPE_MAX; i++) {
+			struct bp_cpuinfo *info = get_bp_info(cpu, i);
+
+			info->tsk_pinned = kcalloc(__nr_bp_slots[i], sizeof(int), GFP_KERNEL);
+			if (!info->tsk_pinned)
+				goto err;
+		}
+	}
+
+	return 0;
+err:
+	for_each_possible_cpu(err_cpu) {
+		for (i = 0; i < TYPE_MAX; i++)
+			kfree(get_bp_info(err_cpu, i)->tsk_pinned);
+		if (err_cpu == cpu)
+			break;
+	}
+
+	return -ENOMEM;
+}
+#endif
+
 __weak int hw_breakpoint_weight(struct perf_event *bp)
 {
 	return 1;
@@ -95,7 +146,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 	unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
 	int i;
 
-	for (i = nr_slots[type] - 1; i >= 0; i--) {
+	for (i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
@@ -312,7 +363,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
+	if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
 		return -ENOSPC;
 
 	ret = arch_reserve_bp_slot(bp);
@@ -632,7 +683,7 @@ bool hw_breakpoint_is_used(void)
 			if (info->cpu_pinned)
 				return true;
 
-			for (int slot = 0; slot < nr_slots[type]; ++slot) {
+			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 				if (info->tsk_pinned[slot])
 					return true;
 			}
@@ -716,42 +767,19 @@ static struct pmu perf_breakpoint = {
 
 int __init init_hw_breakpoint(void)
 {
-	int cpu, err_cpu;
-	int i, ret;
-
-	for (i = 0; i < TYPE_MAX; i++)
-		nr_slots[i] = hw_breakpoint_slots(i);
-
-	for_each_possible_cpu(cpu) {
-		for (i = 0; i < TYPE_MAX; i++) {
-			struct bp_cpuinfo *info = get_bp_info(cpu, i);
-
-			info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
-							GFP_KERNEL);
-			if (!info->tsk_pinned) {
-				ret = -ENOMEM;
-				goto err;
-			}
-		}
-	}
+	int ret;
 
 	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
 	if (ret)
-		goto err;
+		return ret;
+
+	ret = init_breakpoint_slots();
+	if (ret)
+		return ret;
 
 	constraints_initialized = true;
 
 	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
 
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
-
-err:
-	for_each_possible_cpu(err_cpu) {
-		for (i = 0; i < TYPE_MAX; i++)
-			kfree(get_bp_info(err_cpu, i)->tsk_pinned);
-		if (err_cpu == cpu)
-			break;
-	}
-
-	return ret;
 }
-- 
2.37.0.rc0.161.g10f37bed90-goog


  parent reply	other threads:[~2022-07-04 15:06 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-04 15:05 [PATCH v3 00/14] perf/hw_breakpoint: Optimize for thousands of tasks Marco Elver
2022-07-04 15:05 ` [PATCH v3 01/14] perf/hw_breakpoint: Add KUnit test for constraints accounting Marco Elver
2022-07-04 15:10   ` Dmitry Vyukov
2022-07-20 15:22     ` Ian Rogers
2022-07-21 16:22   ` Mark Rutland
2022-07-22  9:10     ` Will Deacon
2022-07-22  9:20       ` Dmitry Vyukov
2022-07-22 10:10         ` Will Deacon
2022-07-22 10:31           ` Dmitry Vyukov
2022-07-22 11:03             ` Will Deacon
2022-07-22 13:41               ` Dmitry Vyukov
2022-07-25 11:00     ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 02/14] perf/hw_breakpoint: Provide hw_breakpoint_is_used() and use in test Marco Elver
2022-07-04 15:09   ` Dmitry Vyukov
2022-07-20 15:22     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 03/14] perf/hw_breakpoint: Clean up headers Marco Elver
2022-07-20 15:23   ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 04/14] perf/hw_breakpoint: Optimize list of per-task breakpoints Marco Elver
2022-07-20 15:29   ` Ian Rogers
2022-07-20 15:39     ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 05/14] perf/hw_breakpoint: Mark data __ro_after_init Marco Elver
2022-07-20 15:30   ` Ian Rogers
2022-07-04 15:05 ` Marco Elver [this message]
2022-07-20 15:31   ` [PATCH v3 06/14] perf/hw_breakpoint: Optimize constant number of breakpoint slots Ian Rogers
2022-07-04 15:05 ` [PATCH v3 07/14] perf/hw_breakpoint: Make hw_breakpoint_weight() inlinable Marco Elver
2022-07-20 15:32   ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 08/14] perf/hw_breakpoint: Remove useless code related to flexible breakpoints Marco Elver
2022-07-20 15:34   ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 09/14] powerpc/hw_breakpoint: Avoid relying on caller synchronization Marco Elver
2022-07-20 15:35   ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 10/14] locking/percpu-rwsem: Add percpu_is_write_locked() and percpu_is_read_locked() Marco Elver
2022-07-20 15:36   ` Ian Rogers
2022-08-17 12:47   ` Peter Zijlstra
2022-08-29  6:00     ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 11/14] perf/hw_breakpoint: Reduce contention with large number of tasks Marco Elver
2022-07-20 15:38   ` Ian Rogers
2022-08-17 13:03   ` Peter Zijlstra
2022-08-17 13:14     ` Marco Elver
2022-08-29  8:38       ` Peter Zijlstra
2022-08-29  9:38         ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 12/14] perf/hw_breakpoint: Introduce bp_slots_histogram Marco Elver
2022-07-20 15:40   ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 13/14] perf/hw_breakpoint: Optimize max_bp_pinned_slots() for CPU-independent task targets Marco Elver
2022-07-20 15:42   ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 14/14] perf/hw_breakpoint: Optimize toggle_bp_slot() " Marco Elver
2022-07-20 15:44   ` Ian Rogers
2022-07-12 13:39 ` [PATCH v3 00/14] perf/hw_breakpoint: Optimize for thousands of tasks Marco Elver
2022-07-20 15:47   ` Ian Rogers
2022-08-16 14:12     ` Marco Elver

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220704150514.48816-7-elver@google.com \
    --to=elver@google.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dvyukov@google.com \
    --cc=frederic@kernel.org \
    --cc=jolsa@redhat.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-sh@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).