linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Song Liu <songliubraving@fb.com>
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
	acme@kernel.org, mingo@redhat.com, kjain@linux.ibm.com,
	kernel-team@fb.com
Subject: Re: [PATCH v5 bpf-next 1/3] perf: enable branch record for software events
Date: Fri, 3 Sep 2021 10:02:40 +0200	[thread overview]
Message-ID: <YTHWoCcSgvfx24/N@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20210902165706.2812867-2-songliubraving@fb.com>

On Thu, Sep 02, 2021 at 09:57:04AM -0700, Song Liu wrote:
> +static int
> +intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +
> +	intel_pmu_disable_all();
> +	intel_pmu_lbr_read();
> +	cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr);
> +
> +	memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
> +	intel_pmu_enable_all(0);
> +	return cnt;
> +}

Would something like the below help get rid of that memcpy() ?

(compile tested only)

---
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7011e87be6d0..c508ba6099d2 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2938,7 +2938,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	loops = 0;
 again:
-	intel_pmu_lbr_read();
+	intel_pmu_lbr_read(&cpuc->lbr_stack);
 	intel_pmu_ack_status(status);
 	if (++loops > 100) {
 		static bool warned;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 9e6d6eaeb4cb..0a5bacba86c2 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -170,7 +170,7 @@ enum {
 
 #define ARCH_LBR_CTL_MASK			0x7f000e
 
-static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs);
 
 static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
 {
@@ -783,7 +783,7 @@ void intel_pmu_lbr_disable_all(void)
 		__intel_pmu_lbr_disable();
 }
 
-void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs)
 {
 	unsigned long mask = x86_pmu.lbr_nr - 1;
 	u64 tos = intel_pmu_lbr_tos();
@@ -801,18 +801,18 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 
-		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
-		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
-		cpuc->lbr_entries[i].mispred	= 0;
-		cpuc->lbr_entries[i].predicted	= 0;
-		cpuc->lbr_entries[i].in_tx	= 0;
-		cpuc->lbr_entries[i].abort	= 0;
-		cpuc->lbr_entries[i].cycles	= 0;
-		cpuc->lbr_entries[i].type	= 0;
-		cpuc->lbr_entries[i].reserved	= 0;
+		bs->entries[i].from	= msr_lastbranch.from;
+		bs->entries[i].to	= msr_lastbranch.to;
+		bs->entries[i].mispred	= 0;
+		bs->entries[i].predicted= 0;
+		bs->entries[i].in_tx	= 0;
+		bs->entries[i].abort	= 0;
+		bs->entries[i].cycles	= 0;
+		bs->entries[i].type	= 0;
+		bs->entries[i].reserved	= 0;
 	}
-	cpuc->lbr_stack.nr = i;
-	cpuc->lbr_stack.hw_idx = tos;
+	bs->nr = i;
+	bs->hw_idx = tos;
 }
 
 /*
@@ -820,7 +820,7 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
  * is the same as the linear address, allowing us to merge the LIP and EIP
  * LBR formats.
  */
-void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs)
 {
 	bool need_info = false, call_stack = false;
 	unsigned long mask = x86_pmu.lbr_nr - 1;
@@ -896,19 +896,19 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		if (abort && x86_pmu.lbr_double_abort && out > 0)
 			out--;
 
-		cpuc->lbr_entries[out].from	 = from;
-		cpuc->lbr_entries[out].to	 = to;
-		cpuc->lbr_entries[out].mispred	 = mis;
-		cpuc->lbr_entries[out].predicted = pred;
-		cpuc->lbr_entries[out].in_tx	 = in_tx;
-		cpuc->lbr_entries[out].abort	 = abort;
-		cpuc->lbr_entries[out].cycles	 = cycles;
-		cpuc->lbr_entries[out].type	 = 0;
-		cpuc->lbr_entries[out].reserved	 = 0;
+		bs->entries[out].from	 = from;
+		bs->entries[out].to	 = to;
+		bs->entries[out].mispred = mis;
+		bs->entries[out].predicted = pred;
+		bs->entries[out].in_tx	 = in_tx;
+		bs->entries[out].abort	 = abort;
+		bs->entries[out].cycles	 = cycles;
+		bs->entries[out].type	 = 0;
+		bs->entries[out].reserved = 0;
 		out++;
 	}
-	cpuc->lbr_stack.nr = out;
-	cpuc->lbr_stack.hw_idx = tos;
+	bs->nr = out;
+	bs->hw_idx = tos;
 }
 
 static __always_inline int get_lbr_br_type(u64 info)
@@ -945,7 +945,8 @@ static __always_inline u16 get_lbr_cycles(u64 info)
 }
 
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
-				struct lbr_entry *entries)
+				struct lbr_entry *entries,
+				struct perf_branch_stack *bs)
 {
 	struct perf_branch_entry *e;
 	struct lbr_entry *lbr;
@@ -954,7 +955,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		lbr = entries ? &entries[i] : NULL;
-		e = &cpuc->lbr_entries[i];
+		e = &bs->entries[i];
 
 		from = rdlbr_from(i, lbr);
 		/*
@@ -977,28 +978,28 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 		e->reserved	= 0;
 	}
 
-	cpuc->lbr_stack.nr = i;
+	bs->nr = i;
 }
 
-static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
+static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs)
 {
-	intel_pmu_store_lbr(cpuc, NULL);
+	intel_pmu_store_lbr(cpuc, NULL, bs);
 }
 
-static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
+static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs)
 {
 	struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
 
 	if (!xsave) {
-		intel_pmu_store_lbr(cpuc, NULL);
+		intel_pmu_store_lbr(cpuc, NULL, bs);
 		return;
 	}
 	xsaves(&xsave->xsave, XFEATURE_MASK_LBR);
 
-	intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
+	intel_pmu_store_lbr(cpuc, xsave->lbr.entries, bs);
 }
 
-void intel_pmu_lbr_read(void)
+void intel_pmu_lbr_read(struct perf_branch_stack *bs)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
@@ -1012,9 +1013,8 @@ void intel_pmu_lbr_read(void)
 	    cpuc->lbr_users == cpuc->lbr_pebs_users)
 		return;
 
-	x86_pmu.lbr_read(cpuc);
-
-	intel_pmu_lbr_filter(cpuc);
+	x86_pmu.lbr_read(cpuc, bs);
+	intel_pmu_lbr_filter(cpuc, bs);
 }
 
 /*
@@ -1402,7 +1402,7 @@ static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
  * in PERF_SAMPLE_BRANCH_STACK sample may vary.
  */
 static void
-intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs)
 {
 	u64 from, to;
 	int br_sel = cpuc->br_sel;
@@ -1414,11 +1414,11 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
 		return;
 
-	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+	for (i = 0; i < bs->nr; i++) {
 
-		from = cpuc->lbr_entries[i].from;
-		to = cpuc->lbr_entries[i].to;
-		type = cpuc->lbr_entries[i].type;
+		from = bs->entries[i].from;
+		to = bs->entries[i].to;
+		type = bs->entries[i].type;
 
 		/*
 		 * Parse the branch type recorded in LBR_x_INFO MSR.
@@ -1430,9 +1430,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 			to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
 			type = arch_lbr_br_type_map[type] | to_plm;
 		} else
-			type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+			type = branch_type(from, to, bs->entries[i].abort);
 		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
-			if (cpuc->lbr_entries[i].in_tx)
+			if (bs->entries[i].in_tx)
 				type |= X86_BR_IN_TX;
 			else
 				type |= X86_BR_NO_TX;
@@ -1440,25 +1440,25 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 
 		/* if type does not correspond, then discard */
 		if (type == X86_BR_NONE || (br_sel & type) != type) {
-			cpuc->lbr_entries[i].from = 0;
+			bs->entries[i].from = 0;
 			compress = true;
 		}
 
 		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
-			cpuc->lbr_entries[i].type = common_branch_type(type);
+			bs->entries[i].type = common_branch_type(type);
 	}
 
 	if (!compress)
 		return;
 
 	/* remove all entries with from=0 */
-	for (i = 0; i < cpuc->lbr_stack.nr; ) {
-		if (!cpuc->lbr_entries[i].from) {
+	for (i = 0; i < bs->nr; ) {
+		if (!bs->entries[i].from) {
 			j = i;
-			while (++j < cpuc->lbr_stack.nr)
-				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
-			cpuc->lbr_stack.nr--;
-			if (!cpuc->lbr_entries[i].from)
+			while (++j < bs->nr)
+				bs->entries[j-1] = bs->entries[j];
+			bs->nr--;
+			if (!bs->entries[i].from)
 				continue;
 		}
 		i++;
@@ -1476,8 +1476,8 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
 	else
 		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
 
-	intel_pmu_store_lbr(cpuc, lbr);
-	intel_pmu_lbr_filter(cpuc);
+	intel_pmu_store_lbr(cpuc, lbr, &cpuc->lbr_stack);
+	intel_pmu_lbr_filter(cpuc, &cpuc->lbr_stack);
 }
 
 /*
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e3ac05c97b5e..9700fb1f47c3 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -852,7 +852,7 @@ struct x86_pmu {
 	unsigned int	lbr_br_type:1;
 
 	void		(*lbr_reset)(void);
-	void		(*lbr_read)(struct cpu_hw_events *cpuc);
+	void		(*lbr_read)(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs);
 	void		(*lbr_save)(void *ctx);
 	void		(*lbr_restore)(void *ctx);
 
@@ -1345,11 +1345,11 @@ void intel_pmu_lbr_enable_all(bool pmi);
 
 void intel_pmu_lbr_disable_all(void);
 
-void intel_pmu_lbr_read(void);
+void intel_pmu_lbr_read(struct perf_branch_stack *bs);
 
-void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc);
+void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs);
 
-void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc);
+void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc, struct perf_branch_stack *bs);
 
 void intel_pmu_lbr_save(void *ctx);
 

  parent reply	other threads:[~2021-09-03  8:04 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-02 16:57 [PATCH v5 bpf-next 0/3] bpf: introduce bpf_get_branch_snapshot Song Liu
2021-09-02 16:57 ` [PATCH v5 bpf-next 1/3] perf: enable branch record for software events Song Liu
2021-09-02 20:49   ` John Fastabend
2021-09-03  8:02   ` Peter Zijlstra [this message]
2021-09-03 16:50     ` Song Liu
2021-09-07 18:59       ` Song Liu
2021-09-07 20:50         ` Andrii Nakryiko
2021-09-03  8:27   ` Peter Zijlstra
2021-09-03 16:45     ` Song Liu
2021-09-04 10:18       ` Peter Zijlstra
2021-09-02 16:57 ` [PATCH v5 bpf-next 2/3] bpf: introduce helper bpf_get_branch_snapshot Song Liu
2021-09-02 20:56   ` John Fastabend
2021-09-02 22:04     ` Song Liu
2021-09-02 22:53   ` Andrii Nakryiko
2021-09-02 23:03     ` Song Liu
2021-09-02 23:05       ` Andrii Nakryiko
2021-09-03  1:03   ` Alexei Starovoitov
2021-09-03  8:28   ` Peter Zijlstra
2021-09-03 16:58     ` Song Liu
2021-09-03  8:47   ` Peter Zijlstra
2021-09-03 17:06     ` Song Liu
2021-09-03 17:10     ` Andrii Nakryiko
2021-09-04 10:24       ` Peter Zijlstra
2021-09-04 10:55         ` Peter Zijlstra
2021-09-02 16:57 ` [PATCH v5 bpf-next 3/3] selftests/bpf: add test for bpf_get_branch_snapshot Song Liu
2021-09-02 21:05   ` John Fastabend
2021-09-02 22:54 ` [PATCH v5 bpf-next 0/3] bpf: introduce bpf_get_branch_snapshot Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YTHWoCcSgvfx24/N@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@fb.com \
    --cc=kjain@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=songliubraving@fb.com \
    --subject='Re: [PATCH v5 bpf-next 1/3] perf: enable branch record for software events' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).