All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched
@ 2015-06-08 11:38 Anshuman Khandual
  2015-06-08 11:38 ` [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB Anshuman Khandual
                   ` (9 more replies)
  0 siblings, 10 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

BHRB is a rolling buffer. Hence we might end up in a situation where
we have read one target address but when we try to read the next entry
indicating the from address of the targe, the buffer just overflows.
In this case, the captured from address will be zero which indicates
the end of the buffer.

This patch drops the entire branch record which would have otherwise
confused the user space tools.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/core-book3s.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 12b6384..c246e65 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -452,7 +452,6 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 			 *    In this case we need to read the instruction from
 			 *    memory to determine the target/to address.
 			 */
-
 			if (val & BHRB_TARGET) {
 				/* Target branches use two entries
 				 * (ie. computed gotos/XL form)
@@ -463,6 +462,8 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 
 				/* Get from address in next entry */
 				val = read_bhrb(r_index++);
+				if (!val)
+					break;
 				addr = val & BHRB_EA;
 				if (val & BHRB_TARGET) {
 					/* Shouldn't have two targets in a
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-10  3:43   ` Daniel Axtens
  2015-06-08 11:38 ` [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing Anshuman Khandual
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

From: "khandual@linux.vnet.ibm.com" <khandual@linux.vnet.ibm.com>

'commit 9de5cb0f6df8 ("powerpc/perf: Add per-event excludes on Power8")'
broke the PMU based BHRB privilege level filter. BHRB depends on the
same MMCR0 bits for privilege level filter which was used to freeze all
the PMCs as a group. Once we moved to individual event based privilege
filters through MMCR2 register on POWER8, event associated privilege
filters are no longer applicable to the BHRB captured branches.

This patch solves the problem by restoring to the previous method of
privilege level filters for the event in case BHRB based branch stack
sampling is requested. This patch also changes 'check_excludes' for
the same reason.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/core-book3s.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index c246e65..ae61629 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -930,7 +930,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
  * added events.
  */
 static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
-			  int n_prev, int n_new)
+			  int n_prev, int n_new, int bhrb_users)
 {
 	int eu = 0, ek = 0, eh = 0;
 	int i, n, first;
@@ -941,7 +941,7 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
 	 * don't need to do any of this logic. NB. This assumes no PMU has both
 	 * per event exclude and limited PMCs.
 	 */
-	if (ppmu->flags & PPMU_ARCH_207S)
+	if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
 		return 0;
 
 	n = n_prev + n_new;
@@ -1259,7 +1259,7 @@ static void power_pmu_enable(struct pmu *pmu)
 		goto out;
 	}
 
-	if (!(ppmu->flags & PPMU_ARCH_207S)) {
+	if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users) {
 		/*
 		 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
 		 * bits for the first event. We have already checked that all
@@ -1284,7 +1284,7 @@ static void power_pmu_enable(struct pmu *pmu)
 	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
 				| MMCR0_FC);
-	if (ppmu->flags & PPMU_ARCH_207S)
+	if ((ppmu->flags & PPMU_ARCH_207S) && !cpuhw->bhrb_users)
 		mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
 
 	/*
@@ -1436,7 +1436,8 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 	if (cpuhw->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
-	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
+	if (check_excludes(cpuhw->event, cpuhw->flags,
+				n0, 1, cpuhw->bhrb_users))
 		goto out;
 	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
 		goto out;
@@ -1615,7 +1616,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 		return -EAGAIN;
 	cpuhw = this_cpu_ptr(&cpu_hw_events);
 	n = cpuhw->n_events;
-	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
+	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n, cpuhw->bhrb_users))
 		return -EAGAIN;
 	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
 	if (i < 0)
@@ -1828,10 +1829,12 @@ static int power_pmu_event_init(struct perf_event *event)
 	events[n] = ev;
 	ctrs[n] = event;
 	cflags[n] = flags;
-	if (check_excludes(ctrs, cflags, n, 1))
+	cpuhw = &get_cpu_var(cpu_hw_events);
+	if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users)) {
+		put_cpu_var(cpu_hw_events);
 		return -EINVAL;
+	}
 
-	cpuhw = &get_cpu_var(cpu_hw_events);
 	err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
 	if (has_branch_stack(event)) {
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
  2015-06-08 11:38 ` [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-10  4:36   ` Daniel Axtens
  2015-06-08 11:38 ` [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8 Anshuman Khandual
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

This patch cleans up some existing indentation problem in code and
re organizes the BHRB processing code with an helper function named
'update_branch_entry' making it more readable. This patch does not
change any functionality.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/core-book3s.c | 107 ++++++++++++++++++++--------------------
 1 file changed, 54 insertions(+), 53 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ae61629..d10d2c1 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -412,11 +412,19 @@ static __u64 power_pmu_bhrb_to(u64 addr)
 	return target - (unsigned long)&instr + addr;
 }
 
+void update_branch_entry(struct cpu_hw_events *cpuhw,
+			int index, u64 from, u64 to, int pred)
+{
+	cpuhw->bhrb_entries[index].from = from;
+	cpuhw->bhrb_entries[index].to = to;
+	cpuhw->bhrb_entries[index].mispred = pred;
+	cpuhw->bhrb_entries[index].predicted = ~pred;
+}
+
 /* Processing BHRB entries */
 static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 {
-	u64 val;
-	u64 addr;
+	u64 val, addr, tmp;
 	int r_index, u_index, pred;
 
 	r_index = 0;
@@ -427,63 +435,56 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 		if (!val)
 			/* Terminal marker: End of valid BHRB entries */
 			break;
-		else {
-			addr = val & BHRB_EA;
-			pred = val & BHRB_PREDICTION;
 
-			if (!addr)
-				/* invalid entry */
-				continue;
+		addr = val & BHRB_EA;
+		pred = val & BHRB_PREDICTION;
+
+		if (!addr)
+			/* invalid entry */
+			continue;
 
-			/* Branches are read most recent first (ie. mfbhrb 0 is
-			 * the most recent branch).
-			 * There are two types of valid entries:
-			 * 1) a target entry which is the to address of a
-			 *    computed goto like a blr,bctr,btar.  The next
-			 *    entry read from the bhrb will be branch
-			 *    corresponding to this target (ie. the actual
-			 *    blr/bctr/btar instruction).
-			 * 2) a from address which is an actual branch.  If a
-			 *    target entry proceeds this, then this is the
-			 *    matching branch for that target.  If this is not
-			 *    following a target entry, then this is a branch
-			 *    where the target is given as an immediate field
-			 *    in the instruction (ie. an i or b form branch).
-			 *    In this case we need to read the instruction from
-			 *    memory to determine the target/to address.
+		/* Branches are read most recent first (ie. mfbhrb 0 is
+		 * the most recent branch).
+		 * There are two types of valid entries:
+		 * 1) a target entry which is the to address of a
+		 *    computed goto like a blr,bctr,btar.  The next
+		 *    entry read from the bhrb will be branch
+		 *    corresponding to this target (ie. the actual
+		 *    blr/bctr/btar instruction).
+		 * 2) a from address which is an actual branch.  If a
+		 *    target entry proceeds this, then this is the
+		 *    matching branch for that target.  If this is not
+		 *    following a target entry, then this is a branch
+		 *    where the target is given as an immediate field
+		 *    in the instruction (ie. an i or b form branch).
+		 *    In this case we need to read the instruction from
+		 *    memory to determine the target/to address.
+		 */
+		if (val & BHRB_TARGET) {
+			/* Target branches use two entries
+			 * (ie. computed gotos/XL form)
 			 */
+			tmp = addr;
+
+			/* Get from address in next entry */
+			val = read_bhrb(r_index++);
+			if (!val)
+				break;
+			addr = val & BHRB_EA;
 			if (val & BHRB_TARGET) {
-				/* Target branches use two entries
-				 * (ie. computed gotos/XL form)
-				 */
-				cpuhw->bhrb_entries[u_index].to = addr;
-				cpuhw->bhrb_entries[u_index].mispred = pred;
-				cpuhw->bhrb_entries[u_index].predicted = ~pred;
-
-				/* Get from address in next entry */
-				val = read_bhrb(r_index++);
-				if (!val)
-					break;
-				addr = val & BHRB_EA;
-				if (val & BHRB_TARGET) {
-					/* Shouldn't have two targets in a
-					   row.. Reset index and try again */
-					r_index--;
-					addr = 0;
-				}
-				cpuhw->bhrb_entries[u_index].from = addr;
-			} else {
-				/* Branches to immediate field 
-				   (ie I or B form) */
-				cpuhw->bhrb_entries[u_index].from = addr;
-				cpuhw->bhrb_entries[u_index].to =
-					power_pmu_bhrb_to(addr);
-				cpuhw->bhrb_entries[u_index].mispred = pred;
-				cpuhw->bhrb_entries[u_index].predicted = ~pred;
+				/* Shouldn't have two targets in a
+				   row.. Reset index and try again */
+				r_index--;
+				addr = 0;
 			}
-			u_index++;
-
+			update_branch_entry(cpuhw, u_index, addr, tmp, pred);
+		} else {
+			/* Branches to immediate field
+			   (ie I or B form) */
+			tmp = power_pmu_bhrb_to(addr);
+			update_branch_entry(cpuhw, u_index, addr, tmp, pred);
 		}
+		u_index++;
 	}
 	cpuhw->bhrb_stack.nr = u_index;
 	return;
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
  2015-06-08 11:38 ` [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB Anshuman Khandual
  2015-06-08 11:38 ` [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-10  5:07   ` Daniel Axtens
  2015-06-08 11:38 ` [PATCH V8 05/10] powerpc, perf: Change the name of HW PMU branch filter tracking variable Anshuman Khandual
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

This patch does some code re-arrangements to make it clear that it ignores
any separate privilege level branch filter request and does not support
any combinations of HW PMU branch filters.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/power8-pmu.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 396351d..a6c6a2c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -656,8 +656,6 @@ static int power8_generic_events[] = {
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
-	u64 pmu_bhrb_filter = 0;
-
 	/* BHRB and regular PMU events share the same privilege state
 	 * filter configuration. BHRB is always recorded along with a
 	 * regular PMU event. As the privilege state filter is handled
@@ -665,21 +663,15 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 	 * PMU event, we ignore any separate BHRB specific request.
 	 */
 
-	/* No branch filter requested */
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
-		return pmu_bhrb_filter;
-
-	/* Invalid branch filter options - HW does not support */
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
-		return -1;
+	/* Ignore user, kernel, hv bits */
+	branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
-		return -1;
+	/* No branch filter requested */
+	if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
+		return 0;
 
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
-		pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
-		return pmu_bhrb_filter;
-	}
+	if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
+		return POWER8_MMCRA_IFM1;
 
 	/* Every thing else is unsupported */
 	return -1;
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 05/10] powerpc, perf: Change the name of HW PMU branch filter tracking variable
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
                   ` (2 preceding siblings ...)
  2015-06-08 11:38 ` [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8 Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-08 11:38 ` [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions Anshuman Khandual
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

This patch simply changes the name of the variable from 'bhrb_filter' to
'bhrb_hw_filter' in order to add one more variable which will track SW
filters in generic powerpc book3s code which will be implemented in the
subsequent patch. This patch does not change any functionality.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/core-book3s.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d10d2c1..080b038 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -52,7 +52,7 @@ struct cpu_hw_events {
 	int n_txn_start;
 
 	/* BHRB bits */
-	u64				bhrb_filter;	/* BHRB HW branch filter */
+	u64				bhrb_hw_filter;	/* BHRB HW filter */
 	int				bhrb_users;
 	void				*bhrb_context;
 	struct	perf_branch_stack	bhrb_stack;
@@ -1346,7 +1346,7 @@ static void power_pmu_enable(struct pmu *pmu)
 
 	mb();
 	if (cpuhw->bhrb_users)
-		ppmu->config_bhrb(cpuhw->bhrb_filter);
+		ppmu->config_bhrb(cpuhw->bhrb_hw_filter);
 
 	write_mmcr0(cpuhw, mmcr0);
 
@@ -1454,7 +1454,7 @@ nocheck:
  out:
 	if (has_branch_stack(event)) {
 		power_pmu_bhrb_enable(event);
-		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+		cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
 					event->attr.branch_sample_type);
 	}
 
@@ -1839,10 +1839,10 @@ static int power_pmu_event_init(struct perf_event *event)
 	err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
 	if (has_branch_stack(event)) {
-		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+		cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
 					event->attr.branch_sample_type);
 
-		if (cpuhw->bhrb_filter == -1) {
+		if (cpuhw->bhrb_hw_filter == -1) {
 			put_cpu_var(cpu_hw_events);
 			return -EOPNOTSUPP;
 		}
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
                   ` (3 preceding siblings ...)
  2015-06-08 11:38 ` [PATCH V8 05/10] powerpc, perf: Change the name of HW PMU branch filter tracking variable Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-10  5:33   ` Daniel Axtens
  2015-06-08 11:38 ` [PATCH V8 07/10] powerpc, perf: Enable SW filtering in branch stack sampling framework Anshuman Khandual
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

Generic powerpc branch analysis support added in the code patching
library which will help the subsequent patch on SW based filtering
of branch records in perf.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/code-patching.h | 15 ++++++++
 arch/powerpc/lib/code-patching.c         | 66 ++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 840a550..0a6f0d8 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,16 @@
 #define BRANCH_SET_LINK	0x1
 #define BRANCH_ABSOLUTE	0x2
 
+#define XL_FORM_LR  0x4C000020
+#define XL_FORM_CTR 0x4C000420
+#define XL_FORM_TAR 0x4C000460
+
+#define BO_ALWAYS    0x02800000
+#define BO_CTR       0x02000000
+#define BO_CRBI_OFF  0x00800000
+#define BO_CRBI_ON   0x01800000
+#define BO_CRBI_HINT 0x00400000
+
 unsigned int create_branch(const unsigned int *addr,
 			   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
@@ -99,4 +109,9 @@ static inline unsigned long ppc_global_function_entry(void *func)
 #endif
 }
 
+bool instr_is_return_branch(unsigned int instr);
+bool instr_is_conditional_branch(unsigned int instr);
+bool instr_is_func_call(unsigned int instr);
+bool instr_is_indirect_func_call(unsigned int instr);
+
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb..15b7b88 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -87,6 +87,72 @@ static int instr_is_branch_bform(unsigned int instr)
 	return branch_opcode(instr) == 16;
 }
 
+static int instr_is_branch_xlform(unsigned int instr)
+{
+	return branch_opcode(instr) == 19;
+}
+
+static int is_xlform_lr(unsigned int instr)
+{
+	return (instr & XL_FORM_LR) == XL_FORM_LR;
+}
+
+static int is_bo_always(unsigned int instr)
+{
+	return (instr & BO_ALWAYS) == BO_ALWAYS;
+}
+
+static int is_branch_link_set(unsigned int instr)
+{
+	return (instr & BRANCH_SET_LINK) == BRANCH_SET_LINK;
+}
+
+bool instr_is_return_branch(unsigned int instr)
+{
+	/*
+	 * Conditional and unconditional branch to LR register
+	 * without seting the link register.
+	 */
+	if (is_xlform_lr(instr) && !is_branch_link_set(instr))
+		return true;
+
+	return false;
+}
+
+bool instr_is_conditional_branch(unsigned int instr)
+{
+	/* I-form instruction - excluded */
+	if (instr_is_branch_iform(instr))
+		return false;
+
+	/* B-form or XL-form instruction */
+	if (instr_is_branch_bform(instr) || instr_is_branch_xlform(instr))  {
+
+		/* Not branch always */
+		if (!is_bo_always(instr))
+			return true;
+	}
+	return false;
+}
+
+bool instr_is_func_call(unsigned int instr)
+{
+	/* LR should be set */
+	if (is_branch_link_set(instr))
+		return true;
+
+	return false;
+}
+
+bool instr_is_indirect_func_call(unsigned int instr)
+{
+	/* XL-form instruction with LR set */
+	if (instr_is_branch_xlform(instr) && is_branch_link_set(instr))
+		return true;
+
+	return false;
+}
+
 int instr_is_relative_branch(unsigned int instr)
 {
 	if (instr & BRANCH_ABSOLUTE)
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 07/10] powerpc, perf: Enable SW filtering in branch stack sampling framework
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
                   ` (4 preceding siblings ...)
  2015-06-08 11:38 ` [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-08 11:38 ` [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters Anshuman Khandual
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

This patch enables SW based post processing of BHRB captured branches
to be able to meet more user defined branch filtration criteria in perf
branch stack sampling framework. These changes increase the number of
branch filters and their valid combinations on any powerpc64 server
platform with BHRB support. Find the summary of code changes here.

(1) struct cpu_hw_event

Introduced two new variables track various filter values and mask

(a) bhrb_sw_filter      Tracks SW implemented branch filter flags
(b) bhrb_filter         Tracks both (SW and HW) branch filter flags

(2) Event creation

Kernel will figure out supported BHRB branch filters through a PMU
call back 'bhrb_filter_map'. This function will find out how many of
the requested branch filters can be supported in the PMU HW. It will
not try to invalidate any branch filter combinations. Event creation
will not error out because of lack of HW based branch filters.
Meanwhile it will track the overall supported branch filters in the
'bhrb_filter' variable.

Once the PMU call back returns kernel will process the user branch
filter request against available SW filters (bhrb_sw_filter_map) while
looking at the 'bhrb_filter'. During this phase all the branch filters
which are still pending from the user requested list will have to be
supported in SW failing which the event creation will error out.

(3) SW branch filter

During the BHRB data capture inside the PMU interrupt context, each
of the captured 'perf_branch_entry.from' will be checked for compliance
with applicable SW branch filters. If the entry does not conform to the
filter requirements, it will be discarded from the final perf branch
stack buffer.

(4) Supported SW based branch filters

(a) PERF_SAMPLE_BRANCH_ANY_RETURN
(b) PERF_SAMPLE_BRANCH_IND_CALL
(c) PERF_SAMPLE_BRANCH_ANY_CALL
(d) PERF_SAMPLE_BRANCH_COND

Please refer the patch to understand the classification of instructions
into these branch filter categories.

(5) Multiple branch filter semantics

Book3 sever implementation follows the same OR semantics (as implemented
in x86) while dealing with multiple branch filters at any point of time.
SW branch filter analysis is carried on the data set captured in the PMU
HW. So the resulting set of data (after applying the SW filters) will
inherently be an AND with the HW captured set. Hence any combination of
HW and SW branch filters will be invalid. HW based branch filters are
more efficient and faster compared to SW implemented branch filters. So
at first the PMU should decide whether it can support all the requested
branch filters itself or not. In case it can support all the branch
filters in an OR manner, we dont apply any SW branch filter on top of the
HW captured set (which is the final set). This preserves the OR semantic
of multiple branch filters as required. But in case where the PMU cannot
support all the requested branch filters in an OR manner, it should not
apply any it's filters and leave it upto the SW to handle them all. Its
the PMU code's responsibility to uphold this protocol to be able to
conform to the overall OR semantic of perf branch stack sampling framework.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/perf_event_server.h |   7 +-
 arch/powerpc/perf/core-book3s.c              | 188 ++++++++++++++++++++++++++-
 arch/powerpc/perf/power8-pmu.c               |   2 +-
 3 files changed, 191 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8146221..cb7ca1a 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -38,7 +38,8 @@ struct power_pmu {
 				unsigned long *valp);
 	int		(*get_alternatives)(u64 event_id, unsigned int flags,
 				u64 alt[]);
-	u64             (*bhrb_filter_map)(u64 branch_sample_type);
+	u64             (*bhrb_filter_map)(u64 branch_sample_type,
+							u64 *bhrb_filter);
 	void            (*config_bhrb)(u64 pmu_bhrb_filter);
 	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
 	int		(*limited_pmc_event)(u64 event_id);
@@ -80,6 +81,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long int read_bhrb(int n);
 
+#define for_each_branch_sample_type(x) \
+		for ((x) = PERF_SAMPLE_BRANCH_USER; \
+			(x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 080b038..9a682c9 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -53,6 +53,8 @@ struct cpu_hw_events {
 
 	/* BHRB bits */
 	u64				bhrb_hw_filter;	/* BHRB HW filter */
+	u64				bhrb_sw_filter;	/* BHRB SW filter */
+	u64				bhrb_filter;	/* Branch filter mask */
 	int				bhrb_users;
 	void				*bhrb_context;
 	struct	perf_branch_stack	bhrb_stack;
@@ -421,6 +423,84 @@ void update_branch_entry(struct cpu_hw_events *cpuhw,
 	cpuhw->bhrb_entries[index].predicted = ~pred;
 }
 
+/**
+ * check_instruction - Instruction opcode analysis
+ * @addr:	Instruction address
+ * @sw_filter:	SW branch filter
+ *
+ * Analyse instruction opcodes and classify them into various
+ * branch filter options available. This follows the standard
+ * semantics of OR which means that instructions which conforms
+ * to any of the requested branch filters get picked up.
+ */
+static bool check_instruction(unsigned int *addr, u64 sw_filter)
+{
+	if (sw_filter & PERF_SAMPLE_BRANCH_ANY_RETURN) {
+		if (instr_is_return_branch(*addr))
+			return true;
+	}
+
+	if (sw_filter & PERF_SAMPLE_BRANCH_IND_CALL) {
+		if (instr_is_indirect_func_call(*addr))
+			return true;
+	}
+
+	if (sw_filter & PERF_SAMPLE_BRANCH_ANY_CALL) {
+		if (instr_is_func_call(*addr))
+			return true;
+	}
+
+	if (sw_filter & PERF_SAMPLE_BRANCH_COND) {
+		if (instr_is_conditional_branch(*addr))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * keep_branch - Whether the branch conforms to applicable filter
+ * @from:	From address
+ * @sw_filter	SW branch filter
+ *
+ * Access the instruction contained in the address and then check
+ * whether it complies with the applicable SW branch filters.
+ */
+static bool keep_branch(u64 from, u64 sw_filter)
+{
+	unsigned int instr;
+	bool ret;
+
+	/*
+	 * The "from" branch for every branch record has to go
+	 * through this filter verification. So this quick check
+	 * here for no SW filters will improve performance.
+	 */
+	if (sw_filter == 0)
+		return true;
+
+	if (is_kernel_addr(from)) {
+		return check_instruction((unsigned int *) from, sw_filter);
+	} else {
+		/*
+		 * Userspace address needs to be
+		 * copied first before analysis.
+		 */
+		pagefault_disable();
+		ret =  __get_user_inatomic(instr, (unsigned int __user *) from);
+
+		/*
+		 * If the instruction could not be accessible
+		 * from user space, we still 'okay' the entry.
+		 */
+		if (ret) {
+			pagefault_enable();
+			return true;
+		}
+		pagefault_enable();
+		return check_instruction(&instr, sw_filter);
+	}
+}
+
 /* Processing BHRB entries */
 static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 {
@@ -484,6 +564,11 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 			tmp = power_pmu_bhrb_to(addr);
 			update_branch_entry(cpuhw, u_index, addr, tmp, pred);
 		}
+
+		/* Apply SW branch filters and drop the entry if required */
+		if (!keep_branch(cpuhw->bhrb_entries[u_index].from,
+						cpuhw->bhrb_sw_filter))
+			u_index--;
 		u_index++;
 	}
 	cpuhw->bhrb_stack.nr = u_index;
@@ -681,6 +766,79 @@ static void pmao_restore_workaround(bool ebb)
 }
 #endif /* CONFIG_PPC64 */
 
+/* SW implemented branch filters */
+static unsigned int power_sw_filter[] = { PERF_SAMPLE_BRANCH_USER,
+					  PERF_SAMPLE_BRANCH_KERNEL,
+					  PERF_SAMPLE_BRANCH_HV,
+					  PERF_SAMPLE_BRANCH_ANY_CALL,
+					  PERF_SAMPLE_BRANCH_COND,
+					  PERF_SAMPLE_BRANCH_ANY_RETURN,
+					  PERF_SAMPLE_BRANCH_IND_CALL };
+
+/**
+ * all_filters_covered - Whether requested filters covered
+ * @branch_sample_type:	Requested branch sample filter
+ * @bhrb_filter:	Branch Filters covered through SW and HW
+ *
+ * Validate whether all the user requested branch filters are getting
+ * processed either in the PMU HW or in SW.
+ */
+static int all_filters_covered(u64 branch_sample_type, u64 bhrb_filter)
+{
+	u64 x;
+
+	if (bhrb_filter == PERF_SAMPLE_BRANCH_ANY)
+		return true;
+
+	for_each_branch_sample_type(x) {
+		if (!(branch_sample_type & x))
+			continue;
+		/*
+		 * Requested filter not available either
+		 * in PMU or in SW.
+		 */
+		if (!(bhrb_filter & x))
+			return false;
+	}
+	return true;
+}
+
+/**
+ * bhrb_sw_filter_map - Required SW based branch filters
+ * @branch_sample_type:	Requested branch sample type
+ * @bhrb_filter:	Branch filters covered through HW
+ *
+ * This is called after figuring out what all branch filters the PMU HW
+ * supports for the requested branch filter set. Here we will go through
+ * all the SW implemented branch filters one by one and pick them up if
+ * its not already supported in the PMU.
+ */
+static u64 bhrb_sw_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
+{
+	u64 branch_sw_filter = 0;
+	unsigned int i;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) {
+		WARN_ON(*bhrb_filter != PERF_SAMPLE_BRANCH_ANY);
+		return branch_sw_filter;
+	}
+
+	/*
+	 * PMU supported branch filters must be implemented in SW
+	 * when the PMU is unable to process them for some reason.
+	 */
+	for (i = 0; i < ARRAY_SIZE(power_sw_filter); i++) {
+		if (branch_sample_type & power_sw_filter[i]) {
+			if (!(*bhrb_filter & power_sw_filter[i])) {
+				branch_sw_filter |= power_sw_filter[i];
+				*bhrb_filter |= power_sw_filter[i];
+			}
+		}
+	}
+
+	return branch_sw_filter;
+}
+
 static void perf_event_interrupt(struct pt_regs *regs);
 
 /*
@@ -1345,6 +1503,8 @@ static void power_pmu_enable(struct pmu *pmu)
 	mmcr0 = ebb_switch_in(ebb, cpuhw);
 
 	mb();
+
+	/* Enable PMU based branch filters */
 	if (cpuhw->bhrb_users)
 		ppmu->config_bhrb(cpuhw->bhrb_hw_filter);
 
@@ -1454,8 +1614,12 @@ nocheck:
  out:
 	if (has_branch_stack(event)) {
 		power_pmu_bhrb_enable(event);
-		cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
-					event->attr.branch_sample_type);
+		cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map
+					(event->attr.branch_sample_type,
+							&cpuhw->bhrb_filter);
+		cpuhw->bhrb_sw_filter = bhrb_sw_filter_map
+					(event->attr.branch_sample_type,
+							&cpuhw->bhrb_filter);
 	}
 
 	perf_pmu_enable(event->pmu);
@@ -1838,11 +2002,27 @@ static int power_pmu_event_init(struct perf_event *event)
 
 	err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
+	/*
+	 * BHRB branch filters implemented in PMU will take
+	 * effect when we enable the event and data set
+	 * collected thereafter will be compliant with those
+	 * branch filters. Where as the SW branch filters will
+	 * be applied during the post processing of BHRB data.
+	 */
 	if (has_branch_stack(event)) {
+		/* Query available PMU branch filter support */
 		cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
-					event->attr.branch_sample_type);
+					event->attr.branch_sample_type,
+					&cpuhw->bhrb_filter);
+
+		/* Query available SW branch filter support */
+		cpuhw->bhrb_sw_filter = bhrb_sw_filter_map(
+					event->attr.branch_sample_type,
+					&cpuhw->bhrb_filter);
 
-		if (cpuhw->bhrb_hw_filter == -1) {
+		/* Check overall coverage of branch filter request */
+		if (!all_filters_covered(event->attr.branch_sample_type,
+							cpuhw->bhrb_filter)) {
 			put_cpu_var(cpu_hw_events);
 			return -EOPNOTSUPP;
 		}
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index a6c6a2c..5e17cb5 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -654,7 +654,7 @@ static int power8_generic_events[] = {
 	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
 };
 
-static u64 power8_bhrb_filter_map(u64 branch_sample_type)
+static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 {
 	/* BHRB and regular PMU events share the same privilege state
 	 * filter configuration. BHRB is always recorded along with a
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
                   ` (5 preceding siblings ...)
  2015-06-08 11:38 ` [PATCH V8 07/10] powerpc, perf: Enable SW filtering in branch stack sampling framework Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-10  5:49   ` Daniel Axtens
  2015-06-08 11:38 ` [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters Anshuman Khandual
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

The kernel now supports SW based branch filters for book3s systems with
some specific requirements while dealing with HW supported branch filters
in order to achieve overall OR semantics prevailing in perf branch stack
sampling framework. This patch adapts the BHRB branch filter configuration
to meet those protocols. POWER8 PMU can only handle one HW based branch
filter request at any point of time. For all other combinations PMU will
pass it on to the SW.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/power8-pmu.c | 51 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 5e17cb5..8fccf6c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -656,6 +656,16 @@ static int power8_generic_events[] = {
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 {
+	u64 x, pmu_bhrb_filter;
+
+	pmu_bhrb_filter = 0;
+	*bhrb_filter = 0;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) {
+		*bhrb_filter = PERF_SAMPLE_BRANCH_ANY;
+		return pmu_bhrb_filter;
+	}
+
 	/* BHRB and regular PMU events share the same privilege state
 	 * filter configuration. BHRB is always recorded along with a
 	 * regular PMU event. As the privilege state filter is handled
@@ -666,15 +676,42 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 	/* Ignore user, kernel, hv bits */
 	branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
-	/* No branch filter requested */
-	if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
-		return 0;
+	/*
+	 * POWER8 does not support ORing of PMU HW branch filters. Hence
+	 * if multiple branch filters are requested which may include filters
+	 * supported in PMU, still go ahead and clear the PMU based HW branch
+	 * filter component as in this case all the filters will be processed
+	 * in SW.
+	 */
 
-	if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
-		return POWER8_MMCRA_IFM1;
+	for_each_branch_sample_type(x) {
+		/* Ignore privilege branch filters */
+		if ((x == PERF_SAMPLE_BRANCH_USER)
+			|| (x == PERF_SAMPLE_BRANCH_KERNEL)
+				|| (x == PERF_SAMPLE_BRANCH_HV))
+			continue;
+
+		if (!(branch_sample_type & x))
+			continue;
+
+		/* Supported individual PMU branch filters */
+		if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+			branch_sample_type &= ~PERF_SAMPLE_BRANCH_ANY_CALL;
+			if (branch_sample_type) {
+				/* Multiple filters will be processed in SW */
+				pmu_bhrb_filter = 0;
+				*bhrb_filter = 0;
+				return pmu_bhrb_filter;
+			} else {
+				/* Individual filter will be processed in HW */
+				pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
+				*bhrb_filter    |= PERF_SAMPLE_BRANCH_ANY_CALL;
+				return pmu_bhrb_filter;
+			}
+		}
+	}
 
-	/* Every thing else is unsupported */
-	return -1;
+	return pmu_bhrb_filter;
 }
 
 static void power8_config_bhrb(u64 pmu_bhrb_filter)
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
                   ` (6 preceding siblings ...)
  2015-06-08 11:38 ` [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-11  1:19   ` Daniel Axtens
  2015-06-08 11:38 ` [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW) Anshuman Khandual
  2015-06-10  3:21 ` [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Daniel Axtens
  9 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

From: "khandual@linux.vnet.ibm.com" <khandual@linux.vnet.ibm.com>

This patch enables privilege mode SW branch filters. Also modifies
POWER8 PMU branch filter configuration so that the privilege mode
branch filter implemented as part of base PMU event configuration
is reflected in bhrb filter mask. As a result, the SW will skip and
not try to process the privilege mode branch filters itself.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/perf_event_server.h |  3 +++
 arch/powerpc/perf/core-book3s.c              | 38 ++++++++++++++++++++++++++--
 arch/powerpc/perf/power8-pmu.c               | 13 ++++++++--
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index cb7ca1a..23d68d3 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -85,6 +85,9 @@ extern unsigned long int read_bhrb(int n);
 		for ((x) = PERF_SAMPLE_BRANCH_USER; \
 			(x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
 
+#define POWER_ADDR_USER		0
+#define POWER_ADDR_KERNEL	1
+
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9a682c9..c151399 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -20,6 +20,7 @@
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
 #include <asm/code-patching.h>
+#include <asm/cputable.h>
 
 #define BHRB_MAX_ENTRIES	32
 #define BHRB_TARGET		0x0000000000000002
@@ -465,10 +466,10 @@ static bool check_instruction(unsigned int *addr, u64 sw_filter)
  * Access the instruction contained in the address and then check
  * whether it complies with the applicable SW branch filters.
  */
-static bool keep_branch(u64 from, u64 sw_filter)
+static bool keep_branch(u64 from, u64 to, u64 sw_filter)
 {
 	unsigned int instr;
-	bool ret;
+	bool to_plm, ret, flag;
 
 	/*
 	 * The "from" branch for every branch record has to go
@@ -478,6 +479,38 @@ static bool keep_branch(u64 from, u64 sw_filter)
 	if (sw_filter == 0)
 		return true;
 
+	to_plm = is_kernel_addr(to) ? POWER_ADDR_KERNEL : POWER_ADDR_USER;
+
+	/*
+	 * XXX: Applying the privilege mode SW branch filters first on
+	 * the 'TO' address creates an AND semantic with other SW branch
+	 * filters which are ORed with each other being applied on the
+	 * 'FROM' address there after.
+	 */
+	if (sw_filter & PERF_SAMPLE_BRANCH_PLM_ALL) {
+		flag = false;
+
+		if (sw_filter & PERF_SAMPLE_BRANCH_USER) {
+			if (to_plm == POWER_ADDR_USER)
+				flag = true;
+		}
+
+		if (sw_filter & PERF_SAMPLE_BRANCH_KERNEL) {
+			if (to_plm == POWER_ADDR_KERNEL)
+				flag = true;
+		}
+
+		if (sw_filter & PERF_SAMPLE_BRANCH_HV) {
+			if (cpu_has_feature(CPU_FTR_HVMODE)) {
+				if (to_plm == POWER_ADDR_KERNEL)
+					flag = true;
+			}
+		}
+
+		if (!flag)
+			return false;
+	}
+
 	if (is_kernel_addr(from)) {
 		return check_instruction((unsigned int *) from, sw_filter);
 	} else {
@@ -567,6 +600,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 
 		/* Apply SW branch filters and drop the entry if required */
 		if (!keep_branch(cpuhw->bhrb_entries[u_index].from,
+					cpuhw->bhrb_entries[u_index].to,
 						cpuhw->bhrb_sw_filter))
 			u_index--;
 		u_index++;
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 8fccf6c..b56afc6 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -670,9 +670,19 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 	 * filter configuration. BHRB is always recorded along with a
 	 * regular PMU event. As the privilege state filter is handled
 	 * in the basic PMC configuration of the accompanying regular
-	 * PMU event, we ignore any separate BHRB specific request.
+	 * PMU event, we ignore any separate BHRB specific request. But
+	 * this needs to be communicated with the branch filter mask.
 	 */
 
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_USER)
+		*bhrb_filter |= PERF_SAMPLE_BRANCH_USER;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)
+		*bhrb_filter |= PERF_SAMPLE_BRANCH_KERNEL;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_HV)
+		*bhrb_filter |= PERF_SAMPLE_BRANCH_HV;
+
 	/* Ignore user, kernel, hv bits */
 	branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
@@ -700,7 +710,6 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
 			if (branch_sample_type) {
 				/* Multiple filters will be processed in SW */
 				pmu_bhrb_filter = 0;
-				*bhrb_filter = 0;
 				return pmu_bhrb_filter;
 			} else {
 				/* Individual filter will be processed in HW */
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
                   ` (7 preceding siblings ...)
  2015-06-08 11:38 ` [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters Anshuman Khandual
@ 2015-06-08 11:38 ` Anshuman Khandual
  2015-06-09  5:41   ` Anshuman Khandual
  2015-06-11  2:09   ` Daniel Axtens
  2015-06-10  3:21 ` [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Daniel Axtens
  9 siblings, 2 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-08 11:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, sukadev, mikey

From: "khandual@linux.vnet.ibm.com" <khandual@linux.vnet.ibm.com>

This patch adds a test for verifying that all the branch stack
sampling filters supported on powerpc works correctly. It also
adds some assembly helper functions in this regard. This patch
extends the generic event description to handle kernel mapped
ring buffers.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 tools/testing/selftests/powerpc/pmu/Makefile       |  11 +-
 tools/testing/selftests/powerpc/pmu/bhrb/Makefile  |  13 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.c      | 513 +++++++++++++++++++++
 .../selftests/powerpc/pmu/bhrb/bhrb_filters.h      |  16 +
 .../selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S  | 260 +++++++++++
 tools/testing/selftests/powerpc/pmu/event.h        |   5 +
 6 files changed, 816 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/Makefile
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
 create mode 100644 tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S

diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index a9099d9..2e103fd 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -4,7 +4,7 @@ noarg:
 TEST_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c
 
-all: $(TEST_PROGS) ebb
+all: $(TEST_PROGS) ebb bhrb
 
 $(TEST_PROGS): $(EXTRA_SOURCES)
 
@@ -18,25 +18,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
 	$(DEFAULT_RUN_TESTS)
 	$(MAKE) -C ebb run_tests
+	$(MAKE) -C bhrb run_tests
 endef
 
 DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
 override define EMIT_TESTS
 	$(DEFAULT_EMIT_TESTS)
 	$(MAKE) -s -C ebb emit_tests
+	$(MAKE) -s -C bhrb emit_tests
 endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
 	$(DEFAULT_INSTALL_RULE)
 	$(MAKE) -C ebb install
+	$(MAKE) -C bhrb install
 endef
 
 clean:
 	rm -f $(TEST_PROGS) loop.o
 	$(MAKE) -C ebb clean
+	$(MAKE) -C bhrb clean
 
 ebb:
 	$(MAKE) -k -C $@ all
 
-.PHONY: all run_tests clean ebb
+bhrb:
+	$(MAKE) -k -C $@ all
+
+.PHONY: all run_tests clean ebb bhrb
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/Makefile b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
new file mode 100644
index 0000000..61c032a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile
@@ -0,0 +1,13 @@
+noarg:
+	$(MAKE) -C ../../
+
+TEST_PROGS := bhrb_filters
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c bhrb_filters_asm.S
+
+include ../../../lib.mk
+
+clean:
+	rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
new file mode 100644
index 0000000..13e6b72
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
@@ -0,0 +1,513 @@
+/*
+ * BHRB filter test (HW & SW)
+ *
+ * Copyright 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <poll.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "bhrb_filters.h"
+#include "utils.h"
+#include "../event.h"
+#include "../lib.h"
+
+/* Fetched address counts */
+#define ALL_MAX		32
+#define CALL_MAX	12
+#define RET_MAX		10
+#define COND_MAX	8
+#define IND_MAX		4
+
+/* Test tunables */
+#define LOOP_COUNT	10
+#define SAMPLE_PERIOD	10000
+
+static int branch_sample_type;
+static int branch_test_set[27] = {
+		PERF_SAMPLE_BRANCH_ANY_CALL,		/* Single filters */
+		PERF_SAMPLE_BRANCH_ANY_RETURN,
+		PERF_SAMPLE_BRANCH_COND,
+		PERF_SAMPLE_BRANCH_IND_CALL,
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |		/* Double filters */
+		PERF_SAMPLE_BRANCH_ANY_RETURN,
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_COND,
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_IND_CALL,
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND,
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_IND_CALL,
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_IND_CALL,
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_IND_CALL |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |		/* Tripple filters */
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_IND_CALL,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_IND_CALL,
+
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_IND_CALL |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |		/* Quadruple filters */
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_IND_CALL,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_IND_CALL |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_IND_CALL |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_IND_CALL |
+		PERF_SAMPLE_BRANCH_ANY,
+
+		PERF_SAMPLE_BRANCH_ANY_CALL |		/* All filters */
+		PERF_SAMPLE_BRANCH_ANY_RETURN |
+		PERF_SAMPLE_BRANCH_COND |
+		PERF_SAMPLE_BRANCH_IND_CALL |
+		PERF_SAMPLE_BRANCH_ANY };
+
+static unsigned int all_set[ALL_MAX], call_set[CALL_MAX];
+static unsigned int ret_set[RET_MAX], cond_set[COND_MAX], ind_set[IND_MAX];
+
+static bool has_failed;
+static unsigned long branch_any_call;
+static unsigned long branch_any_return;
+static unsigned long branch_cond;
+static unsigned long branch_ind_call;
+static unsigned long branch_any;
+static unsigned long branch_total;
+
+static void init_branch_stats(void)
+{
+	branch_any_call = 0;
+	branch_any_return = 0;
+	branch_cond = 0;
+	branch_ind_call = 0;
+	branch_any = 0;
+	branch_total = 0;
+}
+
+static void show_branch_stats(void)
+{
+	printf("BRANCH STATS\n");
+	printf("ANY_CALL:	%ld\n", branch_any_call);
+	printf("ANY_RETURN:	%ld\n", branch_any_return);
+	printf("COND:		%ld\n", branch_cond);
+	printf("IND_CALL:	%ld\n", branch_ind_call);
+	printf("ANY:		%ld\n", branch_any);
+	printf("TOTAL:		%ld\n", branch_total);
+
+}
+
+static void fetch_branches(void)
+{
+	int i;
+
+	/* Clear */
+	memset(all_set, 0, sizeof(all_set));
+	memset(call_set, 0, sizeof(call_set));
+	memset(ret_set, 0, sizeof(ret_set));
+	memset(cond_set, 0, sizeof(cond_set));
+	memset(ind_set, 0, sizeof(ind_set));
+
+	/* Fetch */
+	fetch_all_branches(all_set);
+	fetch_all_calls(call_set);
+	fetch_all_rets(ret_set);
+	fetch_all_conds(cond_set);
+	fetch_all_inds(ind_set);
+
+	/* Display */
+	printf("ANY branches\n");
+	for (i = 0; i < ALL_MAX; i += 2)
+		printf("%x ---> %x\n", all_set[i], all_set[i + 1]);
+
+	printf("ANY_CALL branches\n");
+	for (i = 0; i < CALL_MAX; i += 2)
+		printf("%x ---> %x\n", call_set[i], call_set[i + 1]);
+
+	printf("ANY_RETURN branches\n");
+	for (i = 0; i < RET_MAX; i += 2)
+		printf("%x ---> %x\n", ret_set[i], ret_set[i + 1]);
+
+	printf("COND branches\n");
+	for (i = 0; i < COND_MAX; i += 2)
+		printf("%x ---> %x\n", cond_set[i], cond_set[i + 1]);
+
+	printf("IND_CALL branches\n");
+	for (i = 0; i < IND_MAX; i += 2)
+		printf("%x ---> %x\n", ind_set[i], ind_set[i + 1]);
+
+}
+
+/* Perf mmap stats */
+static unsigned long record_sample;
+static unsigned long record_mmap;
+static unsigned long record_lost;
+static unsigned long record_throttle;
+static unsigned long record_unthrottle;
+static unsigned long record_overlap;
+
+static void init_perf_mmap_stats(void)
+{
+	record_sample = 0;
+	record_mmap = 0;
+	record_lost = 0;
+	record_throttle = 0;
+	record_unthrottle = 0;
+	record_overlap = 0;
+}
+
+static void show_perf_mmap_stats(void)
+{
+	printf("PERF STATS\n");
+	printf("OVERLAP:		%ld\n", record_overlap);
+	printf("RECORD_SAMPLE:		%ld\n", record_sample);
+	printf("RECORD_MAP:		%ld\n", record_mmap);
+	printf("RECORD_LOST:		%ld\n", record_lost);
+	printf("RECORD_THROTTLE:	%ld\n", record_throttle);
+	printf("RECORD_UNTHROTTLE:	%ld\n", record_unthrottle);
+}
+
+static bool search_all_set(unsigned long from, unsigned long to)
+{
+	int i;
+
+	for (i = 0; i < ALL_MAX; i += 2) {
+		if ((all_set[i] == from) && (all_set[i+1] == to))
+			return true;
+	}
+	return false;
+}
+
+static bool search_call_set(unsigned long from, unsigned long to)
+{
+	int i;
+
+	for (i = 0; i < CALL_MAX; i += 2) {
+		if ((call_set[i] == from) && (call_set[i+1] == to))
+			return true;
+	}
+	return false;
+}
+
+static bool search_ret_set(unsigned long from, unsigned long to)
+{
+	int i;
+
+	for (i = 0; i < RET_MAX; i += 2) {
+		if ((ret_set[i] == from) && (ret_set[i+1] == to))
+			return true;
+	}
+	return false;
+}
+
+static bool search_cond_set(unsigned long from, unsigned long to)
+{
+	int i;
+
+	for (i = 0; i < COND_MAX; i += 2) {
+		if ((cond_set[i] == from) && (cond_set[i+1] == to))
+			return true;
+	}
+	return false;
+}
+
+static bool search_ind_set(unsigned long from, unsigned long to)
+{
+	int i;
+
+	for (i = 0; i < IND_MAX; i += 2) {
+		if ((ind_set[i] == from) && (ind_set[i+1] == to))
+			return true;
+	}
+	return false;
+}
+
+static int check_branch(unsigned long from, unsigned long to)
+{
+	bool result = false;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+		if (search_call_set(from, to)) {
+			branch_any_call++;
+			result = true;
+		}
+	}
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) {
+		if (search_ret_set(from, to)) {
+			branch_any_return++;
+			result = true;
+		}
+	}
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) {
+		if (search_cond_set(from, to)) {
+			branch_cond++;
+			result = true;
+		}
+	}
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) {
+		if (search_ind_set(from, to)) {
+			branch_ind_call++;
+			result = true;
+		}
+	}
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) {
+		if (search_all_set(from, to)) {
+			branch_any++;
+			result = true;
+		}
+	}
+
+	branch_total++;
+	return result;
+}
+
+static void *ring_buffer_mask(struct ring_buffer *r, void *p)
+{
+	unsigned long l = (unsigned long)p;
+
+	return (void *)(r->ring_base + ((l - r->ring_base) & r->mask));
+}
+
+static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r)
+{
+	unsigned long from, to, flag;
+	int i, nr;
+	int64_t *v;
+
+	/* NR Branches */
+	v = ring_buffer_mask(r, hdr + 1);
+
+	nr = *v;
+
+	/* Branches */
+	for (i = 0; i < nr; i++) {
+		v = ring_buffer_mask(r, v + 1);
+		from = *v;
+
+		v = ring_buffer_mask(r, v + 1);
+		to = *v;
+
+		v = ring_buffer_mask(r, v + 1);
+		flag = *v;
+
+		if (!check_branch(from, to)) {
+			has_failed = true;
+			printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n",
+					branch_sample_type, from, to, flag);
+		}
+	}
+}
+
+static void read_ring_buffer(struct event *e)
+{
+	struct ring_buffer *r = &e->ring_buffer;
+	struct perf_event_header *hdr;
+	int old, head;
+
+	head = r->page->data_head & r->mask;
+
+	asm volatile ("sync": : :"memory");
+
+	old = r->page->data_tail & r->mask;
+
+	while (old != head) {
+		hdr = (struct perf_event_header *)(r->ring_base + old);
+
+		if ((old & r->mask) + hdr->size !=
+					((old + hdr->size) & r->mask))
+			++record_overlap;
+
+		if (hdr->type == PERF_RECORD_SAMPLE) {
+			++record_sample;
+			dump_sample(hdr, r);
+		}
+
+		if (hdr->type == PERF_RECORD_MMAP)
+			++record_mmap;
+
+		if (hdr->type == PERF_RECORD_LOST)
+			++record_lost;
+
+		if (hdr->type == PERF_RECORD_THROTTLE)
+			++record_throttle;
+
+		if (hdr->type == PERF_RECORD_UNTHROTTLE)
+			++record_unthrottle;
+
+		old = (old + hdr->size) & r->mask;
+	}
+	r->page->data_tail = old;
+}
+
+static void event_mmap(struct event *e)
+{
+	struct ring_buffer *r = &e->ring_buffer;
+
+	r->page = mmap(NULL, 9 * getpagesize(), PROT_READ |
+					PROT_WRITE, MAP_SHARED, e->fd, 0);
+
+	if (r->page == MAP_FAILED) {
+		r->page = NULL;
+		perror("mmap() failed");
+	}
+
+	r->mask = (8 * getpagesize()) - 1;
+	r->ring_base = (unsigned long)r->page + getpagesize();
+
+}
+
+static int filter_test(void)
+{
+	struct pollfd pollfd;
+	struct event ebhrb;
+	pid_t pid;
+	int ret, loop = 0;
+
+	has_failed = false;
+	pid = fork();
+	if (pid == -1) {
+		perror("fork() failed");
+		return 1;
+	}
+
+	/* Run child */
+	if (pid == 0) {
+		start_loop();
+		exit(0);
+	}
+
+	/* Prepare event */
+	event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS,
+				PERF_TYPE_HARDWARE, "insturctions");
+	ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+	ebhrb.attr.disabled = 1;
+	ebhrb.attr.mmap = 1;
+	ebhrb.attr.mmap_data = 1;
+	ebhrb.attr.sample_period = SAMPLE_PERIOD;
+	ebhrb.attr.exclude_user = 0;
+	ebhrb.attr.exclude_kernel = 1;
+	ebhrb.attr.exclude_hv = 1;
+	ebhrb.attr.branch_sample_type = branch_sample_type;
+
+	/* Open event */
+	event_open_with_pid(&ebhrb, pid);
+
+	/* Mmap ring buffer and enable event */
+	event_mmap(&ebhrb);
+	FAIL_IF(event_enable(&ebhrb));
+
+	/* Prepare polling */
+	pollfd.fd = ebhrb.fd;
+	pollfd.events = POLLIN;
+
+	for (loop = 0; loop < LOOP_COUNT; loop++) {
+		ret = poll(&pollfd, 1, -1);
+		if (ret == -1) {
+			perror("poll() failed");
+			return 1;
+		}
+		if (ret == 0) {
+			perror("poll() timeout");
+			return 1;
+		}
+		read_ring_buffer(&ebhrb);
+		if (has_failed)
+			return 1;
+	}
+
+	/* Disable and close event */
+	FAIL_IF(event_disable(&ebhrb));
+	event_close(&ebhrb);
+
+	/* Terminate child */
+	kill(pid, SIGKILL);
+	return 0;
+}
+
+static int  bhrb_filters_test(void)
+{
+	int i;
+
+	/* Fetch branches */
+	fetch_branches();
+	init_branch_stats();
+	init_perf_mmap_stats();
+
+	for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) {
+		branch_sample_type = branch_test_set[i];
+		if (filter_test())
+			return 1;
+	}
+
+	/* Show stats */
+	show_branch_stats();
+	show_perf_mmap_stats();
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(bhrb_filters_test, "bhrb_filters");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
new file mode 100644
index 0000000..072375a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Assembly routines */
+extern void fetch_all_branches(unsigned int *);
+extern void fetch_all_calls(unsigned int *);
+extern void fetch_all_rets(unsigned int *);
+extern void fetch_all_conds(unsigned int *);
+extern void fetch_all_inds(unsigned int *);
+extern void start_loop(void);
diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S
new file mode 100644
index 0000000..e72a585
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S
@@ -0,0 +1,260 @@
+/*
+ * Assembly functions for BHRB test
+ *
+ * Copyright 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define GLUE(a,b) a##b
+#define _GLOBAL(name) \
+       .section ".text"; \
+       .align 2 ; \
+       .globl name; \
+       .globl GLUE(.,name); \
+       .section ".opd","aw"; \
+name: \
+       .quad GLUE(.,name); \
+       .quad .TOC.@tocbase; \
+       .quad 0; \
+       .previous; \
+       .type GLUE(.,name),@function; \
+GLUE(.,name):
+
+#define LR_SAVE 19
+
+#define LOAD_ADDR(reg, label)			\
+	lis     reg,(label)@highest;		\
+	ori     reg,reg,(label)@higher;		\
+	rldicr  reg,reg,32,31;			\
+	oris    reg,reg,(label)@h;		\
+	ori     reg,reg,(label)@l;		\
+
+#define LOAD_LABEL(reg1, label1, reg2, label2)	\
+	lis     reg1,(label1)@highest;		\
+	ori     reg1,reg1,(label1)@higher;	\
+	rldicr  reg1,reg1,32,31;		\
+	oris    reg1,reg1,(label1)@h;		\
+	ori     reg1,reg1,(label1)@l;		\
+	lis     reg2,(label2)@highest;		\
+	ori     reg2,reg2,(label2)@higher;	\
+	rldicr  reg2,reg2,32,31;		\
+	oris    reg2,reg2,(label2)@h;		\
+	ori     reg2,reg2,(label2)@l;		\
+
+_GLOBAL(start_loop)
+label:
+	b label0			/* ANY */
+	blr				/* ANY_RETURN */
+label0:
+	b label1			/* ANY */
+
+label1:
+	b label2			/* ANY */
+
+label2:
+	b label3			/* ANY */
+
+label3:
+	mflr LR_SAVE
+	bl label4			/* ANY | ANY_CALL */
+	mtlr LR_SAVE
+	b start_loop			/* ANY */
+label4:
+	mflr LR_SAVE
+	li 20, 12
+	cmpi 3, 20, 12
+	bcl 12, 4 * cr3+2, label5	/* ANY | ANY_CALL | COND */
+	li 20, 12
+	cmpi 4, 20, 20
+	bcl 12, 4 * cr4+0, label5	/* ANY | ANY_CALL | COND */
+	LOAD_ADDR(20, label5)
+	mtctr 20
+	li 22, 10
+	cmpi 0, 22, 10
+	bcctrl 12, 4*cr0+2		/* ANY | NY_CALL | IND_CALL | COND */
+	LOAD_ADDR(20, label5)
+	mtlr 20
+	li      20, 10
+	cmpi    0, 20, 10
+	bclrl   12, 4*cr0+2		/* ANY | ANY_CALL | IND_CALL | COND */
+	mtlr LR_SAVE
+	blr				/* ANY | ANY_RETURN */
+
+label5:
+	blr				/* ANY | ANY_RETURN */
+
+_GLOBAL(fetch_all_branches)
+	LOAD_LABEL(20, label, 21, label0)
+	st 20, 0(3)
+	st 21, 4(3)
+
+	LOAD_LABEL(20, label0, 21, label1)
+	st 20, 8(3)
+	st 21, 12(3)
+
+	LOAD_LABEL(20, label1, 21, label2)
+	st 20, 16(3)
+	st 21, 20(3)
+
+	LOAD_LABEL(20, label2, 21, label3)
+	st 20, 24(3)
+	st 21, 28(3)
+
+	LOAD_LABEL(20, label3, 21, label4)
+	addi 20, 20, 4
+	st 20, 32(3)
+	st 21, 36(3)
+
+	LOAD_LABEL(20, label3, 21, label)
+	addi 20, 20, 12
+	st 20, 40(3)
+	st 21, 44(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 12
+	st 20, 48(3)
+	st 21, 52(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 24
+	st 20, 56(3)
+	st 21, 60(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 60
+	st 20, 64(3)
+	st 21, 68(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 96
+	st 20, 72(3)
+	st 21, 76(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 104
+	st 20, 80(3)
+	st 21, 84(3)
+
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 88(3)
+	addi 21, 21, 16
+	st 21, 92(3)
+
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 96(3)
+	addi 21, 21, 28
+	st 21, 100(3)
+
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 104(3)
+	addi 21, 21, 64
+	st 21, 108(3)
+
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 112(3)
+	addi 21, 21, 100
+	st 21, 116(3)
+
+	LOAD_LABEL(20, label4, 21, label3)
+	addi 20, 20, 104
+	st 20, 120(3)
+	addi 21, 21, 8
+	st 21, 124(3)
+	blr
+
+_GLOBAL(fetch_all_calls)
+	LOAD_LABEL(20, label3, 21, label4)
+	addi 20, 20, 4
+	st 20, 0(3)
+	st 21, 4(3)
+
+	LOAD_LABEL(20, label3, 21, label4)
+	addi 20, 20, 4
+	st 20, 8(3)
+	st 21, 12(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 12
+	st 20, 16(3)
+	st 21, 20(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 24
+	st 20, 24(3)
+	st 21, 28(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 60
+	st 20, 32(3)
+	st 21, 36(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 96
+	st 20, 40(3)
+	st 21, 44(3)
+	blr
+
+_GLOBAL(fetch_all_rets)
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 0(3)
+	addi 21, 21, 16
+	st 21, 4(3)
+
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 8(3)
+	addi 21, 21, 28
+	st 21, 12(3)
+
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 16(3)
+	addi 21, 21, 64
+	st 21, 20(3)
+
+	LOAD_LABEL(20, label5, 21, label4)
+	st 20, 24(3)
+	addi 21, 21, 100
+	st 21, 28(3)
+
+	LOAD_LABEL(20, label4, 21, label3)
+	addi 20, 20, 104
+	st 20, 32(3)
+	addi 21, 21, 8
+	st 21, 36(3)
+	blr
+
+_GLOBAL(fetch_all_conds)
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 12
+	st 20, 0(3)
+	st 21, 4(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 24
+	st 20, 8(3)
+	st 21, 12(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 60
+	st 20, 16(3)
+	st 21, 20(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 96
+	st 20, 24(3)
+	st 21, 28(3)
+	blr
+
+_GLOBAL(fetch_all_inds)
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 60
+	st 20, 0(3)
+	st 21, 4(3)
+
+	LOAD_LABEL(20, label4, 21, label5)
+	addi 20, 20, 96
+	st 20, 8(3)
+	st 21, 12(3)
+	blr
diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
index a0ea6b1..385b1c4 100644
--- a/tools/testing/selftests/powerpc/pmu/event.h
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -11,6 +11,10 @@
 
 #include "utils.h"
 
+struct ring_buffer {
+	struct perf_event_mmap_page *page;
+	unsigned long ring_base, old, mask;
+};
 
 struct event {
 	struct perf_event_attr attr;
@@ -22,6 +26,7 @@ struct event {
 		u64 running;
 		u64 enabled;
 	} result;
+	struct ring_buffer ring_buffer;
 };
 
 void event_init(struct event *e, u64 config);
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)
  2015-06-08 11:38 ` [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW) Anshuman Khandual
@ 2015-06-09  5:41   ` Anshuman Khandual
  2015-06-11  2:09   ` Daniel Axtens
  1 sibling, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-09  5:41 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mikey, sukadev

On 06/08/2015 05:08 PM, Anshuman Khandual wrote:
> From: "khandual@linux.vnet.ibm.com" <khandual@linux.vnet.ibm.com>

This should be "Anshuman Khandual <khandual@linux.vnet.ibm.com>" and it happened
to couple of other patches in this series as well. I believe it got messed up in
a test machine, will fix it next time around.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched
  2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
                   ` (8 preceding siblings ...)
  2015-06-08 11:38 ` [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW) Anshuman Khandual
@ 2015-06-10  3:21 ` Daniel Axtens
  2015-06-10 12:02   ` Anshuman Khandual
  9 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-10  3:21 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 1748 bytes --]

Hi Anshuman,

Was there a cover letter for this series that I missed?

On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
> BHRB is a rolling buffer. Hence we might end up in a situation where
Could you spell out what BHRB stands for?

> we have read one target address but when we try to read the next entry
>  indicating the from address of the targe, the buffer just overflows.
target?

> In this case, the captured from address will be zero which indicates
> the end of the buffer.
> 
In what sort of situations would this occur? It seems like something we
would want to avoid if possible?

> This patch drops the entire branch record which would have otherwise
> confused the user space tools.
> 
> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> ---
>  arch/powerpc/perf/core-book3s.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 12b6384..c246e65 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -452,7 +452,6 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
>  			 *    In this case we need to read the instruction from
>  			 *    memory to determine the target/to address.
>  			 */
> -
>  			if (val & BHRB_TARGET) {
>  				/* Target branches use two entries
>  				 * (ie. computed gotos/XL form)
> @@ -463,6 +462,8 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
>  
>  				/* Get from address in next entry */
>  				val = read_bhrb(r_index++);
> +				if (!val)
> +					break;
>  				addr = val & BHRB_EA;
>  				if (val & BHRB_TARGET) {
>  					/* Shouldn't have two targets in a


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB
  2015-06-08 11:38 ` [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB Anshuman Khandual
@ 2015-06-10  3:43   ` Daniel Axtens
  2015-06-10 12:08     ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-10  3:43 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 4559 bytes --]

In the subject line, privilege should only have 1 l, and I think it
should probably start with "powerpc/perf:" rather than "powerpc, perf:".

On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
> From: "khandual@linux.vnet.ibm.com" <khandual@linux.vnet.ibm.com>
> 
> 'commit 9de5cb0f6df8 ("powerpc/perf: Add per-event excludes on Power8")'
Does this need a 'Fixes:' tag then?

> broke the PMU based BHRB privilege level filter. BHRB depends on the
> same MMCR0 bits for privilege level filter which was used to freeze all
> the PMCs as a group. Once we moved to individual event based privilege
> filters through MMCR2 register on POWER8, event associated privilege
> filters are no longer applicable to the BHRB captured branches.
> 
> This patch solves the problem by restoring to the previous method of
> privilege level filters for the event in case BHRB based branch stack
> sampling is requested. This patch also changes 'check_excludes' for
> the same reason.
> 
> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> ---
>  arch/powerpc/perf/core-book3s.c | 19 +++++++++++--------
>  1 file changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index c246e65..ae61629 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -930,7 +930,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
>   * added events.
>   */
Does this comment need to be updated?
>  static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
> -			  int n_prev, int n_new)
> +			  int n_prev, int n_new, int bhrb_users)
>  {
>  	int eu = 0, ek = 0, eh = 0;
>  	int i, n, first;
> @@ -941,7 +941,7 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
>  	 * don't need to do any of this logic. NB. This assumes no PMU has both
>  	 * per event exclude and limited PMCs.
>  	 */
Likewise, does this comment need to be updated?
> -	if (ppmu->flags & PPMU_ARCH_207S)
> +	if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
>  		return 0;
>  
>  	n = n_prev + n_new;
> @@ -1259,7 +1259,7 @@ static void power_pmu_enable(struct pmu *pmu)
>  		goto out;
>  	}
>  
> -	if (!(ppmu->flags & PPMU_ARCH_207S)) {
> +	if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users)
You're using cpuhw->bhrb_users as a bool here, where it's an int. Could
you make the test more specific so that it's clear exactly what you're
expecting bhrb_users to contain?
>  {
>  		/*
>  		 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
>  		 * bits for the first event. We have already checked that all
> @@ -1284,7 +1284,7 @@ static void power_pmu_enable(struct pmu *pmu)
>  	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
>  	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
>  				| MMCR0_FC);
> -	if (ppmu->flags & PPMU_ARCH_207S)
> +	if ((ppmu->flags & PPMU_ARCH_207S) && !cpuhw->bhrb_users)
>  		mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
>  
>  	/*
> @@ -1436,7 +1436,8 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
>  	if (cpuhw->group_flag & PERF_EVENT_TXN)
>  		goto nocheck;
>  
> -	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
> +	if (check_excludes(cpuhw->event, cpuhw->flags,
> +				n0, 1, cpuhw->bhrb_users))
>  		goto out;
>  	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
>  		goto out;
> @@ -1615,7 +1616,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
>  		return -EAGAIN;
>  	cpuhw = this_cpu_ptr(&cpu_hw_events);
>  	n = cpuhw->n_events;
> -	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
> +	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n, cpuhw->bhrb_users))
>  		return -EAGAIN;
>  	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
>  	if (i < 0)
> @@ -1828,10 +1829,12 @@ static int power_pmu_event_init(struct perf_event *event)
>  	events[n] = ev;
>  	ctrs[n] = event;
>  	cflags[n] = flags;
> -	if (check_excludes(ctrs, cflags, n, 1))
> +	cpuhw = &get_cpu_var(cpu_hw_events);
Should this be using a this_cpu_ptr rather than a get_cpu_var? (as with
the power_pmu_commit_txn case?)
> +	if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users)) {
> +		put_cpu_var(cpu_hw_events);
Likewise with this?
>  		return -EINVAL;
> +	}
>  
> -	cpuhw = &get_cpu_var(cpu_hw_events);
>  	err = power_check_constraints(cpuhw, events, cflags, n + 1);
>  
>  	if (has_branch_stack(event)) {


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing
  2015-06-08 11:38 ` [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing Anshuman Khandual
@ 2015-06-10  4:36   ` Daniel Axtens
  2015-06-10 12:09     ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-10  4:36 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 1148 bytes --]


> +void update_branch_entry(struct cpu_hw_events *cpuhw,
> +			int index, u64 from, u64 to, int pred)
> +{
> +	cpuhw->bhrb_entries[index].from = from;
> +	cpuhw->bhrb_entries[index].to = to;
> +	cpuhw->bhrb_entries[index].mispred = pred;
> +	cpuhw->bhrb_entries[index].predicted = ~pred;
> +}

I realise you're copying existing code, but:
 - could you please rename pred? If we assign .mispred to pred
and .predicted to ~pred, we should pick a different name for pred.
 - I'm really uncomfortable with the bitwise inverting a signed integer.
Can you explain what is going on here? Looking at
include/uapi/linux/perf_event.h, this seems to be a single bit flag:
shouldn't this then be a logical flip rather than a bitwise one?
(Furthermore, looking at that header, why is pred an int at all? Why not
a bool?)

> +
>  /* Processing BHRB entries */
>  static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
>  {
> -	u64 val;
> -	u64 addr;
> +	u64 val, addr, tmp;
Please don't use 'tmp' here. As far as I can tell, you use this variable
to compute the 'to' address. The name should reflect that.

Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8
  2015-06-08 11:38 ` [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8 Anshuman Khandual
@ 2015-06-10  5:07   ` Daniel Axtens
  2015-06-10 12:09     ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-10  5:07 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 337 bytes --]


>  	/* Every thing else is unsupported */
>  	return -1;
You're returning -1 as a unsigned 64bit number. Other code that reads
this value tests for -1 and I think it works everywhere just because it
wraps around consistently. But I would still rather not do this and I'm
surprised it doesn't throw a warning.

Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions
  2015-06-08 11:38 ` [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions Anshuman Khandual
@ 2015-06-10  5:33   ` Daniel Axtens
  2015-06-10 12:10     ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-10  5:33 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 1410 bytes --]


> +static int instr_is_branch_xlform(unsigned int instr)
> +{
> +	return branch_opcode(instr) == 19;
> +}
Why do these not return bool? The functions below do.
> +
> +static int is_xlform_lr(unsigned int instr)
> +{
> +	return (instr & XL_FORM_LR) == XL_FORM_LR;
> +}
> +
> +static int is_bo_always(unsigned int instr)
> +{
> +	return (instr & BO_ALWAYS) == BO_ALWAYS;
> +}
> +
> +static int is_branch_link_set(unsigned int instr)
> +{
> +	return (instr & BRANCH_SET_LINK) == BRANCH_SET_LINK;
> +}
> +
> +bool instr_is_return_branch(unsigned int instr)
> +{
> +	/*
> +	 * Conditional and unconditional branch to LR register
> +	 * without seting the link register.
setting
> +	 */
> +	if (is_xlform_lr(instr) && !is_branch_link_set(instr))
> +		return true;
> +
> +	return false;
> +}
> +




> +bool instr_is_func_call(unsigned int instr)
> +{
> +	/* LR should be set */
> +	if (is_branch_link_set(instr))
> +		return true;
> +
> +	return false;
> +}
> +
> +bool instr_is_indirect_func_call(unsigned int instr)
> +{
> +	/* XL-form instruction with LR set */
> +	if (instr_is_branch_xlform(instr) && is_branch_link_set(instr))
> +		return true;
> +
> +	return false;
> +}
Both of these functions could be made into a single 'return' statement,
right?
> +
>  int instr_is_relative_branch(unsigned int instr)
>  {
>  	if (instr & BRANCH_ABSOLUTE)


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters
  2015-06-08 11:38 ` [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters Anshuman Khandual
@ 2015-06-10  5:49   ` Daniel Axtens
  2015-06-10 12:10     ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-10  5:49 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 3597 bytes --]

On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
> The kernel now supports SW based branch filters for book3s systems with
> some specific requirements while dealing with HW supported branch filters
> in order to achieve overall OR semantics prevailing in perf branch stack
> sampling framework. This patch adapts the BHRB branch filter configuration
> to meet those protocols. POWER8 PMU can only handle one HW based branch
> filter request at any point of time. For all other combinations PMU will
> pass it on to the SW.
> 
> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> ---
>  arch/powerpc/perf/power8-pmu.c | 51 ++++++++++++++++++++++++++++++++++++------
>  1 file changed, 44 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
> index 5e17cb5..8fccf6c 100644
> --- a/arch/powerpc/perf/power8-pmu.c
> +++ b/arch/powerpc/perf/power8-pmu.c
> @@ -656,6 +656,16 @@ static int power8_generic_events[] = {
>  

This is, I think, the third time you've modified this function in this
patch series. I appreciate the fact that you're trying to keep logical
changes separate, but it seems to me like this change might be able to
be combined with patch 4, and given a single commit message that clearly
explains the complete scope of the changes.
>  static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
>  {
> +	u64 x, pmu_bhrb_filter;
> +
> +	pmu_bhrb_filter = 0;
> +	*bhrb_filter = 0;
> +
> +	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) {
> +		*bhrb_filter = PERF_SAMPLE_BRANCH_ANY;
> +		return pmu_bhrb_filter;
> +	}
> +
>  	/* BHRB and regular PMU events share the same privilege state
>  	 * filter configuration. BHRB is always recorded along with a
>  	 * regular PMU event. As the privilege state filter is handled
> @@ -666,15 +676,42 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
>  	/* Ignore user, kernel, hv bits */
>  	branch_sample_type &= ~PERF_SAMPLE_BRANCH_PLM_ALL;
>  
> -	/* No branch filter requested */
> -	if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
> -		return 0;
> +	/*
> +	 * POWER8 does not support ORing of PMU HW branch filters. Hence
> +	 * if multiple branch filters are requested which may include filters
> +	 * supported in PMU, still go ahead and clear the PMU based HW branch
> +	 * filter component as in this case all the filters will be processed
> +	 * in SW.
> +	 */
>  
> -	if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
> -		return POWER8_MMCRA_IFM1;
> +	for_each_branch_sample_type(x) {
> +		/* Ignore privilege branch filters */
> +		if ((x == PERF_SAMPLE_BRANCH_USER)
> +			|| (x == PERF_SAMPLE_BRANCH_KERNEL)
> +				|| (x == PERF_SAMPLE_BRANCH_HV))
> +			continue;
> +
> +		if (!(branch_sample_type & x))
> +			continue;
> +
> +		/* Supported individual PMU branch filters */
> +		if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
> +			branch_sample_type &= ~PERF_SAMPLE_BRANCH_ANY_CALL;
> +			if (branch_sample_type) {
> +				/* Multiple filters will be processed in SW */
> +				pmu_bhrb_filter = 0;
> +				*bhrb_filter = 0;
> +				return pmu_bhrb_filter;
> +			} else {
> +				/* Individual filter will be processed in HW */
> +				pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
> +				*bhrb_filter    |= PERF_SAMPLE_BRANCH_ANY_CALL;
> +				return pmu_bhrb_filter;
> +			}
> +		}
> +	}
>  
> -	/* Every thing else is unsupported */
> -	return -1;
> +	return pmu_bhrb_filter;
>  }

Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched
  2015-06-10  3:21 ` [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Daniel Axtens
@ 2015-06-10 12:02   ` Anshuman Khandual
  2015-06-11  2:22     ` Daniel Axtens
  0 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-10 12:02 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/10/2015 08:51 AM, Daniel Axtens wrote:
> Hi Anshuman,
> 
> Was there a cover letter for this series that I missed?

This is the continuation (rebased and reworked) of the series
posted at https://lkml.org/lkml/2014/5/5/153 (which is V6). I
remember to have incremented the count for the re-send of the
first four patches of the series to Peter Z for generic review
which got pulled in last year. These patches here are the
remaining powerpc part of the original series. Will list down
the current changes as well next time around along with the new
ones.

> 
> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> BHRB is a rolling buffer. Hence we might end up in a situation where
> Could you spell out what BHRB stands for?

Branch History Rolling Buffer, would you like to have that in the
commit message as well ?

> 
>> we have read one target address but when we try to read the next entry
>>  indicating the from address of the targe, the buffer just overflows.
> target?

Yeah its target address.

> 
>> In this case, the captured from address will be zero which indicates
>> the end of the buffer.
>>
> In what sort of situations would this occur? It seems like something we
> would want to avoid if possible?

Its not avoidable. During regular flow of branch recording, the HW would
have written both the records correctly but then the new ones came in and
we just happen to loose one of them causing this situation.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB
  2015-06-10  3:43   ` Daniel Axtens
@ 2015-06-10 12:08     ` Anshuman Khandual
  2015-06-11  3:28       ` Daniel Axtens
  0 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-10 12:08 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/10/2015 09:13 AM, Daniel Axtens wrote:
> In the subject line, privilege should only have 1 l, and I think it
> should probably start with "powerpc/perf:" rather than "powerpc, perf:".

Will fix the typo here. Have been using "powerpc, perf:" format for some
time now :) Seems to be more cleaner compared to "powerpc/perf:" format.
But again its subjective.

 > > On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> From: "khandual@linux.vnet.ibm.com" <khandual@linux.vnet.ibm.com>
>>
>> 'commit 9de5cb0f6df8 ("powerpc/perf: Add per-event excludes on Power8")'
> Does this need a 'Fixes:' tag then?

Not really, it only fixes the BHRB privilege request cases not other
scenarios which are impacted by this previous commit.
 
> 
>> broke the PMU based BHRB privilege level filter. BHRB depends on the
>> same MMCR0 bits for privilege level filter which was used to freeze all
>> the PMCs as a group. Once we moved to individual event based privilege
>> filters through MMCR2 register on POWER8, event associated privilege
>> filters are no longer applicable to the BHRB captured branches.
>>
>> This patch solves the problem by restoring to the previous method of
>> privilege level filters for the event in case BHRB based branch stack
>> sampling is requested. This patch also changes 'check_excludes' for
>> the same reason.
>>
>> Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/perf/core-book3s.c | 19 +++++++++++--------
>>  1 file changed, 11 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index c246e65..ae61629 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -930,7 +930,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
>>   * added events.
>>   */
> Does this comment need to be updated?

Not really. The previous commit did not update it, hence this patch would
skip it as well.

>>  static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
>> -			  int n_prev, int n_new)
>> +			  int n_prev, int n_new, int bhrb_users)
>>  {
>>  	int eu = 0, ek = 0, eh = 0;
>>  	int i, n, first;
>> @@ -941,7 +941,7 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
>>  	 * don't need to do any of this logic. NB. This assumes no PMU has both
>>  	 * per event exclude and limited PMCs.
>>  	 */
> Likewise, does this comment need to be updated?

Yeah, will update it.

>> -	if (ppmu->flags & PPMU_ARCH_207S)
>> +	if ((ppmu->flags & PPMU_ARCH_207S) && !bhrb_users)
>>  		return 0;
>>  
>>  	n = n_prev + n_new;
>> @@ -1259,7 +1259,7 @@ static void power_pmu_enable(struct pmu *pmu)
>>  		goto out;
>>  	}
>>  
>> -	if (!(ppmu->flags & PPMU_ARCH_207S)) {
>> +	if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users)

> You're using cpuhw->bhrb_users as a bool here, where it's an int. Could
> you make the test more specific so that it's clear exactly what you're
> expecting bhrb_users to contain?

Using cpuhw->bhrb_users as a bool just verifies whether it contains
zero or non-zero value in it. The test seems to be doing that as
expected. But yes, we can move it as a nested conditional block as
well if that is better.

>>  {
>>  		/*
>>  		 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
>>  		 * bits for the first event. We have already checked that all
>> @@ -1284,7 +1284,7 @@ static void power_pmu_enable(struct pmu *pmu)
>>  	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
>>  	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
>>  				| MMCR0_FC);
>> -	if (ppmu->flags & PPMU_ARCH_207S)
>> +	if ((ppmu->flags & PPMU_ARCH_207S) && !cpuhw->bhrb_users)
>>  		mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
>>  
>>  	/*
>> @@ -1436,7 +1436,8 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
>>  	if (cpuhw->group_flag & PERF_EVENT_TXN)
>>  		goto nocheck;
>>  
>> -	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
>> +	if (check_excludes(cpuhw->event, cpuhw->flags,
>> +				n0, 1, cpuhw->bhrb_users))
>>  		goto out;
>>  	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
>>  		goto out;
>> @@ -1615,7 +1616,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
>>  		return -EAGAIN;
>>  	cpuhw = this_cpu_ptr(&cpu_hw_events);
>>  	n = cpuhw->n_events;
>> -	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
>> +	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n, cpuhw->bhrb_users))
>>  		return -EAGAIN;
>>  	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
>>  	if (i < 0)
>> @@ -1828,10 +1829,12 @@ static int power_pmu_event_init(struct perf_event *event)
>>  	events[n] = ev;
>>  	ctrs[n] = event;
>>  	cflags[n] = flags;
>> -	if (check_excludes(ctrs, cflags, n, 1))
>> +	cpuhw = &get_cpu_var(cpu_hw_events);
> Should this be using a this_cpu_ptr rather than a get_cpu_var? (as with
> the power_pmu_commit_txn case?)
>> +	if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users)) {
>> +		put_cpu_var(cpu_hw_events);
> Likewise with this?
>>  		return -EINVAL;
>> +	}
>>  
>> -	cpuhw = &get_cpu_var(cpu_hw_events);

This patch just moves the existing code couple of lines above without
changing it in any manner.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing
  2015-06-10  4:36   ` Daniel Axtens
@ 2015-06-10 12:09     ` Anshuman Khandual
  2015-06-11  3:32       ` Daniel Axtens
  0 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-10 12:09 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/10/2015 10:06 AM, Daniel Axtens wrote:
> 
>> +void update_branch_entry(struct cpu_hw_events *cpuhw,
>> +			int index, u64 from, u64 to, int pred)
>> +{
>> +	cpuhw->bhrb_entries[index].from = from;
>> +	cpuhw->bhrb_entries[index].to = to;
>> +	cpuhw->bhrb_entries[index].mispred = pred;
>> +	cpuhw->bhrb_entries[index].predicted = ~pred;
>> +}
> 
> I realise you're copying existing code, but:
>  - could you please rename pred? If we assign .mispred to pred
> and .predicted to ~pred, we should pick a different name for pred.

Agreed.

>  - I'm really uncomfortable with the bitwise inverting a signed integer.
> Can you explain what is going on here? Looking at
> include/uapi/linux/perf_event.h, this seems to be a single bit flag:
> shouldn't this then be a logical flip rather than a bitwise one?
> (Furthermore, looking at that header, why is pred an int at all? Why not
> a bool?)

Agreed.

> 
>> +
>>  /* Processing BHRB entries */
>>  static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
>>  {
>> -	u64 val;
>> -	u64 addr;
>> +	u64 val, addr, tmp;
> Please don't use 'tmp' here. As far as I can tell, you use this variable
> to compute the 'to' address. The name should reflect that.

Agreed but then it will be a new preparatory patch at the beginning
of this patch series.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8
  2015-06-10  5:07   ` Daniel Axtens
@ 2015-06-10 12:09     ` Anshuman Khandual
  0 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-10 12:09 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/10/2015 10:37 AM, Daniel Axtens wrote:
> 
>>  	/* Every thing else is unsupported */
>>  	return -1;
> You're returning -1 as a unsigned 64bit number. Other code that reads
> this value tests for -1 and I think it works everywhere just because it
> wraps around consistently. But I would still rather not do this and I'm
> surprised it doesn't throw a warning.

Thats the existing code which is going away with this patch series.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions
  2015-06-10  5:33   ` Daniel Axtens
@ 2015-06-10 12:10     ` Anshuman Khandual
  0 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-10 12:10 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/10/2015 11:03 AM, Daniel Axtens wrote:
> 
>> +static int instr_is_branch_xlform(unsigned int instr)
>> +{
>> +	return branch_opcode(instr) == 19;
>> +}
> Why do these not return bool? The functions below do.

Yeah they can, will change it.

>> +
>> +bool instr_is_indirect_func_call(unsigned int instr)
>> +{
>> +	/* XL-form instruction with LR set */
>> +	if (instr_is_branch_xlform(instr) && is_branch_link_set(instr))
>> +		return true;
>> +
>> +	return false;
>> +}

> Both of these functions could be made into a single 'return' statement,
> right?

Yeah, right.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters
  2015-06-10  5:49   ` Daniel Axtens
@ 2015-06-10 12:10     ` Anshuman Khandual
  2015-06-11  3:38       ` Daniel Axtens
  0 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-10 12:10 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/10/2015 11:19 AM, Daniel Axtens wrote:
> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> > The kernel now supports SW based branch filters for book3s systems with
>> > some specific requirements while dealing with HW supported branch filters
>> > in order to achieve overall OR semantics prevailing in perf branch stack
>> > sampling framework. This patch adapts the BHRB branch filter configuration
>> > to meet those protocols. POWER8 PMU can only handle one HW based branch
>> > filter request at any point of time. For all other combinations PMU will
>> > pass it on to the SW.
>> > 
>> > Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
>> > ---
>> >  arch/powerpc/perf/power8-pmu.c | 51 ++++++++++++++++++++++++++++++++++++------
>> >  1 file changed, 44 insertions(+), 7 deletions(-)
>> > 
>> > diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
>> > index 5e17cb5..8fccf6c 100644
>> > --- a/arch/powerpc/perf/power8-pmu.c
>> > +++ b/arch/powerpc/perf/power8-pmu.c
>> > @@ -656,6 +656,16 @@ static int power8_generic_events[] = {
>> >  
> This is, I think, the third time you've modified this function in this
> patch series. I appreciate the fact that you're trying to keep logical
> changes separate, but it seems to me like this change might be able to
> be combined with patch 4, and given a single commit message that clearly
> explains the complete scope of the changes.

Here I have to disagree with you. The changes in this patch like PMU
should not handle multiple filter requests as it does not support the
OR semantic required in the protocol, the fact that we need to pass
on the entire branch filtering responsibility to the SW comes into
picture after we have enabled the SW branch filtering support in the
previous patch. So these changes have to follow that up logically and
sequentially in that order.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters
  2015-06-08 11:38 ` [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters Anshuman Khandual
@ 2015-06-11  1:19   ` Daniel Axtens
  2015-06-12  7:04     ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-11  1:19 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 1179 bytes --]

> 	if (sw_filter & PERF_SAMPLE_BRANCH_PLM_ALL) {
> +		flag = false;
Would it be possible to use a more meaningful name than flag? Perhaps
indicating what is it flagging?
> +
> +		if (sw_filter & PERF_SAMPLE_BRANCH_USER) {
> +			if (to_plm == POWER_ADDR_USER)
> +				flag = true;
> +		}
> +
> +		if (sw_filter & PERF_SAMPLE_BRANCH_KERNEL) {
> +			if (to_plm == POWER_ADDR_KERNEL)
> +				flag = true;
> +		}
> +
> +		if (sw_filter & PERF_SAMPLE_BRANCH_HV) {
> +			if (cpu_has_feature(CPU_FTR_HVMODE)) {
> +				if (to_plm == POWER_ADDR_KERNEL)
> +					flag = true;
> +			}
> +		}

Is there any reason these are nested ifs rather than &&s?

> +
> +		if (!flag)
> +			return false;
> +	}
> +

> @@ -700,7 +710,6 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
>  			if (branch_sample_type) {
>  				/* Multiple filters will be processed in SW */
>  				pmu_bhrb_filter = 0;
> -				*bhrb_filter = 0;
>  				return pmu_bhrb_filter;
>  			} else {
>  				/* Individual filter will be processed in HW */
What's the justification for the removal of this line? You added it in
the previous patch...

Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)
  2015-06-08 11:38 ` [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW) Anshuman Khandual
  2015-06-09  5:41   ` Anshuman Khandual
@ 2015-06-11  2:09   ` Daniel Axtens
  2015-06-12  7:02     ` Anshuman Khandual
  1 sibling, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-11  2:09 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 8981 bytes --]

Hi,

On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
> new file mode 100644
> index 0000000..13e6b72
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
> @@ -0,0 +1,513 @@
> +/*
> + * BHRB filter test (HW & SW)
> + *
> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */

This should also be gpl2 only.
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <signal.h>
> +#include <poll.h>
> +#include <sys/shm.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <sys/mman.h>
> +
> +#include "bhrb_filters.h"
> +#include "utils.h"
> +#include "../event.h"
> +#include "../lib.h"
> +
> +/* Fetched address counts */
> +#define ALL_MAX		32
> +#define CALL_MAX	12
> +#define RET_MAX		10
> +#define COND_MAX	8
> +#define IND_MAX		4
> +
> +/* Test tunables */
> +#define LOOP_COUNT	10
> +#define SAMPLE_PERIOD	10000
> +
> +static int branch_sample_type;
> +static int branch_test_set[27] = {
Do you need to explicitly provide the count here?
> +		PERF_SAMPLE_BRANCH_ANY_CALL,		/* Single filters */
> +		PERF_SAMPLE_BRANCH_ANY_RETURN,
> +		PERF_SAMPLE_BRANCH_COND,
> +		PERF_SAMPLE_BRANCH_IND_CALL,
> +		PERF_SAMPLE_BRANCH_ANY,
> +

> +		PERF_SAMPLE_BRANCH_ANY_CALL |		/* Tripple filters */
s/Tripple/Triple/
> +		PERF_SAMPLE_BRANCH_ANY_RETURN |
> +		PERF_SAMPLE_BRANCH_COND,
> +


> +
> +static void *ring_buffer_mask(struct ring_buffer *r, void *p)
Is this actually returning a mask? It looks more like it's calculating
an offset, and that seems to be how you use it below.
> +{
> +	unsigned long l = (unsigned long)p;
> +
> +	return (void *)(r->ring_base + ((l - r->ring_base) & r->mask));
> +}
That's a lot of casts, especially when you then load it into a int64_t
pointer below...
> +
> +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r)
> +{
> +	unsigned long from, to, flag;
> +	int i, nr;
> +	int64_t *v;
> +
> +	/* NR Branches */
> +	v = ring_buffer_mask(r, hdr + 1);
...here. (and everywhere else I can see that you're using the
ring_buffer_mask function)
> +
> +	nr = *v;
You are dereferencing a int64_t pointer into an int. Should nr be an
int64_t? Or should v be a different pointer type?

> +
> +	/* Branches */
> +	for (i = 0; i < nr; i++) {
> +		v = ring_buffer_mask(r, v + 1);
> +		from = *v;
Now you're dereferencing an *int64_t into an unsigned long.
> +
> +		v = ring_buffer_mask(r, v + 1);
> +		to = *v;
> +
> +		v = ring_buffer_mask(r, v + 1);
> +		flag = *v;
> +
> +		if (!check_branch(from, to)) {
> +			has_failed = true;
> +			printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n",
> +					branch_sample_type, from, to, flag);
> +		}
> +	}
> +}
> +
> +static void read_ring_buffer(struct event *e)
> +{
> +	struct ring_buffer *r = &e->ring_buffer;
> +	struct perf_event_header *hdr;
> +	int old, head;
Why not tail and head?
> +
> +	head = r->page->data_head & r->mask;
> +
> +	asm volatile ("sync": : :"memory");
> +
> +	old = r->page->data_tail & r->mask;
> +
Can you explain the logic of syncing between reading head and tail? Is
there an expectation that head is not likely to change?

As a more general comment, what is sync trying to achieve? If you're
trying to synchronise something, what's the sync actually achieving? Is
there a corresponding memory barrier somewhere else? What race
conditions are you trying to guard against and does this actually guard
against them?

> +	while (old != head) {
> +		hdr = (struct perf_event_header *)(r->ring_base + old);
> +
> +		if ((old & r->mask) + hdr->size !=
> +					((old + hdr->size) & r->mask))
> +			++record_overlap;
> +
> +		if (hdr->type == PERF_RECORD_SAMPLE) {
> +			++record_sample;
> +			dump_sample(hdr, r);
> +		}
> +
> +		if (hdr->type == PERF_RECORD_MMAP)
> +			++record_mmap;
> +
> +		if (hdr->type == PERF_RECORD_LOST)
> +			++record_lost;
> +
> +		if (hdr->type == PERF_RECORD_THROTTLE)
> +			++record_throttle;
> +
> +		if (hdr->type == PERF_RECORD_UNTHROTTLE)
> +			++record_unthrottle;
> +
> +		old = (old + hdr->size) & r->mask;
> +	}
> +	r->page->data_tail = old;
What happens if data_tail moves while you're doing the loop?



> +static int filter_test(void)
> +{
> +	struct pollfd pollfd;
> +	struct event ebhrb;
> +	pid_t pid;
> +	int ret, loop = 0;
> +
> +	has_failed = false;
> +	pid = fork();
> +	if (pid == -1) {
> +		perror("fork() failed");
> +		return 1;
> +	}
> +
> +	/* Run child */
> +	if (pid == 0) {
> +		start_loop();
> +		exit(0);
> +	}
> +
> +	/* Prepare event */
> +	event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS,
> +				PERF_TYPE_HARDWARE, "insturctions");
Is instructions deliberately spelled incorrectly?

> +	ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
> +	ebhrb.attr.disabled = 1;
> +	ebhrb.attr.mmap = 1;
> +	ebhrb.attr.mmap_data = 1;
> +	ebhrb.attr.sample_period = SAMPLE_PERIOD;
> +	ebhrb.attr.exclude_user = 0;
> +	ebhrb.attr.exclude_kernel = 1;
> +	ebhrb.attr.exclude_hv = 1;
> +	ebhrb.attr.branch_sample_type = branch_sample_type;
> +
> +	/* Open event */
> +	event_open_with_pid(&ebhrb, pid);
> +
> +	/* Mmap ring buffer and enable event */
> +	event_mmap(&ebhrb);
> +	FAIL_IF(event_enable(&ebhrb));
> +
> +	/* Prepare polling */
> +	pollfd.fd = ebhrb.fd;
> +	pollfd.events = POLLIN;
> +
> +	for (loop = 0; loop < LOOP_COUNT; loop++) {
> +		ret = poll(&pollfd, 1, -1);
> +		if (ret == -1) {
> +			perror("poll() failed");
> +			return 1;
> +		}
> +		if (ret == 0) {
> +			perror("poll() timeout");
> +			return 1;
> +		}
> +		read_ring_buffer(&ebhrb);
> +		if (has_failed)
> +			return 1;
1) I don't see anything that sets has_failed after it's initalised.
2) Should these error cases also explicitly terminate the child? Do you
need something like this perhaps?

		if (ret == 0) {
			perror("poll() failed");
			goto err;
		}

		...
	
	}

	...

	return 0;

	err:
	kill(pid, SIGTERM); // maybe even sigkill in the error case?
	return 1;

> +	}
> +
> +	/* Disable and close event */
> +	FAIL_IF(event_disable(&ebhrb));
> +	event_close(&ebhrb);
Again, do these need to be replicated in the error path?
> +
> +	/* Terminate child */
> +	kill(pid, SIGKILL);
SIGKILL seems a bit harsh: wouldn't SIGTERM work?
> +	return 0;
> +}
> +
> +static int  bhrb_filters_test(void)
> +{
> +	int i;
> +
> +	/* Fetch branches */
> +	fetch_branches();
> +	init_branch_stats();
> +	init_perf_mmap_stats();
> +
> +	for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) {
> +		branch_sample_type = branch_test_set[i];
> +		if (filter_test())
Couldn't branch_sample_type be passed to filter_test as a parameter,
rather than as a global?
> +			return 1;
> +	}
> +


> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
> new file mode 100644
> index 0000000..072375a
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
> @@ -0,0 +1,16 @@
> +/*
> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
License again. (And in the other files in this patch.)


> +_GLOBAL(start_loop)
> +label:
> +	b label0			/* ANY */
> +	blr				/* ANY_RETURN */
> +label0:
> +	b label1			/* ANY */
> +
> +label1:
> +	b label2			/* ANY */
> +
> +label2:
> +	b label3			/* ANY */
> +
> +label3:
> +	mflr LR_SAVE
> +	bl label4			/* ANY | ANY_CALL */
> +	mtlr LR_SAVE
> +	b start_loop			/* ANY */
> +label4:
> +	mflr LR_SAVE
> +	li 20, 12
> +	cmpi 3, 20, 12
> +	bcl 12, 4 * cr3+2, label5	/* ANY | ANY_CALL | COND */
> +	li 20, 12
> +	cmpi 4, 20, 20
> +	bcl 12, 4 * cr4+0, label5	/* ANY | ANY_CALL | COND */
> +	LOAD_ADDR(20, label5)
> +	mtctr 20
> +	li 22, 10
> +	cmpi 0, 22, 10
> +	bcctrl 12, 4*cr0+2		/* ANY | NY_CALL | IND_CALL | COND */
> +	LOAD_ADDR(20, label5)
> +	mtlr 20
> +	li      20, 10
> +	cmpi    0, 20, 10
> +	bclrl   12, 4*cr0+2		/* ANY | ANY_CALL | IND_CALL | COND */
> +	mtlr LR_SAVE
> +	blr				/* ANY | ANY_RETURN */
> +
> +label5:
> +	blr				/* ANY | ANY_RETURN */
> +
Could these labels have more descriptive names?


Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched
  2015-06-10 12:02   ` Anshuman Khandual
@ 2015-06-11  2:22     ` Daniel Axtens
  0 siblings, 0 replies; 35+ messages in thread
From: Daniel Axtens @ 2015-06-11  2:22 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 1731 bytes --]

Hi,

On Wed, 2015-06-10 at 17:32 +0530, Anshuman Khandual wrote:
> On 06/10/2015 08:51 AM, Daniel Axtens wrote:
> > Hi Anshuman,
> > 
> > Was there a cover letter for this series that I missed?
> 
> This is the continuation (rebased and reworked) of the series
> posted at https://lkml.org/lkml/2014/5/5/153 (which is V6). I
> remember to have incremented the count for the re-send of the
> first four patches of the series to Peter Z for generic review
> which got pulled in last year. These patches here are the
> remaining powerpc part of the original series. Will list down
> the current changes as well next time around along with the new
> ones.
> 
> > 
> > On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
> >> BHRB is a rolling buffer. Hence we might end up in a situation where
> > Could you spell out what BHRB stands for?
> 
> Branch History Rolling Buffer, would you like to have that in the
> commit message as well ?
Yes please.
> 
> > 
> >> we have read one target address but when we try to read the next entry
> >>  indicating the from address of the targe, the buffer just overflows.
> > target?
> 
> Yeah its target address.
Ok, I was just trying to point out the spelling mistake above.
> 
> > 
> >> In this case, the captured from address will be zero which indicates
> >> the end of the buffer.
> >>
> > In what sort of situations would this occur? It seems like something we
> > would want to avoid if possible?
> 
> Its not avoidable. During regular flow of branch recording, the HW would
> have written both the records correctly but then the new ones came in and
> we just happen to loose one of them causing this situation.
OK.

Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB
  2015-06-10 12:08     ` Anshuman Khandual
@ 2015-06-11  3:28       ` Daniel Axtens
  2015-06-12  7:06         ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-11  3:28 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 1534 bytes --]


> >>  
> >> -	if (!(ppmu->flags & PPMU_ARCH_207S)) {
> >> +	if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users)
> 
> > You're using cpuhw->bhrb_users as a bool here, where it's an int. Could
> > you make the test more specific so that it's clear exactly what you're
> > expecting bhrb_users to contain?
> 
> Using cpuhw->bhrb_users as a bool just verifies whether it contains
> zero or non-zero value in it. The test seems to be doing that as
> expected. But yes, we can move it as a nested conditional block as
> well if that is better.
> 

What I meant was, should this read (cpuhw->bhrb_users != 0)? Because
bhrb_users in check_excludes() is a signed int, I also wanted to make
sure it shouldn't be a test for bhrb_users > 0 instead. (Also, if
bhrb_users is always positive, should it be an unsigned int?)

I don't think a nested conditional would be better. 



> >> -	if (check_excludes(ctrs, cflags, n, 1))
> >> +	cpuhw = &get_cpu_var(cpu_hw_events);
> > Should this be using a this_cpu_ptr rather than a get_cpu_var? (as with
> > the power_pmu_commit_txn case?)
> >> +	if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users)) {
> >> +		put_cpu_var(cpu_hw_events);
> > Likewise with this?
> >>  		return -EINVAL;
> >> +	}
> >>  
> >> -	cpuhw = &get_cpu_var(cpu_hw_events);
> 
> This patch just moves the existing code couple of lines above without
> changing it in any manner.
> 
I see that, but I still think you should take this opportunity to
improve it.

Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing
  2015-06-10 12:09     ` Anshuman Khandual
@ 2015-06-11  3:32       ` Daniel Axtens
  2015-06-12  7:05         ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Daniel Axtens @ 2015-06-11  3:32 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 682 bytes --]

> >> +
> >>  /* Processing BHRB entries */
> >>  static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
> >>  {
> >> -	u64 val;
> >> -	u64 addr;
> >> +	u64 val, addr, tmp;
> > Please don't use 'tmp' here. As far as I can tell, you use this variable
> > to compute the 'to' address. The name should reflect that.
> 
> Agreed but then it will be a new preparatory patch at the beginning
> of this patch series.
> 
I don't think I understand what you're saying here. Why do you need a
new patch? As I understand it, you've introduced 'tmp' in this patch;
couldn't you just rename it to, for example, to_addr, instead of tmp in
this patch?
-- 
Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters
  2015-06-10 12:10     ` Anshuman Khandual
@ 2015-06-11  3:38       ` Daniel Axtens
  0 siblings, 0 replies; 35+ messages in thread
From: Daniel Axtens @ 2015-06-11  3:38 UTC (permalink / raw)
  To: Anshuman Khandual; +Cc: linuxppc-dev, mikey, sukadev

[-- Attachment #1: Type: text/plain, Size: 2281 bytes --]

On Wed, 2015-06-10 at 17:40 +0530, Anshuman Khandual wrote:
> On 06/10/2015 11:19 AM, Daniel Axtens wrote:
> > On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
> >> > The kernel now supports SW based branch filters for book3s systems with
> >> > some specific requirements while dealing with HW supported branch filters
> >> > in order to achieve overall OR semantics prevailing in perf branch stack
> >> > sampling framework. This patch adapts the BHRB branch filter configuration
> >> > to meet those protocols. POWER8 PMU can only handle one HW based branch
> >> > filter request at any point of time. For all other combinations PMU will
> >> > pass it on to the SW.
> >> > 
> >> > Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> >> > ---
> >> >  arch/powerpc/perf/power8-pmu.c | 51 ++++++++++++++++++++++++++++++++++++------
> >> >  1 file changed, 44 insertions(+), 7 deletions(-)
> >> > 
> >> > diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
> >> > index 5e17cb5..8fccf6c 100644
> >> > --- a/arch/powerpc/perf/power8-pmu.c
> >> > +++ b/arch/powerpc/perf/power8-pmu.c
> >> > @@ -656,6 +656,16 @@ static int power8_generic_events[] = {
> >> >  
> > This is, I think, the third time you've modified this function in this
> > patch series. I appreciate the fact that you're trying to keep logical
> > changes separate, but it seems to me like this change might be able to
> > be combined with patch 4, and given a single commit message that clearly
> > explains the complete scope of the changes.
> 
> Here I have to disagree with you. The changes in this patch like PMU
> should not handle multiple filter requests as it does not support the
> OR semantic required in the protocol, the fact that we need to pass
> on the entire branch filtering responsibility to the SW comes into
> picture after we have enabled the SW branch filtering support in the
> previous patch. So these changes have to follow that up logically and
> sequentially in that order.
> 
OK. I don't think I understand the patch set quite well enough to follow
your logic, but when you send out the next version I'll try to take a
closer look at how the series fits together as a whole.

-- 
Regards,
Daniel

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 860 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)
  2015-06-11  2:09   ` Daniel Axtens
@ 2015-06-12  7:02     ` Anshuman Khandual
  2015-06-12  7:26       ` Madhavan Srinivasan
  0 siblings, 1 reply; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-12  7:02 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/11/2015 07:39 AM, Daniel Axtens wrote:
> Hi,
> 
> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>> new file mode 100644
>> index 0000000..13e6b72
>> --- /dev/null
>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>> @@ -0,0 +1,513 @@
>> +/*
>> + * BHRB filter test (HW & SW)
>> + *
>> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License
>> + * as published by the Free Software Foundation; either version
>> + * 2 of the License, or (at your option) any later version.
>> + */
> 
> This should also be gpl2 only.

Why ? Any special reason ? I can see similar existing statements here
in this file as well "powerpcC/primitives/load_unaligned_zeropad.c"

>> +#include <unistd.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <signal.h>
>> +#include <poll.h>
>> +#include <sys/shm.h>
>> +#include <sys/types.h>
>> +#include <sys/wait.h>
>> +#include <sys/mman.h>
>> +
>> +#include "bhrb_filters.h"
>> +#include "utils.h"
>> +#include "../event.h"
>> +#include "../lib.h"
>> +
>> +/* Fetched address counts */
>> +#define ALL_MAX		32
>> +#define CALL_MAX	12
>> +#define RET_MAX		10
>> +#define COND_MAX	8
>> +#define IND_MAX		4
>> +
>> +/* Test tunables */
>> +#define LOOP_COUNT	10
>> +#define SAMPLE_PERIOD	10000
>> +
>> +static int branch_sample_type;
>> +static int branch_test_set[27] = {
> Do you need to explicitly provide the count here?

Not really, will fix it.

>> +		PERF_SAMPLE_BRANCH_ANY_CALL,		/* Single filters */
>> +		PERF_SAMPLE_BRANCH_ANY_RETURN,
>> +		PERF_SAMPLE_BRANCH_COND,
>> +		PERF_SAMPLE_BRANCH_IND_CALL,
>> +		PERF_SAMPLE_BRANCH_ANY,
>> +
> 
>> +		PERF_SAMPLE_BRANCH_ANY_CALL |		/* Tripple filters */
> s/Tripple/Triple/

Sure.will fix it.

>> +		PERF_SAMPLE_BRANCH_ANY_RETURN |
>> +		PERF_SAMPLE_BRANCH_COND,
>> +
> 
> 
>> +
>> +static void *ring_buffer_mask(struct ring_buffer *r, void *p)

> Is this actually returning a mask? It looks more like it's calculating
> an offset, and that seems to be how you use it below.

Yeah it does calculate an offset. Will change the function name to
ring_buffer_offset instead.

>> +{
>> +	unsigned long l = (unsigned long)p;
>> +
>> +	return (void *)(r->ring_base + ((l - r->ring_base) & r->mask));
>> +}
> That's a lot of casts, especially when you then load it into a int64_t
> pointer below...

Will it cause any problem ? I can return here int64_t * instead to void *
to match the receiving pointer.

>> +
>> +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r)
>> +{
>> +	unsigned long from, to, flag;
>> +	int i, nr;
>> +	int64_t *v;
>> +
>> +	/* NR Branches */
>> +	v = ring_buffer_mask(r, hdr + 1);
> ...here. (and everywhere else I can see that you're using the
> ring_buffer_mask function)
>> +
>> +	nr = *v;
> You are dereferencing a int64_t pointer into an int. Should nr be an
> int64_t? Or should v be a different pointer type?

hmm, int64_t sounds good.

> 
>> +
>> +	/* Branches */
>> +	for (i = 0; i < nr; i++) {
>> +		v = ring_buffer_mask(r, v + 1);
>> +		from = *v;
> Now you're dereferencing an *int64_t into an unsigned long.

Just wanted to have 64 bits for that field. To achieve some uniformity
will change all of v, nr, from, to, flags as int64_t type variables.
Also will make ring_buffer_mask function return in64_t * pointer type
instead. Will change ring_buffer_mask into ring_buffer_offset as well. 

>> +
>> +		v = ring_buffer_mask(r, v + 1);
>> +		to = *v;
>> +
>> +		v = ring_buffer_mask(r, v + 1);
>> +		flag = *v;
>> +
>> +		if (!check_branch(from, to)) {
>> +			has_failed = true;
>> +			printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n",
>> +					branch_sample_type, from, to, flag);
>> +		}
>> +	}
>> +}
>> +
>> +static void read_ring_buffer(struct event *e)
>> +{
>> +	struct ring_buffer *r = &e->ring_buffer;
>> +	struct perf_event_header *hdr;
>> +	int old, head;
> Why not tail and head?

Sure, will change it.

>> +
>> +	head = r->page->data_head & r->mask;
>> +
>> +	asm volatile ("sync": : :"memory");
>> +
>> +	old = r->page->data_tail & r->mask;
>> +
> Can you explain the logic of syncing between reading head and tail? Is
> there an expectation that head is not likely to change?
> 
> As a more general comment, what is sync trying to achieve? If you're
> trying to synchronise something, what's the sync actually achieving? Is
> there a corresponding memory barrier somewhere else? What race
> conditions are you trying to guard against and does this actually guard
> against them?

Once we wake up from poll and try to read the ring buffer, head is not
going to change. We keep increment the tail and process the record till
we dont reach the head position. Once there we just write head into the
data_tail to inform kernel that the processing is over and kernel may
start filling up the ring buffer again. For more details , please look
into this file include/uapi/linux/perf_event.h (perf_event_mmap_page).

        /*
         * Control data for the mmap() data buffer.
         *
         * User-space reading the @data_head value should issue an smp_rmb(),
         * after reading this value.
         *
         * When the mapping is PROT_WRITE the @data_tail value should be
         * written by userspace to reflect the last read data, after issueing
         * an smp_mb() to separate the data read from the ->data_tail store.
         * In this case the kernel will not over-write unread data.

> 
>> +	while (old != head) {
>> +		hdr = (struct perf_event_header *)(r->ring_base + old);
>> +
>> +		if ((old & r->mask) + hdr->size !=
>> +					((old + hdr->size) & r->mask))
>> +			++record_overlap;
>> +
>> +		if (hdr->type == PERF_RECORD_SAMPLE) {
>> +			++record_sample;
>> +			dump_sample(hdr, r);
>> +		}
>> +
>> +		if (hdr->type == PERF_RECORD_MMAP)
>> +			++record_mmap;
>> +
>> +		if (hdr->type == PERF_RECORD_LOST)
>> +			++record_lost;
>> +
>> +		if (hdr->type == PERF_RECORD_THROTTLE)
>> +			++record_throttle;
>> +
>> +		if (hdr->type == PERF_RECORD_UNTHROTTLE)
>> +			++record_unthrottle;
>> +
>> +		old = (old + hdr->size) & r->mask;
>> +	}
>> +	r->page->data_tail = old;

> What happens if data_tail moves while you're doing the loop?

I believe that is not possible. Kernel wont change it unless we
we write head position into that after processing entire data.

> 
> 
> 
>> +static int filter_test(void)
>> +{
>> +	struct pollfd pollfd;
>> +	struct event ebhrb;
>> +	pid_t pid;
>> +	int ret, loop = 0;
>> +
>> +	has_failed = false;
>> +	pid = fork();
>> +	if (pid == -1) {
>> +		perror("fork() failed");
>> +		return 1;
>> +	}
>> +
>> +	/* Run child */
>> +	if (pid == 0) {
>> +		start_loop();
>> +		exit(0);
>> +	}
>> +
>> +	/* Prepare event */
>> +	event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS,
>> +				PERF_TYPE_HARDWARE, "insturctions");
> Is instructions deliberately spelled incorrectly?

:) No, its a mistake. Will fix it.

>> +	ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
>> +	ebhrb.attr.disabled = 1;
>> +	ebhrb.attr.mmap = 1;
>> +	ebhrb.attr.mmap_data = 1;
>> +	ebhrb.attr.sample_period = SAMPLE_PERIOD;
>> +	ebhrb.attr.exclude_user = 0;
>> +	ebhrb.attr.exclude_kernel = 1;
>> +	ebhrb.attr.exclude_hv = 1;
>> +	ebhrb.attr.branch_sample_type = branch_sample_type;
>> +
>> +	/* Open event */
>> +	event_open_with_pid(&ebhrb, pid);
>> +
>> +	/* Mmap ring buffer and enable event */
>> +	event_mmap(&ebhrb);
>> +	FAIL_IF(event_enable(&ebhrb));
>> +
>> +	/* Prepare polling */
>> +	pollfd.fd = ebhrb.fd;
>> +	pollfd.events = POLLIN;
>> +
>> +	for (loop = 0; loop < LOOP_COUNT; loop++) {
>> +		ret = poll(&pollfd, 1, -1);
>> +		if (ret == -1) {
>> +			perror("poll() failed");
>> +			return 1;
>> +		}
>> +		if (ret == 0) {
>> +			perror("poll() timeout");
>> +			return 1;
>> +		}
>> +		read_ring_buffer(&ebhrb);
>> +		if (has_failed)
>> +			return 1;
> 1) I don't see anything that sets has_failed after it's initalised.

Its initialized to 'false' in the beginning of each test and would be
set to 'true' inside dump_sample function if any of the captured branches
does not match the applied filter.

> 2) Should these error cases also explicitly terminate the child? Do you
> need something like this perhaps?
> 
> 		if (ret == 0) {
> 			perror("poll() failed");
> 			goto err;
> 		}
> 
> 		...
> 	
> 	}
> 
> 	...
> 
> 	return 0;
> 
> 	err:
> 	kill(pid, SIGTERM); // maybe even sigkill in the error case?
> 	return 1;

Right, will change it.

>> +	}
>> +
>> +	/* Disable and close event */
>> +	FAIL_IF(event_disable(&ebhrb));
>> +	event_close(&ebhrb);
> Again, do these need to be replicated in the error path?

Right, will change it.

>> +
>> +	/* Terminate child */
>> +	kill(pid, SIGKILL);
> SIGKILL seems a bit harsh: wouldn't SIGTERM work?
>> +	return 0;
>> +}
>> +
>> +static int  bhrb_filters_test(void)
>> +{
>> +	int i;
>> +
>> +	/* Fetch branches */
>> +	fetch_branches();
>> +	init_branch_stats();
>> +	init_perf_mmap_stats();
>> +
>> +	for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) {
>> +		branch_sample_type = branch_test_set[i];
>> +		if (filter_test())
> Couldn't branch_sample_type be passed to filter_test as a parameter,
> rather than as a global?

Yeah it can be. Will change it.

>> +			return 1;
>> +	}
>> +
> 
> 
>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
>> new file mode 100644
>> index 0000000..072375a
>> --- /dev/null
>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
>> @@ -0,0 +1,16 @@
>> +/*
>> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License
>> + * as published by the Free Software Foundation; either version
>> + * 2 of the License, or (at your option) any later version.
> License again. (And in the other files in this patch.)
> 
> 
>> +_GLOBAL(start_loop)
>> +label:
>> +	b label0			/* ANY */
>> +	blr				/* ANY_RETURN */
>> +label0:
>> +	b label1			/* ANY */
>> +
>> +label1:
>> +	b label2			/* ANY */
>> +
>> +label2:
>> +	b label3			/* ANY */
>> +
>> +label3:
>> +	mflr LR_SAVE
>> +	bl label4			/* ANY | ANY_CALL */
>> +	mtlr LR_SAVE
>> +	b start_loop			/* ANY */
>> +label4:
>> +	mflr LR_SAVE
>> +	li 20, 12
>> +	cmpi 3, 20, 12
>> +	bcl 12, 4 * cr3+2, label5	/* ANY | ANY_CALL | COND */
>> +	li 20, 12
>> +	cmpi 4, 20, 20
>> +	bcl 12, 4 * cr4+0, label5	/* ANY | ANY_CALL | COND */
>> +	LOAD_ADDR(20, label5)
>> +	mtctr 20
>> +	li 22, 10
>> +	cmpi 0, 22, 10
>> +	bcctrl 12, 4*cr0+2		/* ANY | NY_CALL | IND_CALL | COND */
>> +	LOAD_ADDR(20, label5)
>> +	mtlr 20
>> +	li      20, 10
>> +	cmpi    0, 20, 10
>> +	bclrl   12, 4*cr0+2		/* ANY | ANY_CALL | IND_CALL | COND */
>> +	mtlr LR_SAVE
>> +	blr				/* ANY | ANY_RETURN */
>> +
>> +label5:
>> +	blr				/* ANY | ANY_RETURN */
>> +
> Could these labels have more descriptive names?

Initially I had one label with same numeric numbering for each kind of
branch instruction it was intended for but then it seemed to be overkill
for that purpose. Compacted them to accommodate more branches per label
and here we are. What kind of descriptive names will each of these
label assume when each one accommodates multiple branches now ? Any
thoughts ? Though I am willing to change them.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters
  2015-06-11  1:19   ` Daniel Axtens
@ 2015-06-12  7:04     ` Anshuman Khandual
  0 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-12  7:04 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/11/2015 06:49 AM, Daniel Axtens wrote:
>> 	if (sw_filter & PERF_SAMPLE_BRANCH_PLM_ALL) {
>> +		flag = false;
> Would it be possible to use a more meaningful name than flag? Perhaps
> indicating what is it flagging?

sure, will change it with "select_branch"

>> +
>> +		if (sw_filter & PERF_SAMPLE_BRANCH_USER) {
>> +			if (to_plm == POWER_ADDR_USER)
>> +				flag = true;
>> +		}
>> +
>> +		if (sw_filter & PERF_SAMPLE_BRANCH_KERNEL) {
>> +			if (to_plm == POWER_ADDR_KERNEL)
>> +				flag = true;
>> +		}
>> +
>> +		if (sw_filter & PERF_SAMPLE_BRANCH_HV) {
>> +			if (cpu_has_feature(CPU_FTR_HVMODE)) {
>> +				if (to_plm == POWER_ADDR_KERNEL)
>> +					flag = true;
>> +			}
>> +		}
> 
> Is there any reason these are nested ifs rather than &&s?

No reason as such, will change it.

> 
>> +
>> +		if (!flag)
>> +			return false;
>> +	}
>> +
> 
>> @@ -700,7 +710,6 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type, u64 *bhrb_filter)
>>  			if (branch_sample_type) {
>>  				/* Multiple filters will be processed in SW */
>>  				pmu_bhrb_filter = 0;
>> -				*bhrb_filter = 0;
>>  				return pmu_bhrb_filter;
>>  			} else {
>>  				/* Individual filter will be processed in HW */
> What's the justification for the removal of this line? You added it in
> the previous patch...

Previously PMU passed the entire branch processing to SW by indicating
0 in the bhrb_filter mask although it was handling the privilege level
requests received from the normal PMU event. SW just ignored privilege
level requests while figuring out what other filters which need to be
processed for each captured branch.

Now that we support privilege level SW branch filters, PMU needs to
exclusively inform SW about it, so that SW does not do the processing
itself assuming its not already taken care of. That is the reason why
we removed the above statement and added this code block here instead.

        if (branch_sample_type & PERF_SAMPLE_BRANCH_USER)
                *bhrb_filter |= PERF_SAMPLE_BRANCH_USER;

        if (branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)
                *bhrb_filter |= PERF_SAMPLE_BRANCH_KERNEL;

        if (branch_sample_type & PERF_SAMPLE_BRANCH_HV)
                *bhrb_filter |= PERF_SAMPLE_BRANCH_HV;

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing
  2015-06-11  3:32       ` Daniel Axtens
@ 2015-06-12  7:05         ` Anshuman Khandual
  0 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-12  7:05 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/11/2015 09:02 AM, Daniel Axtens wrote:
>>>> +
>>>>  /* Processing BHRB entries */
>>>>  static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
>>>>  {
>>>> -	u64 val;
>>>> -	u64 addr;
>>>> +	u64 val, addr, tmp;
>>> Please don't use 'tmp' here. As far as I can tell, you use this variable
>>> to compute the 'to' address. The name should reflect that.
>>
>> Agreed but then it will be a new preparatory patch at the beginning
>> of this patch series.
>>
> I don't think I understand what you're saying here. Why do you need a
> new patch? As I understand it, you've introduced 'tmp' in this patch;
> couldn't you just rename it to, for example, to_addr, instead of tmp in
> this patch?

Sorry for the confusion, I meant separate patch for the other
two changes I had agreed to (i.e changing the name and type of
'pred' variable) as suggested in the previous mail not for this
one. Will change 'tmp' into 'to_addr' in this patch itself.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB
  2015-06-11  3:28       ` Daniel Axtens
@ 2015-06-12  7:06         ` Anshuman Khandual
  0 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-12  7:06 UTC (permalink / raw)
  To: Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/11/2015 08:58 AM, Daniel Axtens wrote:
> 
>>>>  
>>>> -	if (!(ppmu->flags & PPMU_ARCH_207S)) {
>>>> +	if (!(ppmu->flags & PPMU_ARCH_207S) || cpuhw->bhrb_users)
>>
>>> You're using cpuhw->bhrb_users as a bool here, where it's an int. Could
>>> you make the test more specific so that it's clear exactly what you're
>>> expecting bhrb_users to contain?
>>
>> Using cpuhw->bhrb_users as a bool just verifies whether it contains
>> zero or non-zero value in it. The test seems to be doing that as
>> expected. But yes, we can move it as a nested conditional block as
>> well if that is better.
>>
> 
> What I meant was, should this read (cpuhw->bhrb_users != 0)? Because
> bhrb_users in check_excludes() is a signed int, I also wanted to make
> sure it shouldn't be a test for bhrb_users > 0 instead. (Also, if
> bhrb_users is always positive, should it be an unsigned int?)

Will replace both the conditional checks in comparison against 0.
Will change the data type of bhrb_users into unsigned int as well.

> 
> I don't think a nested conditional would be better. 

Okay.

> 
> 
> 
>>>> -	if (check_excludes(ctrs, cflags, n, 1))
>>>> +	cpuhw = &get_cpu_var(cpu_hw_events);
>>> Should this be using a this_cpu_ptr rather than a get_cpu_var? (as with
>>> the power_pmu_commit_txn case?)
>>>> +	if (check_excludes(ctrs, cflags, n, 1, cpuhw->bhrb_users)) {
>>>> +		put_cpu_var(cpu_hw_events);
>>> Likewise with this?
>>>>  		return -EINVAL;
>>>> +	}
>>>>  
>>>> -	cpuhw = &get_cpu_var(cpu_hw_events);
>>
>> This patch just moves the existing code couple of lines above without
>> changing it in any manner.
>>
> I see that, but I still think you should take this opportunity to
> improve it.

Will try to change it in a separate patch.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)
  2015-06-12  7:02     ` Anshuman Khandual
@ 2015-06-12  7:26       ` Madhavan Srinivasan
  2015-06-12  8:59         ` Anshuman Khandual
  0 siblings, 1 reply; 35+ messages in thread
From: Madhavan Srinivasan @ 2015-06-12  7:26 UTC (permalink / raw)
  To: Anshuman Khandual, Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev



On Friday 12 June 2015 12:32 PM, Anshuman Khandual wrote:
> On 06/11/2015 07:39 AM, Daniel Axtens wrote:
>> Hi,
>>
>> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>>> new file mode 100644
>>> index 0000000..13e6b72
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>>> @@ -0,0 +1,513 @@
>>> +/*
>>> + * BHRB filter test (HW & SW)
>>> + *
>>> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU General Public License
>>> + * as published by the Free Software Foundation; either version
>>> + * 2 of the License, or (at your option) any later version.
>>> + */
>> This should also be gpl2 only.
> Why ? Any special reason ? I can see similar existing statements here
> in this file as well "powerpcC/primitives/load_unaligned_zeropad.c"
For the new files, mpe suggested to use gpl2 only version of the license.

 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License version 2 as published
 by the Free Software Foundation.
 
Also, preferred format for Copyright line is to have "(C)" next to word
Copyright

   
http://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/tree/COPYING#n9

Maddy
>>> +#include <unistd.h>
>>> +#include <stdio.h>
>>> +#include <stdlib.h>
>>> +#include <signal.h>
>>> +#include <poll.h>
>>> +#include <sys/shm.h>
>>> +#include <sys/types.h>
>>> +#include <sys/wait.h>
>>> +#include <sys/mman.h>
>>> +
>>> +#include "bhrb_filters.h"
>>> +#include "utils.h"
>>> +#include "../event.h"
>>> +#include "../lib.h"
>>> +
>>> +/* Fetched address counts */
>>> +#define ALL_MAX		32
>>> +#define CALL_MAX	12
>>> +#define RET_MAX		10
>>> +#define COND_MAX	8
>>> +#define IND_MAX		4
>>> +
>>> +/* Test tunables */
>>> +#define LOOP_COUNT	10
>>> +#define SAMPLE_PERIOD	10000
>>> +
>>> +static int branch_sample_type;
>>> +static int branch_test_set[27] = {
>> Do you need to explicitly provide the count here?
> Not really, will fix it.
>
>>> +		PERF_SAMPLE_BRANCH_ANY_CALL,		/* Single filters */
>>> +		PERF_SAMPLE_BRANCH_ANY_RETURN,
>>> +		PERF_SAMPLE_BRANCH_COND,
>>> +		PERF_SAMPLE_BRANCH_IND_CALL,
>>> +		PERF_SAMPLE_BRANCH_ANY,
>>> +
>>> +		PERF_SAMPLE_BRANCH_ANY_CALL |		/* Tripple filters */
>> s/Tripple/Triple/
> Sure.will fix it.
>
>>> +		PERF_SAMPLE_BRANCH_ANY_RETURN |
>>> +		PERF_SAMPLE_BRANCH_COND,
>>> +
>>
>>> +
>>> +static void *ring_buffer_mask(struct ring_buffer *r, void *p)
>> Is this actually returning a mask? It looks more like it's calculating
>> an offset, and that seems to be how you use it below.
> Yeah it does calculate an offset. Will change the function name to
> ring_buffer_offset instead.
>
>>> +{
>>> +	unsigned long l = (unsigned long)p;
>>> +
>>> +	return (void *)(r->ring_base + ((l - r->ring_base) & r->mask));
>>> +}
>> That's a lot of casts, especially when you then load it into a int64_t
>> pointer below...
> Will it cause any problem ? I can return here int64_t * instead to void *
> to match the receiving pointer.
>
>>> +
>>> +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r)
>>> +{
>>> +	unsigned long from, to, flag;
>>> +	int i, nr;
>>> +	int64_t *v;
>>> +
>>> +	/* NR Branches */
>>> +	v = ring_buffer_mask(r, hdr + 1);
>> ...here. (and everywhere else I can see that you're using the
>> ring_buffer_mask function)
>>> +
>>> +	nr = *v;
>> You are dereferencing a int64_t pointer into an int. Should nr be an
>> int64_t? Or should v be a different pointer type?
> hmm, int64_t sounds good.
>
>>> +
>>> +	/* Branches */
>>> +	for (i = 0; i < nr; i++) {
>>> +		v = ring_buffer_mask(r, v + 1);
>>> +		from = *v;
>> Now you're dereferencing an *int64_t into an unsigned long.
> Just wanted to have 64 bits for that field. To achieve some uniformity
> will change all of v, nr, from, to, flags as int64_t type variables.
> Also will make ring_buffer_mask function return in64_t * pointer type
> instead. Will change ring_buffer_mask into ring_buffer_offset as well. 
>
>>> +
>>> +		v = ring_buffer_mask(r, v + 1);
>>> +		to = *v;
>>> +
>>> +		v = ring_buffer_mask(r, v + 1);
>>> +		flag = *v;
>>> +
>>> +		if (!check_branch(from, to)) {
>>> +			has_failed = true;
>>> +			printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n",
>>> +					branch_sample_type, from, to, flag);
>>> +		}
>>> +	}
>>> +}
>>> +
>>> +static void read_ring_buffer(struct event *e)
>>> +{
>>> +	struct ring_buffer *r = &e->ring_buffer;
>>> +	struct perf_event_header *hdr;
>>> +	int old, head;
>> Why not tail and head?
> Sure, will change it.
>
>>> +
>>> +	head = r->page->data_head & r->mask;
>>> +
>>> +	asm volatile ("sync": : :"memory");
>>> +
>>> +	old = r->page->data_tail & r->mask;
>>> +
>> Can you explain the logic of syncing between reading head and tail? Is
>> there an expectation that head is not likely to change?
>>
>> As a more general comment, what is sync trying to achieve? If you're
>> trying to synchronise something, what's the sync actually achieving? Is
>> there a corresponding memory barrier somewhere else? What race
>> conditions are you trying to guard against and does this actually guard
>> against them?
> Once we wake up from poll and try to read the ring buffer, head is not
> going to change. We keep increment the tail and process the record till
> we dont reach the head position. Once there we just write head into the
> data_tail to inform kernel that the processing is over and kernel may
> start filling up the ring buffer again. For more details , please look
> into this file include/uapi/linux/perf_event.h (perf_event_mmap_page).
>
>         /*
>          * Control data for the mmap() data buffer.
>          *
>          * User-space reading the @data_head value should issue an smp_rmb(),
>          * after reading this value.
>          *
>          * When the mapping is PROT_WRITE the @data_tail value should be
>          * written by userspace to reflect the last read data, after issueing
>          * an smp_mb() to separate the data read from the ->data_tail store.
>          * In this case the kernel will not over-write unread data.
>
>>> +	while (old != head) {
>>> +		hdr = (struct perf_event_header *)(r->ring_base + old);
>>> +
>>> +		if ((old & r->mask) + hdr->size !=
>>> +					((old + hdr->size) & r->mask))
>>> +			++record_overlap;
>>> +
>>> +		if (hdr->type == PERF_RECORD_SAMPLE) {
>>> +			++record_sample;
>>> +			dump_sample(hdr, r);
>>> +		}
>>> +
>>> +		if (hdr->type == PERF_RECORD_MMAP)
>>> +			++record_mmap;
>>> +
>>> +		if (hdr->type == PERF_RECORD_LOST)
>>> +			++record_lost;
>>> +
>>> +		if (hdr->type == PERF_RECORD_THROTTLE)
>>> +			++record_throttle;
>>> +
>>> +		if (hdr->type == PERF_RECORD_UNTHROTTLE)
>>> +			++record_unthrottle;
>>> +
>>> +		old = (old + hdr->size) & r->mask;
>>> +	}
>>> +	r->page->data_tail = old;
>> What happens if data_tail moves while you're doing the loop?
> I believe that is not possible. Kernel wont change it unless we
> we write head position into that after processing entire data.
>
>>
>>
>>> +static int filter_test(void)
>>> +{
>>> +	struct pollfd pollfd;
>>> +	struct event ebhrb;
>>> +	pid_t pid;
>>> +	int ret, loop = 0;
>>> +
>>> +	has_failed = false;
>>> +	pid = fork();
>>> +	if (pid == -1) {
>>> +		perror("fork() failed");
>>> +		return 1;
>>> +	}
>>> +
>>> +	/* Run child */
>>> +	if (pid == 0) {
>>> +		start_loop();
>>> +		exit(0);
>>> +	}
>>> +
>>> +	/* Prepare event */
>>> +	event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS,
>>> +				PERF_TYPE_HARDWARE, "insturctions");
>> Is instructions deliberately spelled incorrectly?
> :) No, its a mistake. Will fix it.
>
>>> +	ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
>>> +	ebhrb.attr.disabled = 1;
>>> +	ebhrb.attr.mmap = 1;
>>> +	ebhrb.attr.mmap_data = 1;
>>> +	ebhrb.attr.sample_period = SAMPLE_PERIOD;
>>> +	ebhrb.attr.exclude_user = 0;
>>> +	ebhrb.attr.exclude_kernel = 1;
>>> +	ebhrb.attr.exclude_hv = 1;
>>> +	ebhrb.attr.branch_sample_type = branch_sample_type;
>>> +
>>> +	/* Open event */
>>> +	event_open_with_pid(&ebhrb, pid);
>>> +
>>> +	/* Mmap ring buffer and enable event */
>>> +	event_mmap(&ebhrb);
>>> +	FAIL_IF(event_enable(&ebhrb));
>>> +
>>> +	/* Prepare polling */
>>> +	pollfd.fd = ebhrb.fd;
>>> +	pollfd.events = POLLIN;
>>> +
>>> +	for (loop = 0; loop < LOOP_COUNT; loop++) {
>>> +		ret = poll(&pollfd, 1, -1);
>>> +		if (ret == -1) {
>>> +			perror("poll() failed");
>>> +			return 1;
>>> +		}
>>> +		if (ret == 0) {
>>> +			perror("poll() timeout");
>>> +			return 1;
>>> +		}
>>> +		read_ring_buffer(&ebhrb);
>>> +		if (has_failed)
>>> +			return 1;
>> 1) I don't see anything that sets has_failed after it's initalised.
> Its initialized to 'false' in the beginning of each test and would be
> set to 'true' inside dump_sample function if any of the captured branches
> does not match the applied filter.
>
>> 2) Should these error cases also explicitly terminate the child? Do you
>> need something like this perhaps?
>>
>> 		if (ret == 0) {
>> 			perror("poll() failed");
>> 			goto err;
>> 		}
>>
>> 		...
>> 	
>> 	}
>>
>> 	...
>>
>> 	return 0;
>>
>> 	err:
>> 	kill(pid, SIGTERM); // maybe even sigkill in the error case?
>> 	return 1;
> Right, will change it.
>
>>> +	}
>>> +
>>> +	/* Disable and close event */
>>> +	FAIL_IF(event_disable(&ebhrb));
>>> +	event_close(&ebhrb);
>> Again, do these need to be replicated in the error path?
> Right, will change it.
>
>>> +
>>> +	/* Terminate child */
>>> +	kill(pid, SIGKILL);
>> SIGKILL seems a bit harsh: wouldn't SIGTERM work?
>>> +	return 0;
>>> +}
>>> +
>>> +static int  bhrb_filters_test(void)
>>> +{
>>> +	int i;
>>> +
>>> +	/* Fetch branches */
>>> +	fetch_branches();
>>> +	init_branch_stats();
>>> +	init_perf_mmap_stats();
>>> +
>>> +	for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) {
>>> +		branch_sample_type = branch_test_set[i];
>>> +		if (filter_test())
>> Couldn't branch_sample_type be passed to filter_test as a parameter,
>> rather than as a global?
> Yeah it can be. Will change it.
>
>>> +			return 1;
>>> +	}
>>> +
>>
>>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
>>> new file mode 100644
>>> index 0000000..072375a
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
>>> @@ -0,0 +1,16 @@
>>> +/*
>>> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU General Public License
>>> + * as published by the Free Software Foundation; either version
>>> + * 2 of the License, or (at your option) any later version.
>> License again. (And in the other files in this patch.)
>>
>>
>>> +_GLOBAL(start_loop)
>>> +label:
>>> +	b label0			/* ANY */
>>> +	blr				/* ANY_RETURN */
>>> +label0:
>>> +	b label1			/* ANY */
>>> +
>>> +label1:
>>> +	b label2			/* ANY */
>>> +
>>> +label2:
>>> +	b label3			/* ANY */
>>> +
>>> +label3:
>>> +	mflr LR_SAVE
>>> +	bl label4			/* ANY | ANY_CALL */
>>> +	mtlr LR_SAVE
>>> +	b start_loop			/* ANY */
>>> +label4:
>>> +	mflr LR_SAVE
>>> +	li 20, 12
>>> +	cmpi 3, 20, 12
>>> +	bcl 12, 4 * cr3+2, label5	/* ANY | ANY_CALL | COND */
>>> +	li 20, 12
>>> +	cmpi 4, 20, 20
>>> +	bcl 12, 4 * cr4+0, label5	/* ANY | ANY_CALL | COND */
>>> +	LOAD_ADDR(20, label5)
>>> +	mtctr 20
>>> +	li 22, 10
>>> +	cmpi 0, 22, 10
>>> +	bcctrl 12, 4*cr0+2		/* ANY | NY_CALL | IND_CALL | COND */
>>> +	LOAD_ADDR(20, label5)
>>> +	mtlr 20
>>> +	li      20, 10
>>> +	cmpi    0, 20, 10
>>> +	bclrl   12, 4*cr0+2		/* ANY | ANY_CALL | IND_CALL | COND */
>>> +	mtlr LR_SAVE
>>> +	blr				/* ANY | ANY_RETURN */
>>> +
>>> +label5:
>>> +	blr				/* ANY | ANY_RETURN */
>>> +
>> Could these labels have more descriptive names?
> Initially I had one label with same numeric numbering for each kind of
> branch instruction it was intended for but then it seemed to be overkill
> for that purpose. Compacted them to accommodate more branches per label
> and here we are. What kind of descriptive names will each of these
> label assume when each one accommodates multiple branches now ? Any
> thoughts ? Though I am willing to change them.
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)
  2015-06-12  7:26       ` Madhavan Srinivasan
@ 2015-06-12  8:59         ` Anshuman Khandual
  0 siblings, 0 replies; 35+ messages in thread
From: Anshuman Khandual @ 2015-06-12  8:59 UTC (permalink / raw)
  To: Madhavan Srinivasan, Daniel Axtens; +Cc: linuxppc-dev, mikey, sukadev

On 06/12/2015 12:56 PM, Madhavan Srinivasan wrote:
> 
> On Friday 12 June 2015 12:32 PM, Anshuman Khandual wrote:
>> > On 06/11/2015 07:39 AM, Daniel Axtens wrote:
>>> >> Hi,
>>> >>
>>> >> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
>>>> >>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>>>> >>> new file mode 100644
>>>> >>> index 0000000..13e6b72
>>>> >>> --- /dev/null
>>>> >>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
>>>> >>> @@ -0,0 +1,513 @@
>>>> >>> +/*
>>>> >>> + * BHRB filter test (HW & SW)
>>>> >>> + *
>>>> >>> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
>>>> >>> + *
>>>> >>> + * This program is free software; you can redistribute it and/or
>>>> >>> + * modify it under the terms of the GNU General Public License
>>>> >>> + * as published by the Free Software Foundation; either version
>>>> >>> + * 2 of the License, or (at your option) any later version.
>>>> >>> + */
>>> >> This should also be gpl2 only.
>> > Why ? Any special reason ? I can see similar existing statements here
>> > in this file as well "powerpcC/primitives/load_unaligned_zeropad.c"
> For the new files, mpe suggested to use gpl2 only version of the license.
> 
>  This program is free software; you can redistribute it and/or modify it
>  under the terms of the GNU General Public License version 2 as published
>  by the Free Software Foundation.
> 
> Also, preferred format for Copyright line is to have "(C)" next to word
> Copyright

Sure, will accommodate both the proposed changes next time around.

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2015-06-12  9:00 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-08 11:38 [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 02/10] powerpc, perf: Restore privillege level filter support for BHRB Anshuman Khandual
2015-06-10  3:43   ` Daniel Axtens
2015-06-10 12:08     ` Anshuman Khandual
2015-06-11  3:28       ` Daniel Axtens
2015-06-12  7:06         ` Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 03/10] powerpc, perf: Re organize BHRB processing Anshuman Khandual
2015-06-10  4:36   ` Daniel Axtens
2015-06-10 12:09     ` Anshuman Khandual
2015-06-11  3:32       ` Daniel Axtens
2015-06-12  7:05         ` Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 04/10] powerpc, perf: Re organize PMU based branch filter processing in POWER8 Anshuman Khandual
2015-06-10  5:07   ` Daniel Axtens
2015-06-10 12:09     ` Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 05/10] powerpc, perf: Change the name of HW PMU branch filter tracking variable Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 06/10] powerpc, lib: Add new branch analysis support functions Anshuman Khandual
2015-06-10  5:33   ` Daniel Axtens
2015-06-10 12:10     ` Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 07/10] powerpc, perf: Enable SW filtering in branch stack sampling framework Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 08/10] powerpc, perf: Change POWER8 PMU configuration to work with SW filters Anshuman Khandual
2015-06-10  5:49   ` Daniel Axtens
2015-06-10 12:10     ` Anshuman Khandual
2015-06-11  3:38       ` Daniel Axtens
2015-06-08 11:38 ` [PATCH V8 09/10] powerpc, perf: Enable privilege mode SW branch filters Anshuman Khandual
2015-06-11  1:19   ` Daniel Axtens
2015-06-12  7:04     ` Anshuman Khandual
2015-06-08 11:38 ` [PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW) Anshuman Khandual
2015-06-09  5:41   ` Anshuman Khandual
2015-06-11  2:09   ` Daniel Axtens
2015-06-12  7:02     ` Anshuman Khandual
2015-06-12  7:26       ` Madhavan Srinivasan
2015-06-12  8:59         ` Anshuman Khandual
2015-06-10  3:21 ` [PATCH V8 01/10] powerpc, perf: Drop the branch sample when 'from' cannot be fetched Daniel Axtens
2015-06-10 12:02   ` Anshuman Khandual
2015-06-11  2:22     ` Daniel Axtens

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.