From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752741AbdGJFUD (ORCPT ); Mon, 10 Jul 2017 01:20:03 -0400 Received: from ozlabs.org ([103.22.144.67]:40295 "EHLO ozlabs.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750924AbdGJFUB (ORCPT ); Mon, 10 Jul 2017 01:20:01 -0400 From: Michael Ellerman To: Peter Zijlstra , Jin Yao Cc: ak@linux.intel.com, maddy@linux.vnet.ibm.com, alexander.shishkin@linux.intel.com, kan.liang@intel.com, linuxppc-dev@lists.ozlabs.org, Linux-kernel@vger.kernel.org, acme@kernel.org, mingo@redhat.com, jolsa@kernel.org, yao.jin@intel.com Subject: Re: [PATCH v6 1/7] perf/core: Define the common branch type classification In-Reply-To: <20170707084201.zglggi2sfziirjio@hirez.programming.kicks-ass.net> References: <1492690075-17243-1-git-send-email-yao.jin@linux.intel.com> <1492690075-17243-2-git-send-email-yao.jin@linux.intel.com> <20170707084201.zglggi2sfziirjio@hirez.programming.kicks-ass.net> User-Agent: Notmuch/0.21 (https://notmuchmail.org) Date: Mon, 10 Jul 2017 15:19:57 +1000 Message-ID: <87tw2kj2bm.fsf@concordia.ellerman.id.au> MIME-Version: 1.0 Content-Type: text/plain Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Peter Zijlstra writes: > PPC folks, maddy, does this work for you guys? I think it works for us, but I have some comments, I'll reply to the original. cheers > On Thu, Apr 20, 2017 at 08:07:49PM +0800, Jin Yao wrote: >> It is often useful to know the branch types while analyzing branch >> data. For example, a call is very different from a conditional branch. >> >> Currently we have to look it up in binary while the binary may later >> not be available and even the binary is available but user has to take >> some time. It is very useful for user to check it directly in perf >> report. >> >> Perf already has support for disassembling the branch instruction >> to get the x86 branch type. 
>> >> To keep consistent on kernel and userspace and make the classification >> more common, the patch adds the common branch type classification >> in perf_event.h. >> >> PERF_BR_NONE : unknown >> PERF_BR_JCC : conditional jump >> PERF_BR_JMP : jump >> PERF_BR_IND_JMP : indirect jump >> PERF_BR_CALL : call >> PERF_BR_IND_CALL : indirect call >> PERF_BR_RET : return >> PERF_BR_SYSCALL : syscall >> PERF_BR_SYSRET : syscall return >> PERF_BR_IRQ : hw interrupt/trap/fault >> PERF_BR_INT : sw interrupt >> PERF_BR_IRET : return from interrupt >> PERF_BR_FAR_BRANCH: not generic far branch type >> >> The patch also adds a new field type (4 bits) in perf_branch_entry >> to record the branch type. >> >> Since the disassembling of branch instruction needs some overhead, >> a new PERF_SAMPLE_BRANCH_TYPE_SAVE is introduced to indicate if it >> needs to disassemble the branch instruction and record the branch >> type. >> >> Change log >> ---------- >> >> v6: Not changed. >> >> v5: Not changed. The v5 patch series just change the userspace. >> >> v4: Comparing to previous version, the major changes are: >> >> 1. Remove the PERF_BR_JCC_FWD/PERF_BR_JCC_BWD, they will be >> computed later in userspace. >> >> 2. Remove the "cross" field in perf_branch_entry. The cross page >> computing will be done later in userspace. 
>> >> Signed-off-by: Jin Yao >> --- >> include/uapi/linux/perf_event.h | 29 ++++++++++++++++++++++++++++- >> tools/include/uapi/linux/perf_event.h | 29 ++++++++++++++++++++++++++++- >> 2 files changed, 56 insertions(+), 2 deletions(-) >> >> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h >> index d09a9cd..69af012 100644 >> --- a/include/uapi/linux/perf_event.h >> +++ b/include/uapi/linux/perf_event.h >> @@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift { >> PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ >> PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ >> >> + PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ >> + >> PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ >> }; >> >> @@ -198,9 +200,32 @@ enum perf_branch_sample_type { >> PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, >> PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, >> >> + PERF_SAMPLE_BRANCH_TYPE_SAVE = >> + 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, >> + >> PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, >> }; >> >> +/* >> + * Common flow change classification >> + */ >> +enum { >> + PERF_BR_NONE = 0, /* unknown */ >> + PERF_BR_JCC = 1, /* conditional jump */ >> + PERF_BR_JMP = 2, /* jump */ >> + PERF_BR_IND_JMP = 3, /* indirect jump */ >> + PERF_BR_CALL = 4, /* call */ >> + PERF_BR_IND_CALL = 5, /* indirect call */ >> + PERF_BR_RET = 6, /* return */ >> + PERF_BR_SYSCALL = 7, /* syscall */ >> + PERF_BR_SYSRET = 8, /* syscall return */ >> + PERF_BR_IRQ = 9, /* hw interrupt/trap/fault */ >> + PERF_BR_INT = 10, /* sw interrupt */ >> + PERF_BR_IRET = 11, /* return from interrupt */ >> + PERF_BR_FAR_BRANCH = 12, /* not generic far branch type */ >> + PERF_BR_MAX, >> +}; >> + >> #define PERF_SAMPLE_BRANCH_PLM_ALL \ >> (PERF_SAMPLE_BRANCH_USER|\ >> PERF_SAMPLE_BRANCH_KERNEL|\ >> @@ -999,6 +1024,7 @@ union perf_mem_data_src { >> * in_tx: running in a hardware transaction >> * abort: 
aborting a hardware transaction >> * cycles: cycles from last branch (or 0 if not supported) >> + * type: branch type >> */ >> struct perf_branch_entry { >> __u64 from; >> @@ -1008,7 +1034,8 @@ struct perf_branch_entry { >> in_tx:1, /* in transaction */ >> abort:1, /* transaction abort */ >> cycles:16, /* cycle count to last branch */ >> - reserved:44; >> + type:4, /* branch type */ >> + reserved:40; >> }; >> >> #endif /* _UAPI_LINUX_PERF_EVENT_H */ >> diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h >> index d09a9cd..69af012 100644 >> --- a/tools/include/uapi/linux/perf_event.h >> +++ b/tools/include/uapi/linux/perf_event.h >> @@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift { >> PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ >> PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ >> >> + PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ >> + >> PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ >> }; >> >> @@ -198,9 +200,32 @@ enum perf_branch_sample_type { >> PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, >> PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, >> >> + PERF_SAMPLE_BRANCH_TYPE_SAVE = >> + 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, >> + >> PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, >> }; >> >> +/* >> + * Common flow change classification >> + */ >> +enum { >> + PERF_BR_NONE = 0, /* unknown */ >> + PERF_BR_JCC = 1, /* conditional jump */ >> + PERF_BR_JMP = 2, /* jump */ >> + PERF_BR_IND_JMP = 3, /* indirect jump */ >> + PERF_BR_CALL = 4, /* call */ >> + PERF_BR_IND_CALL = 5, /* indirect call */ >> + PERF_BR_RET = 6, /* return */ >> + PERF_BR_SYSCALL = 7, /* syscall */ >> + PERF_BR_SYSRET = 8, /* syscall return */ >> + PERF_BR_IRQ = 9, /* hw interrupt/trap/fault */ >> + PERF_BR_INT = 10, /* sw interrupt */ >> + PERF_BR_IRET = 11, /* return from interrupt */ >> + PERF_BR_FAR_BRANCH = 12, /* not generic far branch type */ >> + 
PERF_BR_MAX, >> +}; >> + >> #define PERF_SAMPLE_BRANCH_PLM_ALL \ >> (PERF_SAMPLE_BRANCH_USER|\ >> PERF_SAMPLE_BRANCH_KERNEL|\ >> @@ -999,6 +1024,7 @@ union perf_mem_data_src { >> * in_tx: running in a hardware transaction >> * abort: aborting a hardware transaction >> * cycles: cycles from last branch (or 0 if not supported) >> + * type: branch type >> */ >> struct perf_branch_entry { >> __u64 from; >> @@ -1008,7 +1034,8 @@ struct perf_branch_entry { >> in_tx:1, /* in transaction */ >> abort:1, /* transaction abort */ >> cycles:16, /* cycle count to last branch */ >> - reserved:44; >> + type:4, /* branch type */ >> + reserved:40; >> }; >> >> #endif /* _UAPI_LINUX_PERF_EVENT_H */ >> -- >> 2.7.4 >>