All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-25  2:48 ` Athira Rajeev
  0 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-25  2:48 UTC (permalink / raw)
  To: acme, jolsa
  Cc: maddy, rnsastry, linux-perf-users, kjain, namhyung, linuxppc-dev

Sort key p_stage_cyc is used to present the latency
cycles spent in pipeline stages. The perf tool has a local
p_stage_cyc sort key to display this info. There is no
global variant available for this sort key. The local variant
shows latency in a single sample, whereas the global value
will be useful to present the total latency (sum of
latencies) in the hist entry. It represents the latency
number multiplied by the number of samples.

Add global (p_stage_cyc) and local (local_p_stage_cyc)
variants for this sort key. Use local_p_stage_cyc
as the default option for "mem" sort mode.
Also add these to the list of dynamic sort keys.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Reported-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/hist.c |  4 +++-
 tools/perf/util/hist.h |  3 ++-
 tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
 tools/perf/util/sort.h |  3 ++-
 4 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b776465e04ef..0a8033b09e28 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
-	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
+	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
+	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+
 	if (symbol_conf.nanosecs)
 		hists__new_col_len(hists, HISTC_TIME, 16);
 	else
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5343b62476e6..2752ce681108 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -75,7 +75,8 @@ enum hist_column {
 	HISTC_MEM_BLOCKED,
 	HISTC_LOCAL_INS_LAT,
 	HISTC_GLOBAL_INS_LAT,
-	HISTC_P_STAGE_CYC,
+	HISTC_LOCAL_P_STAGE_CYC,
+	HISTC_GLOBAL_P_STAGE_CYC,
 	HISTC_NR_COLS, /* Last entry */
 };
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e9216a292a04..e978f7883e07 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
 const char	*default_sort_order = "comm,dso,symbol";
 const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
+const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
 const char	default_top_sort_order[] = "dso,symbol";
 const char	default_diff_sort_order[] = "dso,symbol";
 const char	default_tracepoint_sort_order[] = "trace";
@@ -46,8 +46,8 @@ const char	*field_order;
 regex_t		ignore_callees_regex;
 int		have_ignore_callees = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
-const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
-const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
+const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
+const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
 };
 
 static int64_t
-sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	return left->p_stage_cyc - right->p_stage_cyc;
 }
 
+static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*u", width,
+			he->p_stage_cyc * he->stat.nr_events);
+}
+
+
 static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
 }
 
-struct sort_entry sort_p_stage_cyc = {
-	.se_header      = "Pipeline Stage Cycle",
-	.se_cmp         = sort__global_p_stage_cyc_cmp,
+struct sort_entry sort_local_p_stage_cyc = {
+	.se_header      = "Local Pipeline Stage Cycle",
+	.se_cmp         = sort__p_stage_cyc_cmp,
 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
-	.se_width_idx	= HISTC_P_STAGE_CYC,
+	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_global_p_stage_cyc = {
+	.se_header      = "Pipeline Stage Cycle",
+	.se_cmp         = sort__p_stage_cyc_cmp,
+	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
+	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
 };
 
 struct sort_entry sort_mem_daddr_sym = {
@@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
-	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
+	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
+	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 3c7518378d62..83abe5e6812a 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -235,7 +235,8 @@ enum sort_type {
 	SORT_CODE_PAGE_SIZE,
 	SORT_LOCAL_INS_LAT,
 	SORT_GLOBAL_INS_LAT,
-	SORT_PIPELINE_STAGE_CYC,
+	SORT_LOCAL_PIPELINE_STAGE_CYC,
+	SORT_GLOBAL_PIPELINE_STAGE_CYC,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-25  2:48 ` Athira Rajeev
  0 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-25  2:48 UTC (permalink / raw)
  To: acme, jolsa
  Cc: mpe, linux-perf-users, linuxppc-dev, maddy, rnsastry, kjain, namhyung

Sort key p_stage_cyc is used to present the latency
cycles spent in pipeline stages. The perf tool has a local
p_stage_cyc sort key to display this info. There is no
global variant available for this sort key. The local variant
shows latency in a single sample, whereas the global value
will be useful to present the total latency (sum of
latencies) in the hist entry. It represents the latency
number multiplied by the number of samples.

Add global (p_stage_cyc) and local (local_p_stage_cyc)
variants for this sort key. Use local_p_stage_cyc
as the default option for "mem" sort mode.
Also add these to the list of dynamic sort keys.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Reported-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/hist.c |  4 +++-
 tools/perf/util/hist.h |  3 ++-
 tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
 tools/perf/util/sort.h |  3 ++-
 4 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b776465e04ef..0a8033b09e28 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
-	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
+	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
+	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
+
 	if (symbol_conf.nanosecs)
 		hists__new_col_len(hists, HISTC_TIME, 16);
 	else
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5343b62476e6..2752ce681108 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -75,7 +75,8 @@ enum hist_column {
 	HISTC_MEM_BLOCKED,
 	HISTC_LOCAL_INS_LAT,
 	HISTC_GLOBAL_INS_LAT,
-	HISTC_P_STAGE_CYC,
+	HISTC_LOCAL_P_STAGE_CYC,
+	HISTC_GLOBAL_P_STAGE_CYC,
 	HISTC_NR_COLS, /* Last entry */
 };
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e9216a292a04..e978f7883e07 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
 const char	*default_sort_order = "comm,dso,symbol";
 const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
+const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
 const char	default_top_sort_order[] = "dso,symbol";
 const char	default_diff_sort_order[] = "dso,symbol";
 const char	default_tracepoint_sort_order[] = "trace";
@@ -46,8 +46,8 @@ const char	*field_order;
 regex_t		ignore_callees_regex;
 int		have_ignore_callees = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
-const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
-const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
+const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
+const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
 };
 
 static int64_t
-sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	return left->p_stage_cyc - right->p_stage_cyc;
 }
 
+static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*u", width,
+			he->p_stage_cyc * he->stat.nr_events);
+}
+
+
 static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
 }
 
-struct sort_entry sort_p_stage_cyc = {
-	.se_header      = "Pipeline Stage Cycle",
-	.se_cmp         = sort__global_p_stage_cyc_cmp,
+struct sort_entry sort_local_p_stage_cyc = {
+	.se_header      = "Local Pipeline Stage Cycle",
+	.se_cmp         = sort__p_stage_cyc_cmp,
 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
-	.se_width_idx	= HISTC_P_STAGE_CYC,
+	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
+};
+
+struct sort_entry sort_global_p_stage_cyc = {
+	.se_header      = "Pipeline Stage Cycle",
+	.se_cmp         = sort__p_stage_cyc_cmp,
+	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
+	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
 };
 
 struct sort_entry sort_mem_daddr_sym = {
@@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
-	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
+	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
+	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 3c7518378d62..83abe5e6812a 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -235,7 +235,8 @@ enum sort_type {
 	SORT_CODE_PAGE_SIZE,
 	SORT_LOCAL_INS_LAT,
 	SORT_GLOBAL_INS_LAT,
-	SORT_PIPELINE_STAGE_CYC,
+	SORT_LOCAL_PIPELINE_STAGE_CYC,
+	SORT_GLOBAL_PIPELINE_STAGE_CYC,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] tools/perf: Update global/local variants for p_stage_cyc in powerpc
  2021-11-25  2:48 ` Athira Rajeev
@ 2021-11-25  2:48   ` Athira Rajeev
  -1 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-25  2:48 UTC (permalink / raw)
  To: acme, jolsa
  Cc: maddy, rnsastry, linux-perf-users, kjain, namhyung, linuxppc-dev

Update the arch_support_sort_key() function in powerpc
to enable presenting local and global variants of sort
key: p_stage_cyc. Update the "se_header" strings for
these in arch_perf_header_entry() function along with
instruction latency.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Reported-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/arch/powerpc/util/event.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index 3bf441257466..cf430a4c55b9 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header)
 {
 	if (!strcmp(se_header, "Local INSTR Latency"))
 		return "Finish Cyc";
-	else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+	else if (!strcmp(se_header, "INSTR Latency"))
+		return "Global Finish_cyc";
+	else if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
 		return "Dispatch Cyc";
+	else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+		return "Global Dispatch_cyc";
 	return se_header;
 }
 
@@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key)
 {
 	if (!strcmp(sort_key, "p_stage_cyc"))
 		return 1;
+	if (!strcmp(sort_key, "local_p_stage_cyc"))
+		return 1;
 	return 0;
 }
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] tools/perf: Update global/local variants for p_stage_cyc in powerpc
@ 2021-11-25  2:48   ` Athira Rajeev
  0 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-25  2:48 UTC (permalink / raw)
  To: acme, jolsa
  Cc: mpe, linux-perf-users, linuxppc-dev, maddy, rnsastry, kjain, namhyung

Update the arch_support_sort_key() function in powerpc
to enable presenting local and global variants of sort
key: p_stage_cyc. Update the "se_header" strings for
these in arch_perf_header_entry() function along with
instruction latency.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Reported-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/arch/powerpc/util/event.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index 3bf441257466..cf430a4c55b9 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header)
 {
 	if (!strcmp(se_header, "Local INSTR Latency"))
 		return "Finish Cyc";
-	else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+	else if (!strcmp(se_header, "INSTR Latency"))
+		return "Global Finish_cyc";
+	else if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
 		return "Dispatch Cyc";
+	else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+		return "Global Dispatch_cyc";
 	return se_header;
 }
 
@@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key)
 {
 	if (!strcmp(sort_key, "p_stage_cyc"))
 		return 1;
+	if (!strcmp(sort_key, "local_p_stage_cyc"))
+		return 1;
 	return 0;
 }
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
  2021-11-25  2:48 ` Athira Rajeev
@ 2021-11-25  8:10   ` Nageswara Sastry
  -1 siblings, 0 replies; 16+ messages in thread
From: Nageswara Sastry @ 2021-11-25  8:10 UTC (permalink / raw)
  To: Athira Rajeev, acme, jolsa
  Cc: maddy, linux-perf-users, kjain, namhyung, linuxppc-dev



On 25/11/21 8:18 am, Athira Rajeev wrote:
> Sort key p_stage_cyc is used to present the latency
> cycles spend in pipeline stages. perf tool has local
> p_stage_cyc sort key to display this info. There is no
> global variant available for this sort key. local variant
> shows latency in a sinlge sample, whereas, global value
> will be useful to present the total latency (sum of
> latencies) in the hist entry. It represents latency
> number multiplied by the number of samples.
> 
> Add global (p_stage_cyc) and local variant
> (local_p_stage_cyc) for this sort key. Use the
> local_p_stage_cyc as default option for "mem" sort mode.
> Also add this to list of dynamic sort keys.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Namhyung Kim <namhyung@kernel.org>

Tested the patch on Power10 LPAR and could see the required data.

# Overhead       Samples  Command  Shared Object               Symbol 
                              Dispatch Cyc
# ........  ............  .......  .......................... 
....................................  .............
#
      9.41%           156  dd       [kernel.vmlinux]            [k] 
system_call_common                1
      4.91%            82  dd       [kernel.vmlinux]            [k] 
__fget_light                      1
...

# Overhead       Samples  Command  Shared Object               Symbol 
                              Dispatch Cyc   Global Dispatch_cyc
# ........  ............  .......  .......................... 
....................................  .............  ...................
#
      9.41%           156  dd       [kernel.vmlinux]            [k] 
system_call_common                1              156
      4.91%            82  dd       [kernel.vmlinux]            [k] 
__fget_light                      1              82
...

Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>

> ---
>   tools/perf/util/hist.c |  4 +++-
>   tools/perf/util/hist.h |  3 ++-
>   tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>   tools/perf/util/sort.h |  3 ++-
>   4 files changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b776465e04ef..0a8033b09e28 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>   	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>   	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>   	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> +
>   	if (symbol_conf.nanosecs)
>   		hists__new_col_len(hists, HISTC_TIME, 16);
>   	else
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 5343b62476e6..2752ce681108 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -75,7 +75,8 @@ enum hist_column {
>   	HISTC_MEM_BLOCKED,
>   	HISTC_LOCAL_INS_LAT,
>   	HISTC_GLOBAL_INS_LAT,
> -	HISTC_P_STAGE_CYC,
> +	HISTC_LOCAL_P_STAGE_CYC,
> +	HISTC_GLOBAL_P_STAGE_CYC,
>   	HISTC_NR_COLS, /* Last entry */
>   };
>   
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index e9216a292a04..e978f7883e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>   const char	*parent_pattern = default_parent_pattern;
>   const char	*default_sort_order = "comm,dso,symbol";
>   const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>   const char	default_top_sort_order[] = "dso,symbol";
>   const char	default_diff_sort_order[] = "dso,symbol";
>   const char	default_tracepoint_sort_order[] = "trace";
> @@ -46,8 +46,8 @@ const char	*field_order;
>   regex_t		ignore_callees_regex;
>   int		have_ignore_callees = 0;
>   enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
>   
>   /*
>    * Replaces all occurrences of a char used with the:
> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>   };
>   
>   static int64_t
> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>   {
>   	return left->p_stage_cyc - right->p_stage_cyc;
>   }
>   
> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> +					size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%-*u", width,
> +			he->p_stage_cyc * he->stat.nr_events);
> +}
> +
> +
>   static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>   					size_t size, unsigned int width)
>   {
>   	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>   }
>   
> -struct sort_entry sort_p_stage_cyc = {
> -	.se_header      = "Pipeline Stage Cycle",
> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> +struct sort_entry sort_local_p_stage_cyc = {
> +	.se_header      = "Local Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
>   	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> +};
> +
> +struct sort_entry sort_global_p_stage_cyc = {
> +	.se_header      = "Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>   };
>   
>   struct sort_entry sort_mem_daddr_sym = {
> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>   	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>   	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>   	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>   };
>   
>   #undef DIM
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 3c7518378d62..83abe5e6812a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -235,7 +235,8 @@ enum sort_type {
>   	SORT_CODE_PAGE_SIZE,
>   	SORT_LOCAL_INS_LAT,
>   	SORT_GLOBAL_INS_LAT,
> -	SORT_PIPELINE_STAGE_CYC,
> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>   
>   	/* branch stack specific sort keys */
>   	__SORT_BRANCH_STACK,
> 

-- 
Thanks and Regards
R.Nageswara Sastry

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-25  8:10   ` Nageswara Sastry
  0 siblings, 0 replies; 16+ messages in thread
From: Nageswara Sastry @ 2021-11-25  8:10 UTC (permalink / raw)
  To: Athira Rajeev, acme, jolsa
  Cc: mpe, linux-perf-users, linuxppc-dev, maddy, kjain, namhyung



On 25/11/21 8:18 am, Athira Rajeev wrote:
> Sort key p_stage_cyc is used to present the latency
> cycles spend in pipeline stages. perf tool has local
> p_stage_cyc sort key to display this info. There is no
> global variant available for this sort key. local variant
> shows latency in a sinlge sample, whereas, global value
> will be useful to present the total latency (sum of
> latencies) in the hist entry. It represents latency
> number multiplied by the number of samples.
> 
> Add global (p_stage_cyc) and local variant
> (local_p_stage_cyc) for this sort key. Use the
> local_p_stage_cyc as default option for "mem" sort mode.
> Also add this to list of dynamic sort keys.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Namhyung Kim <namhyung@kernel.org>

Tested the patch on Power10 LPAR and could see the required data.

# Overhead       Samples  Command  Shared Object               Symbol 
                              Dispatch Cyc
# ........  ............  .......  .......................... 
....................................  .............
#
      9.41%           156  dd       [kernel.vmlinux]            [k] 
system_call_common                1
      4.91%            82  dd       [kernel.vmlinux]            [k] 
__fget_light                      1
...

# Overhead       Samples  Command  Shared Object               Symbol 
                              Dispatch Cyc   Global Dispatch_cyc
# ........  ............  .......  .......................... 
....................................  .............  ...................
#
      9.41%           156  dd       [kernel.vmlinux]            [k] 
system_call_common                1              156
      4.91%            82  dd       [kernel.vmlinux]            [k] 
__fget_light                      1              82
...

Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>

> ---
>   tools/perf/util/hist.c |  4 +++-
>   tools/perf/util/hist.h |  3 ++-
>   tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>   tools/perf/util/sort.h |  3 ++-
>   4 files changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b776465e04ef..0a8033b09e28 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>   	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>   	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>   	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> +
>   	if (symbol_conf.nanosecs)
>   		hists__new_col_len(hists, HISTC_TIME, 16);
>   	else
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 5343b62476e6..2752ce681108 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -75,7 +75,8 @@ enum hist_column {
>   	HISTC_MEM_BLOCKED,
>   	HISTC_LOCAL_INS_LAT,
>   	HISTC_GLOBAL_INS_LAT,
> -	HISTC_P_STAGE_CYC,
> +	HISTC_LOCAL_P_STAGE_CYC,
> +	HISTC_GLOBAL_P_STAGE_CYC,
>   	HISTC_NR_COLS, /* Last entry */
>   };
>   
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index e9216a292a04..e978f7883e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>   const char	*parent_pattern = default_parent_pattern;
>   const char	*default_sort_order = "comm,dso,symbol";
>   const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>   const char	default_top_sort_order[] = "dso,symbol";
>   const char	default_diff_sort_order[] = "dso,symbol";
>   const char	default_tracepoint_sort_order[] = "trace";
> @@ -46,8 +46,8 @@ const char	*field_order;
>   regex_t		ignore_callees_regex;
>   int		have_ignore_callees = 0;
>   enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
>   
>   /*
>    * Replaces all occurrences of a char used with the:
> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>   };
>   
>   static int64_t
> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>   {
>   	return left->p_stage_cyc - right->p_stage_cyc;
>   }
>   
> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> +					size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%-*u", width,
> +			he->p_stage_cyc * he->stat.nr_events);
> +}
> +
> +
>   static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>   					size_t size, unsigned int width)
>   {
>   	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>   }
>   
> -struct sort_entry sort_p_stage_cyc = {
> -	.se_header      = "Pipeline Stage Cycle",
> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> +struct sort_entry sort_local_p_stage_cyc = {
> +	.se_header      = "Local Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
>   	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> +};
> +
> +struct sort_entry sort_global_p_stage_cyc = {
> +	.se_header      = "Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>   };
>   
>   struct sort_entry sort_mem_daddr_sym = {
> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>   	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>   	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>   	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>   };
>   
>   #undef DIM
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 3c7518378d62..83abe5e6812a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -235,7 +235,8 @@ enum sort_type {
>   	SORT_CODE_PAGE_SIZE,
>   	SORT_LOCAL_INS_LAT,
>   	SORT_GLOBAL_INS_LAT,
> -	SORT_PIPELINE_STAGE_CYC,
> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>   
>   	/* branch stack specific sort keys */
>   	__SORT_BRANCH_STACK,
> 

-- 
Thanks and Regards
R.Nageswara Sastry

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
  2021-11-25  2:48 ` Athira Rajeev
@ 2021-11-28 16:34   ` Jiri Olsa
  -1 siblings, 0 replies; 16+ messages in thread
From: Jiri Olsa @ 2021-11-28 16:34 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: acme, jolsa, mpe, linux-perf-users, linuxppc-dev, maddy,
	rnsastry, kjain, namhyung

On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> Sort key p_stage_cyc is used to present the latency
> cycles spent in pipeline stages. perf tool has local
> p_stage_cyc sort key to display this info. There is no
> global variant available for this sort key. local variant
> shows latency in a single sample, whereas, global value
> will be useful to present the total latency (sum of
> latencies) in the hist entry. It represents latency
> number multiplied by the number of samples.
> 
> Add global (p_stage_cyc) and local variant
> (local_p_stage_cyc) for this sort key. Use the
> local_p_stage_cyc as default option for "mem" sort mode.
> Also add this to list of dynamic sort keys.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Namhyung Kim <namhyung@kernel.org>

I can't apply this to Arnaldo's perf/core, could you please rebase?

patching file util/hist.c
patching file util/hist.h
patching file util/sort.c
Hunk #3 FAILED at 1392.
Hunk #4 succeeded at 1878 (offset 20 lines).
1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
patching file util/sort.h

thanks,
jirka

> ---
>  tools/perf/util/hist.c |  4 +++-
>  tools/perf/util/hist.h |  3 ++-
>  tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>  tools/perf/util/sort.h |  3 ++-
>  4 files changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b776465e04ef..0a8033b09e28 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>  	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>  	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> +
>  	if (symbol_conf.nanosecs)
>  		hists__new_col_len(hists, HISTC_TIME, 16);
>  	else
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 5343b62476e6..2752ce681108 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -75,7 +75,8 @@ enum hist_column {
>  	HISTC_MEM_BLOCKED,
>  	HISTC_LOCAL_INS_LAT,
>  	HISTC_GLOBAL_INS_LAT,
> -	HISTC_P_STAGE_CYC,
> +	HISTC_LOCAL_P_STAGE_CYC,
> +	HISTC_GLOBAL_P_STAGE_CYC,
>  	HISTC_NR_COLS, /* Last entry */
>  };
>  
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index e9216a292a04..e978f7883e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>  const char	*parent_pattern = default_parent_pattern;
>  const char	*default_sort_order = "comm,dso,symbol";
>  const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>  const char	default_top_sort_order[] = "dso,symbol";
>  const char	default_diff_sort_order[] = "dso,symbol";
>  const char	default_tracepoint_sort_order[] = "trace";
> @@ -46,8 +46,8 @@ const char	*field_order;
>  regex_t		ignore_callees_regex;
>  int		have_ignore_callees = 0;
>  enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
>  
>  /*
>   * Replaces all occurrences of a char used with the:
> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>  };
>  
>  static int64_t
> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>  {
>  	return left->p_stage_cyc - right->p_stage_cyc;
>  }
>  
> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> +					size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%-*u", width,
> +			he->p_stage_cyc * he->stat.nr_events);
> +}
> +
> +
>  static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>  					size_t size, unsigned int width)
>  {
>  	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>  }
>  
> -struct sort_entry sort_p_stage_cyc = {
> -	.se_header      = "Pipeline Stage Cycle",
> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> +struct sort_entry sort_local_p_stage_cyc = {
> +	.se_header      = "Local Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
>  	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> +};
> +
> +struct sort_entry sort_global_p_stage_cyc = {
> +	.se_header      = "Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>  };
>  
>  struct sort_entry sort_mem_daddr_sym = {
> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>  	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>  	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>  	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>  };
>  
>  #undef DIM
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 3c7518378d62..83abe5e6812a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -235,7 +235,8 @@ enum sort_type {
>  	SORT_CODE_PAGE_SIZE,
>  	SORT_LOCAL_INS_LAT,
>  	SORT_GLOBAL_INS_LAT,
> -	SORT_PIPELINE_STAGE_CYC,
> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>  
>  	/* branch stack specific sort keys */
>  	__SORT_BRANCH_STACK,
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-28 16:34   ` Jiri Olsa
  0 siblings, 0 replies; 16+ messages in thread
From: Jiri Olsa @ 2021-11-28 16:34 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: maddy, rnsastry, acme, linux-perf-users, jolsa, kjain, namhyung,
	linuxppc-dev

On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> Sort key p_stage_cyc is used to present the latency
> cycles spent in pipeline stages. perf tool has local
> p_stage_cyc sort key to display this info. There is no
> global variant available for this sort key. local variant
> shows latency in a single sample, whereas, global value
> will be useful to present the total latency (sum of
> latencies) in the hist entry. It represents latency
> number multiplied by the number of samples.
> 
> Add global (p_stage_cyc) and local variant
> (local_p_stage_cyc) for this sort key. Use the
> local_p_stage_cyc as default option for "mem" sort mode.
> Also add this to list of dynamic sort keys.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Namhyung Kim <namhyung@kernel.org>

I can't apply this to Arnaldo's perf/core, could you please rebase?

patching file util/hist.c
patching file util/hist.h
patching file util/sort.c
Hunk #3 FAILED at 1392.
Hunk #4 succeeded at 1878 (offset 20 lines).
1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
patching file util/sort.h

thanks,
jirka

> ---
>  tools/perf/util/hist.c |  4 +++-
>  tools/perf/util/hist.h |  3 ++-
>  tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>  tools/perf/util/sort.h |  3 ++-
>  4 files changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b776465e04ef..0a8033b09e28 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>  	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>  	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> +
>  	if (symbol_conf.nanosecs)
>  		hists__new_col_len(hists, HISTC_TIME, 16);
>  	else
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 5343b62476e6..2752ce681108 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -75,7 +75,8 @@ enum hist_column {
>  	HISTC_MEM_BLOCKED,
>  	HISTC_LOCAL_INS_LAT,
>  	HISTC_GLOBAL_INS_LAT,
> -	HISTC_P_STAGE_CYC,
> +	HISTC_LOCAL_P_STAGE_CYC,
> +	HISTC_GLOBAL_P_STAGE_CYC,
>  	HISTC_NR_COLS, /* Last entry */
>  };
>  
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index e9216a292a04..e978f7883e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>  const char	*parent_pattern = default_parent_pattern;
>  const char	*default_sort_order = "comm,dso,symbol";
>  const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>  const char	default_top_sort_order[] = "dso,symbol";
>  const char	default_diff_sort_order[] = "dso,symbol";
>  const char	default_tracepoint_sort_order[] = "trace";
> @@ -46,8 +46,8 @@ const char	*field_order;
>  regex_t		ignore_callees_regex;
>  int		have_ignore_callees = 0;
>  enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
>  
>  /*
>   * Replaces all occurrences of a char used with the:
> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>  };
>  
>  static int64_t
> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>  {
>  	return left->p_stage_cyc - right->p_stage_cyc;
>  }
>  
> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> +					size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%-*u", width,
> +			he->p_stage_cyc * he->stat.nr_events);
> +}
> +
> +
>  static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>  					size_t size, unsigned int width)
>  {
>  	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>  }
>  
> -struct sort_entry sort_p_stage_cyc = {
> -	.se_header      = "Pipeline Stage Cycle",
> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> +struct sort_entry sort_local_p_stage_cyc = {
> +	.se_header      = "Local Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
>  	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> +};
> +
> +struct sort_entry sort_global_p_stage_cyc = {
> +	.se_header      = "Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>  };
>  
>  struct sort_entry sort_mem_daddr_sym = {
> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>  	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>  	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>  	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>  };
>  
>  #undef DIM
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 3c7518378d62..83abe5e6812a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -235,7 +235,8 @@ enum sort_type {
>  	SORT_CODE_PAGE_SIZE,
>  	SORT_LOCAL_INS_LAT,
>  	SORT_GLOBAL_INS_LAT,
> -	SORT_PIPELINE_STAGE_CYC,
> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>  
>  	/* branch stack specific sort keys */
>  	__SORT_BRANCH_STACK,
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
  2021-11-28 16:34   ` Jiri Olsa
@ 2021-11-29  9:13     ` Athira Rajeev
  -1 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-29  9:13 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: maddy, rnsastry, Arnaldo Carvalho de Melo, linux-perf-users,
	Jiri Olsa, kjain, Namhyung Kim, linuxppc-dev



> On 28-Nov-2021, at 10:04 PM, Jiri Olsa <jolsa@redhat.com> wrote:
> 
> On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
>> Sort key p_stage_cyc is used to present the latency
>> cycles spent in pipeline stages. perf tool has local
>> p_stage_cyc sort key to display this info. There is no
>> global variant available for this sort key. local variant
>> shows latency in a single sample, whereas, global value
>> will be useful to present the total latency (sum of
>> latencies) in the hist entry. It represents latency
>> number multiplied by the number of samples.
>> 
>> Add global (p_stage_cyc) and local variant
>> (local_p_stage_cyc) for this sort key. Use the
>> local_p_stage_cyc as default option for "mem" sort mode.
>> Also add this to list of dynamic sort keys.
>> 
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> Reported-by: Namhyung Kim <namhyung@kernel.org>
> 
> I can't apply this to Arnaldo's perf/core, could you please rebase?
> 
> patching file util/hist.c
> patching file util/hist.h
> patching file util/sort.c
> Hunk #3 FAILED at 1392.
> Hunk #4 succeeded at 1878 (offset 20 lines).
> 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
> patching file util/sort.h
> 
> thanks,
> jirka

Hi Jiri,

Thanks for checking this patch. 

Actually these changes are on top of three other fixes from Namhyung which are already part of upstream. Below are the commits.

784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior")
4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior")
db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior")

I checked in Arnaldo’s perf/core, but these commits are not there. But I could see them in 'tmp.perf/urgent'.
I think perf/core is not yet updated.

Thanks
Athira Rajeev

> 
>> ---
>> tools/perf/util/hist.c |  4 +++-
>> tools/perf/util/hist.h |  3 ++-
>> tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>> tools/perf/util/sort.h |  3 ++-
>> 4 files changed, 32 insertions(+), 12 deletions(-)
>> 
>> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
>> index b776465e04ef..0a8033b09e28 100644
>> --- a/tools/perf/util/hist.c
>> +++ b/tools/perf/util/hist.c
>> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>> 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>> 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>> 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
>> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
>> +
>> 	if (symbol_conf.nanosecs)
>> 		hists__new_col_len(hists, HISTC_TIME, 16);
>> 	else
>> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
>> index 5343b62476e6..2752ce681108 100644
>> --- a/tools/perf/util/hist.h
>> +++ b/tools/perf/util/hist.h
>> @@ -75,7 +75,8 @@ enum hist_column {
>> 	HISTC_MEM_BLOCKED,
>> 	HISTC_LOCAL_INS_LAT,
>> 	HISTC_GLOBAL_INS_LAT,
>> -	HISTC_P_STAGE_CYC,
>> +	HISTC_LOCAL_P_STAGE_CYC,
>> +	HISTC_GLOBAL_P_STAGE_CYC,
>> 	HISTC_NR_COLS, /* Last entry */
>> };
>> 
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index e9216a292a04..e978f7883e07 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>> const char	*parent_pattern = default_parent_pattern;
>> const char	*default_sort_order = "comm,dso,symbol";
>> const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
>> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
>> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>> const char	default_top_sort_order[] = "dso,symbol";
>> const char	default_diff_sort_order[] = "dso,symbol";
>> const char	default_tracepoint_sort_order[] = "trace";
>> @@ -46,8 +46,8 @@ const char	*field_order;
>> regex_t		ignore_callees_regex;
>> int		have_ignore_callees = 0;
>> enum sort_mode	sort__mode = SORT_MODE__NORMAL;
>> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
>> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
>> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
>> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
>> 
>> /*
>>  * Replaces all occurrences of a char used with the:
>> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>> };
>> 
>> static int64_t
>> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> {
>> 	return left->p_stage_cyc - right->p_stage_cyc;
>> }
>> 
>> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> +					size_t size, unsigned int width)
>> +{
>> +	return repsep_snprintf(bf, size, "%-*u", width,
>> +			he->p_stage_cyc * he->stat.nr_events);
>> +}
>> +
>> +
>> static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> 					size_t size, unsigned int width)
>> {
>> 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>> }
>> 
>> -struct sort_entry sort_p_stage_cyc = {
>> -	.se_header      = "Pipeline Stage Cycle",
>> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
>> +struct sort_entry sort_local_p_stage_cyc = {
>> +	.se_header      = "Local Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
>> -	.se_width_idx	= HISTC_P_STAGE_CYC,
>> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
>> +};
>> +
>> +struct sort_entry sort_global_p_stage_cyc = {
>> +	.se_header      = "Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
>> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>> };
>> 
>> struct sort_entry sort_mem_daddr_sym = {
>> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>> 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>> 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>> 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
>> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
>> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
>> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>> };
>> 
>> #undef DIM
>> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
>> index 3c7518378d62..83abe5e6812a 100644
>> --- a/tools/perf/util/sort.h
>> +++ b/tools/perf/util/sort.h
>> @@ -235,7 +235,8 @@ enum sort_type {
>> 	SORT_CODE_PAGE_SIZE,
>> 	SORT_LOCAL_INS_LAT,
>> 	SORT_GLOBAL_INS_LAT,
>> -	SORT_PIPELINE_STAGE_CYC,
>> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
>> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>> 
>> 	/* branch stack specific sort keys */
>> 	__SORT_BRANCH_STACK,
>> -- 
>> 2.27.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-29  9:13     ` Athira Rajeev
  0 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-29  9:13 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, mpe, linux-perf-users,
	linuxppc-dev, maddy, rnsastry, kjain, Namhyung Kim



> On 28-Nov-2021, at 10:04 PM, Jiri Olsa <jolsa@redhat.com> wrote:
> 
> On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
>> Sort key p_stage_cyc is used to present the latency
>> cycles spent in pipeline stages. perf tool has local
>> p_stage_cyc sort key to display this info. There is no
>> global variant available for this sort key. local variant
>> shows latency in a single sample, whereas, global value
>> will be useful to present the total latency (sum of
>> latencies) in the hist entry. It represents latency
>> number multiplied by the number of samples.
>> 
>> Add global (p_stage_cyc) and local variant
>> (local_p_stage_cyc) for this sort key. Use the
>> local_p_stage_cyc as default option for "mem" sort mode.
>> Also add this to list of dynamic sort keys.
>> 
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> Reported-by: Namhyung Kim <namhyung@kernel.org>
> 
> I can't apply this to Arnaldo's perf/core, could you please rebase?
> 
> patching file util/hist.c
> patching file util/hist.h
> patching file util/sort.c
> Hunk #3 FAILED at 1392.
> Hunk #4 succeeded at 1878 (offset 20 lines).
> 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
> patching file util/sort.h
> 
> thanks,
> jirka

Hi Jiri,

Thanks for checking this patch. 

Actually these changes are on top of three other fixes from Namhyung which are already part of upstream. Below are the commits.

784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior")
4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior")
db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior")

I checked in Arnaldo’s perf/core, but these commits are not there. But I could see them in 'tmp.perf/urgent'.
I think perf/core is not yet updated.

Thanks
Athira Rajeev

> 
>> ---
>> tools/perf/util/hist.c |  4 +++-
>> tools/perf/util/hist.h |  3 ++-
>> tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>> tools/perf/util/sort.h |  3 ++-
>> 4 files changed, 32 insertions(+), 12 deletions(-)
>> 
>> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
>> index b776465e04ef..0a8033b09e28 100644
>> --- a/tools/perf/util/hist.c
>> +++ b/tools/perf/util/hist.c
>> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>> 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>> 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>> 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
>> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
>> +
>> 	if (symbol_conf.nanosecs)
>> 		hists__new_col_len(hists, HISTC_TIME, 16);
>> 	else
>> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
>> index 5343b62476e6..2752ce681108 100644
>> --- a/tools/perf/util/hist.h
>> +++ b/tools/perf/util/hist.h
>> @@ -75,7 +75,8 @@ enum hist_column {
>> 	HISTC_MEM_BLOCKED,
>> 	HISTC_LOCAL_INS_LAT,
>> 	HISTC_GLOBAL_INS_LAT,
>> -	HISTC_P_STAGE_CYC,
>> +	HISTC_LOCAL_P_STAGE_CYC,
>> +	HISTC_GLOBAL_P_STAGE_CYC,
>> 	HISTC_NR_COLS, /* Last entry */
>> };
>> 
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index e9216a292a04..e978f7883e07 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>> const char	*parent_pattern = default_parent_pattern;
>> const char	*default_sort_order = "comm,dso,symbol";
>> const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
>> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
>> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>> const char	default_top_sort_order[] = "dso,symbol";
>> const char	default_diff_sort_order[] = "dso,symbol";
>> const char	default_tracepoint_sort_order[] = "trace";
>> @@ -46,8 +46,8 @@ const char	*field_order;
>> regex_t		ignore_callees_regex;
>> int		have_ignore_callees = 0;
>> enum sort_mode	sort__mode = SORT_MODE__NORMAL;
>> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
>> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
>> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
>> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
>> 
>> /*
>>  * Replaces all occurrences of a char used with the:
>> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>> };
>> 
>> static int64_t
>> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> {
>> 	return left->p_stage_cyc - right->p_stage_cyc;
>> }
>> 
>> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> +					size_t size, unsigned int width)
>> +{
>> +	return repsep_snprintf(bf, size, "%-*u", width,
>> +			he->p_stage_cyc * he->stat.nr_events);
>> +}
>> +
>> +
>> static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> 					size_t size, unsigned int width)
>> {
>> 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>> }
>> 
>> -struct sort_entry sort_p_stage_cyc = {
>> -	.se_header      = "Pipeline Stage Cycle",
>> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
>> +struct sort_entry sort_local_p_stage_cyc = {
>> +	.se_header      = "Local Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
>> -	.se_width_idx	= HISTC_P_STAGE_CYC,
>> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
>> +};
>> +
>> +struct sort_entry sort_global_p_stage_cyc = {
>> +	.se_header      = "Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
>> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>> };
>> 
>> struct sort_entry sort_mem_daddr_sym = {
>> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>> 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>> 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>> 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
>> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
>> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
>> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>> };
>> 
>> #undef DIM
>> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
>> index 3c7518378d62..83abe5e6812a 100644
>> --- a/tools/perf/util/sort.h
>> +++ b/tools/perf/util/sort.h
>> @@ -235,7 +235,8 @@ enum sort_type {
>> 	SORT_CODE_PAGE_SIZE,
>> 	SORT_LOCAL_INS_LAT,
>> 	SORT_GLOBAL_INS_LAT,
>> -	SORT_PIPELINE_STAGE_CYC,
>> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
>> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>> 
>> 	/* branch stack specific sort keys */
>> 	__SORT_BRANCH_STACK,
>> -- 
>> 2.27.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
  2021-11-29  9:13     ` Athira Rajeev
@ 2021-11-29 16:25       ` Jiri Olsa
  -1 siblings, 0 replies; 16+ messages in thread
From: Jiri Olsa @ 2021-11-29 16:25 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: maddy, rnsastry, Arnaldo Carvalho de Melo, linux-perf-users,
	Jiri Olsa, kjain, Namhyung Kim, linuxppc-dev

On Mon, Nov 29, 2021 at 02:43:48PM +0530, Athira Rajeev wrote:
> 
> 
> > On 28-Nov-2021, at 10:04 PM, Jiri Olsa <jolsa@redhat.com> wrote:
> > 
> > On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> >> Sort key p_stage_cyc is used to present the latency
> >> cycles spent in pipeline stages. perf tool has local
> >> p_stage_cyc sort key to display this info. There is no
> >> global variant available for this sort key. local variant
> >> shows latency in a single sample, whereas, global value
> >> will be useful to present the total latency (sum of
> >> latencies) in the hist entry. It represents latency
> >> number multiplied by the number of samples.
> >> 
> >> Add global (p_stage_cyc) and local variant
> >> (local_p_stage_cyc) for this sort key. Use the
> >> local_p_stage_cyc as default option for "mem" sort mode.
> >> Also add this to list of dynamic sort keys.
> >> 
> >> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> >> Reported-by: Namhyung Kim <namhyung@kernel.org>
> > 
> > I can't apply this to Arnaldo's perf/core, could you please rebase?
> > 
> > patching file util/hist.c
> > patching file util/hist.h
> > patching file util/sort.c
> > Hunk #3 FAILED at 1392.
> > Hunk #4 succeeded at 1878 (offset 20 lines).
> > 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
> > patching file util/sort.h
> > 
> > thanks,
> > jirka
> 
> Hi Jiri,
> 
> Thanks for checking this patch. 
> 
> Actually these changes are on top of three other fixes from Namhyung which are already part of upstream. Below are the commits.
> 
> 784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior")
> 4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior")
> db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior")
> 
> I checked in Arnaldo’s perf/core, but these commits are not there. But I could see them in 'tmp.perf/urgent'
> I think perf/core is not yet updated.

ah ok, I got it applied on perf/urgent, thanks

jirka

> 
> Thanks
> Athira Rajeev
> 
> > 
> >> ---
> >> tools/perf/util/hist.c |  4 +++-
> >> tools/perf/util/hist.h |  3 ++-
> >> tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
> >> tools/perf/util/sort.h |  3 ++-
> >> 4 files changed, 32 insertions(+), 12 deletions(-)
> >> 
> >> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> >> index b776465e04ef..0a8033b09e28 100644
> >> --- a/tools/perf/util/hist.c
> >> +++ b/tools/perf/util/hist.c
> >> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
> >> 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
> >> 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
> >> 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> >> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> >> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> >> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> >> +
> >> 	if (symbol_conf.nanosecs)
> >> 		hists__new_col_len(hists, HISTC_TIME, 16);
> >> 	else
> >> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> >> index 5343b62476e6..2752ce681108 100644
> >> --- a/tools/perf/util/hist.h
> >> +++ b/tools/perf/util/hist.h
> >> @@ -75,7 +75,8 @@ enum hist_column {
> >> 	HISTC_MEM_BLOCKED,
> >> 	HISTC_LOCAL_INS_LAT,
> >> 	HISTC_GLOBAL_INS_LAT,
> >> -	HISTC_P_STAGE_CYC,
> >> +	HISTC_LOCAL_P_STAGE_CYC,
> >> +	HISTC_GLOBAL_P_STAGE_CYC,
> >> 	HISTC_NR_COLS, /* Last entry */
> >> };
> >> 
> >> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> >> index e9216a292a04..e978f7883e07 100644
> >> --- a/tools/perf/util/sort.c
> >> +++ b/tools/perf/util/sort.c
> >> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
> >> const char	*parent_pattern = default_parent_pattern;
> >> const char	*default_sort_order = "comm,dso,symbol";
> >> const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> >> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> >> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
> >> const char	default_top_sort_order[] = "dso,symbol";
> >> const char	default_diff_sort_order[] = "dso,symbol";
> >> const char	default_tracepoint_sort_order[] = "trace";
> >> @@ -46,8 +46,8 @@ const char	*field_order;
> >> regex_t		ignore_callees_regex;
> >> int		have_ignore_callees = 0;
> >> enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> >> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> >> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> >> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> >> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
> >> 
> >> /*
> >>  * Replaces all occurrences of a char used with the:
> >> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
> >> };
> >> 
> >> static int64_t
> >> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> >> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> >> {
> >> 	return left->p_stage_cyc - right->p_stage_cyc;
> >> }
> >> 
> >> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> >> +					size_t size, unsigned int width)
> >> +{
> >> +	return repsep_snprintf(bf, size, "%-*u", width,
> >> +			he->p_stage_cyc * he->stat.nr_events);
> >> +}
> >> +
> >> +
> >> static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> >> 					size_t size, unsigned int width)
> >> {
> >> 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
> >> }
> >> 
> >> -struct sort_entry sort_p_stage_cyc = {
> >> -	.se_header      = "Pipeline Stage Cycle",
> >> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> >> +struct sort_entry sort_local_p_stage_cyc = {
> >> +	.se_header      = "Local Pipeline Stage Cycle",
> >> +	.se_cmp         = sort__p_stage_cyc_cmp,
> >> 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> >> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> >> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> >> +};
> >> +
> >> +struct sort_entry sort_global_p_stage_cyc = {
> >> +	.se_header      = "Pipeline Stage Cycle",
> >> +	.se_cmp         = sort__p_stage_cyc_cmp,
> >> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> >> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
> >> };
> >> 
> >> struct sort_entry sort_mem_daddr_sym = {
> >> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
> >> 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
> >> 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
> >> 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> >> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> >> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> >> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
> >> };
> >> 
> >> #undef DIM
> >> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> >> index 3c7518378d62..83abe5e6812a 100644
> >> --- a/tools/perf/util/sort.h
> >> +++ b/tools/perf/util/sort.h
> >> @@ -235,7 +235,8 @@ enum sort_type {
> >> 	SORT_CODE_PAGE_SIZE,
> >> 	SORT_LOCAL_INS_LAT,
> >> 	SORT_GLOBAL_INS_LAT,
> >> -	SORT_PIPELINE_STAGE_CYC,
> >> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> >> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
> >> 
> >> 	/* branch stack specific sort keys */
> >> 	__SORT_BRANCH_STACK,
> >> -- 
> >> 2.27.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-29 16:25       ` Jiri Olsa
  0 siblings, 0 replies; 16+ messages in thread
From: Jiri Olsa @ 2021-11-29 16:25 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, mpe, linux-perf-users,
	linuxppc-dev, maddy, rnsastry, kjain, Namhyung Kim

On Mon, Nov 29, 2021 at 02:43:48PM +0530, Athira Rajeev wrote:
> 
> 
> > On 28-Nov-2021, at 10:04 PM, Jiri Olsa <jolsa@redhat.com> wrote:
> > 
> > On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> >> Sort key p_stage_cyc is used to present the latency
> >> cycles spent in pipeline stages. perf tool has local
> >> p_stage_cyc sort key to display this info. There is no
> >> global variant available for this sort key. local variant
> >> shows latency in a single sample, whereas, global value
> >> will be useful to present the total latency (sum of
> >> latencies) in the hist entry. It represents latency
> >> number multiplied by the number of samples.
> >> 
> >> Add global (p_stage_cyc) and local variant
> >> (local_p_stage_cyc) for this sort key. Use the
> >> local_p_stage_cyc as default option for "mem" sort mode.
> >> Also add this to list of dynamic sort keys.
> >> 
> >> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> >> Reported-by: Namhyung Kim <namhyung@kernel.org>
> > 
> > I can't apply this to Arnaldo's perf/core, could you please rebase?
> > 
> > patching file util/hist.c
> > patching file util/hist.h
> > patching file util/sort.c
> > Hunk #3 FAILED at 1392.
> > Hunk #4 succeeded at 1878 (offset 20 lines).
> > 1 out of 4 hunks FAILED -- saving rejects to file util/sort.c.rej
> > patching file util/sort.h
> > 
> > thanks,
> > jirka
> 
> Hi Jiri,
> 
> Thanks for checking this patch. 
> 
> Actually these changes are on top of three other fixes from Namhyung which are already part of upstream. Below are the commits.
> 
> 784e8adda4cd ("perf sort: Fix the 'weight' sort key behavior")
> 4d03c75363ee ("perf sort: Fix the 'ins_lat' sort key behavior")
> db4b28402909 ("perf sort: Fix the 'p_stage_cyc' sort key behavior")
> 
> I checked in Arnaldo’s perf/core, but these commits are not there. But I could see them in 'tmp.perf/urgent'
> I think perf/core is not yet updated.

ah ok, I got it applied on perf/urgent, thanks

jirka

> 
> Thanks
> Athira Rajeev
> 
> > 
> >> ---
> >> tools/perf/util/hist.c |  4 +++-
> >> tools/perf/util/hist.h |  3 ++-
> >> tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
> >> tools/perf/util/sort.h |  3 ++-
> >> 4 files changed, 32 insertions(+), 12 deletions(-)
> >> 
> >> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> >> index b776465e04ef..0a8033b09e28 100644
> >> --- a/tools/perf/util/hist.c
> >> +++ b/tools/perf/util/hist.c
> >> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
> >> 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
> >> 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
> >> 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> >> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> >> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> >> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> >> +
> >> 	if (symbol_conf.nanosecs)
> >> 		hists__new_col_len(hists, HISTC_TIME, 16);
> >> 	else
> >> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> >> index 5343b62476e6..2752ce681108 100644
> >> --- a/tools/perf/util/hist.h
> >> +++ b/tools/perf/util/hist.h
> >> @@ -75,7 +75,8 @@ enum hist_column {
> >> 	HISTC_MEM_BLOCKED,
> >> 	HISTC_LOCAL_INS_LAT,
> >> 	HISTC_GLOBAL_INS_LAT,
> >> -	HISTC_P_STAGE_CYC,
> >> +	HISTC_LOCAL_P_STAGE_CYC,
> >> +	HISTC_GLOBAL_P_STAGE_CYC,
> >> 	HISTC_NR_COLS, /* Last entry */
> >> };
> >> 
> >> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> >> index e9216a292a04..e978f7883e07 100644
> >> --- a/tools/perf/util/sort.c
> >> +++ b/tools/perf/util/sort.c
> >> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
> >> const char	*parent_pattern = default_parent_pattern;
> >> const char	*default_sort_order = "comm,dso,symbol";
> >> const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> >> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> >> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
> >> const char	default_top_sort_order[] = "dso,symbol";
> >> const char	default_diff_sort_order[] = "dso,symbol";
> >> const char	default_tracepoint_sort_order[] = "trace";
> >> @@ -46,8 +46,8 @@ const char	*field_order;
> >> regex_t		ignore_callees_regex;
> >> int		have_ignore_callees = 0;
> >> enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> >> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> >> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> >> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> >> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
> >> 
> >> /*
> >>  * Replaces all occurrences of a char used with the:
> >> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
> >> };
> >> 
> >> static int64_t
> >> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> >> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> >> {
> >> 	return left->p_stage_cyc - right->p_stage_cyc;
> >> }
> >> 
> >> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> >> +					size_t size, unsigned int width)
> >> +{
> >> +	return repsep_snprintf(bf, size, "%-*u", width,
> >> +			he->p_stage_cyc * he->stat.nr_events);
> >> +}
> >> +
> >> +
> >> static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> >> 					size_t size, unsigned int width)
> >> {
> >> 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
> >> }
> >> 
> >> -struct sort_entry sort_p_stage_cyc = {
> >> -	.se_header      = "Pipeline Stage Cycle",
> >> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> >> +struct sort_entry sort_local_p_stage_cyc = {
> >> +	.se_header      = "Local Pipeline Stage Cycle",
> >> +	.se_cmp         = sort__p_stage_cyc_cmp,
> >> 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> >> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> >> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> >> +};
> >> +
> >> +struct sort_entry sort_global_p_stage_cyc = {
> >> +	.se_header      = "Pipeline Stage Cycle",
> >> +	.se_cmp         = sort__p_stage_cyc_cmp,
> >> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> >> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
> >> };
> >> 
> >> struct sort_entry sort_mem_daddr_sym = {
> >> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
> >> 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
> >> 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
> >> 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> >> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> >> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> >> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
> >> };
> >> 
> >> #undef DIM
> >> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> >> index 3c7518378d62..83abe5e6812a 100644
> >> --- a/tools/perf/util/sort.h
> >> +++ b/tools/perf/util/sort.h
> >> @@ -235,7 +235,8 @@ enum sort_type {
> >> 	SORT_CODE_PAGE_SIZE,
> >> 	SORT_LOCAL_INS_LAT,
> >> 	SORT_GLOBAL_INS_LAT,
> >> -	SORT_PIPELINE_STAGE_CYC,
> >> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> >> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
> >> 
> >> 	/* branch stack specific sort keys */
> >> 	__SORT_BRANCH_STACK,
> >> -- 
> >> 2.27.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
  2021-11-25  2:48 ` Athira Rajeev
@ 2021-11-29 17:11   ` Jiri Olsa
  -1 siblings, 0 replies; 16+ messages in thread
From: Jiri Olsa @ 2021-11-29 17:11 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: maddy, rnsastry, acme, linux-perf-users, jolsa, kjain, namhyung,
	linuxppc-dev

On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> Sort key p_stage_cyc is used to present the latency
> cycles spent in pipeline stages. perf tool has local
> p_stage_cyc sort key to display this info. There is no
> global variant available for this sort key. local variant
> shows latency in a single sample, whereas, global value
> will be useful to present the total latency (sum of
> latencies) in the hist entry. It represents latency
> number multiplied by the number of samples.
> 
> Add global (p_stage_cyc) and local variant
> (local_p_stage_cyc) for this sort key. Use the
> local_p_stage_cyc as default option for "mem" sort mode.
> Also add this to list of dynamic sort keys.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/util/hist.c |  4 +++-
>  tools/perf/util/hist.h |  3 ++-
>  tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>  tools/perf/util/sort.h |  3 ++-
>  4 files changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b776465e04ef..0a8033b09e28 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>  	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>  	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> +
>  	if (symbol_conf.nanosecs)
>  		hists__new_col_len(hists, HISTC_TIME, 16);
>  	else
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 5343b62476e6..2752ce681108 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -75,7 +75,8 @@ enum hist_column {
>  	HISTC_MEM_BLOCKED,
>  	HISTC_LOCAL_INS_LAT,
>  	HISTC_GLOBAL_INS_LAT,
> -	HISTC_P_STAGE_CYC,
> +	HISTC_LOCAL_P_STAGE_CYC,
> +	HISTC_GLOBAL_P_STAGE_CYC,
>  	HISTC_NR_COLS, /* Last entry */
>  };
>  
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index e9216a292a04..e978f7883e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>  const char	*parent_pattern = default_parent_pattern;
>  const char	*default_sort_order = "comm,dso,symbol";
>  const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>  const char	default_top_sort_order[] = "dso,symbol";
>  const char	default_diff_sort_order[] = "dso,symbol";
>  const char	default_tracepoint_sort_order[] = "trace";
> @@ -46,8 +46,8 @@ const char	*field_order;
>  regex_t		ignore_callees_regex;
>  int		have_ignore_callees = 0;
>  enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};

so you also add global ins_lat, right? will this change
some default behaviour?

> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};

nit.. both dynamic_headers and arch_specific_sort_keys could be static right?

thanks,
jirka

>  
>  /*
>   * Replaces all occurrences of a char used with the:
> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>  };
>  
>  static int64_t
> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>  {
>  	return left->p_stage_cyc - right->p_stage_cyc;
>  }
>  
> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> +					size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%-*u", width,
> +			he->p_stage_cyc * he->stat.nr_events);
> +}
> +
> +
>  static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>  					size_t size, unsigned int width)
>  {
>  	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>  }
>  
> -struct sort_entry sort_p_stage_cyc = {
> -	.se_header      = "Pipeline Stage Cycle",
> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> +struct sort_entry sort_local_p_stage_cyc = {
> +	.se_header      = "Local Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
>  	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> +};
> +
> +struct sort_entry sort_global_p_stage_cyc = {
> +	.se_header      = "Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>  };
>  
>  struct sort_entry sort_mem_daddr_sym = {
> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>  	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>  	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>  	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>  };
>  
>  #undef DIM
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 3c7518378d62..83abe5e6812a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -235,7 +235,8 @@ enum sort_type {
>  	SORT_CODE_PAGE_SIZE,
>  	SORT_LOCAL_INS_LAT,
>  	SORT_GLOBAL_INS_LAT,
> -	SORT_PIPELINE_STAGE_CYC,
> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>  
>  	/* branch stack specific sort keys */
>  	__SORT_BRANCH_STACK,
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-29 17:11   ` Jiri Olsa
  0 siblings, 0 replies; 16+ messages in thread
From: Jiri Olsa @ 2021-11-29 17:11 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: acme, jolsa, mpe, linux-perf-users, linuxppc-dev, maddy,
	rnsastry, kjain, namhyung

On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
> Sort key p_stage_cyc is used to present the latency
> cycles spent in pipeline stages. perf tool has local
> p_stage_cyc sort key to display this info. There is no
> global variant available for this sort key. local variant
> shows latency in a single sample, whereas, global value
> will be useful to present the total latency (sum of
> latencies) in the hist entry. It represents latency
> number multiplied by the number of samples.
> 
> Add global (p_stage_cyc) and local variant
> (local_p_stage_cyc) for this sort key. Use the
> local_p_stage_cyc as default option for "mem" sort mode.
> Also add this to list of dynamic sort keys.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> Reported-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/util/hist.c |  4 +++-
>  tools/perf/util/hist.h |  3 ++-
>  tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>  tools/perf/util/sort.h |  3 ++-
>  4 files changed, 32 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index b776465e04ef..0a8033b09e28 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>  	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>  	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
> +
>  	if (symbol_conf.nanosecs)
>  		hists__new_col_len(hists, HISTC_TIME, 16);
>  	else
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 5343b62476e6..2752ce681108 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -75,7 +75,8 @@ enum hist_column {
>  	HISTC_MEM_BLOCKED,
>  	HISTC_LOCAL_INS_LAT,
>  	HISTC_GLOBAL_INS_LAT,
> -	HISTC_P_STAGE_CYC,
> +	HISTC_LOCAL_P_STAGE_CYC,
> +	HISTC_GLOBAL_P_STAGE_CYC,
>  	HISTC_NR_COLS, /* Last entry */
>  };
>  
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index e9216a292a04..e978f7883e07 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>  const char	*parent_pattern = default_parent_pattern;
>  const char	*default_sort_order = "comm,dso,symbol";
>  const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>  const char	default_top_sort_order[] = "dso,symbol";
>  const char	default_diff_sort_order[] = "dso,symbol";
>  const char	default_tracepoint_sort_order[] = "trace";
> @@ -46,8 +46,8 @@ const char	*field_order;
>  regex_t		ignore_callees_regex;
>  int		have_ignore_callees = 0;
>  enum sort_mode	sort__mode = SORT_MODE__NORMAL;
> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};

so you also add global ins_lat, right? will this change
some default behaviour?

> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};

nit.. both dynamic_headers and arch_specific_sort_keys could be static right?

thanks,
jirka

>  
>  /*
>   * Replaces all occurrences of a char used with the:
> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>  };
>  
>  static int64_t
> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>  {
>  	return left->p_stage_cyc - right->p_stage_cyc;
>  }
>  
> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> +					size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%-*u", width,
> +			he->p_stage_cyc * he->stat.nr_events);
> +}
> +
> +
>  static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>  					size_t size, unsigned int width)
>  {
>  	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>  }
>  
> -struct sort_entry sort_p_stage_cyc = {
> -	.se_header      = "Pipeline Stage Cycle",
> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
> +struct sort_entry sort_local_p_stage_cyc = {
> +	.se_header      = "Local Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
>  	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
> -	.se_width_idx	= HISTC_P_STAGE_CYC,
> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
> +};
> +
> +struct sort_entry sort_global_p_stage_cyc = {
> +	.se_header      = "Pipeline Stage Cycle",
> +	.se_cmp         = sort__p_stage_cyc_cmp,
> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>  };
>  
>  struct sort_entry sort_mem_daddr_sym = {
> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>  	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>  	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>  	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>  };
>  
>  #undef DIM
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 3c7518378d62..83abe5e6812a 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -235,7 +235,8 @@ enum sort_type {
>  	SORT_CODE_PAGE_SIZE,
>  	SORT_LOCAL_INS_LAT,
>  	SORT_GLOBAL_INS_LAT,
> -	SORT_PIPELINE_STAGE_CYC,
> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>  
>  	/* branch stack specific sort keys */
>  	__SORT_BRANCH_STACK,
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
  2021-11-29 17:11   ` Jiri Olsa
@ 2021-11-30 13:29     ` Athira Rajeev
  -1 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-30 13:29 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Arnaldo Carvalho de Melo, Jiri Olsa, mpe, linux-perf-users,
	linuxppc-dev, maddy, rnsastry, kjain, Namhyung Kim



> On 29-Nov-2021, at 10:41 PM, Jiri Olsa <jolsa@redhat.com> wrote:
> 
> On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
>> Sort key p_stage_cyc is used to present the latency
>> cycles spent in pipeline stages. perf tool has local
>> p_stage_cyc sort key to display this info. There is no
>> global variant available for this sort key. local variant
>> shows latency in a single sample, whereas, global value
>> will be useful to present the total latency (sum of
>> latencies) in the hist entry. It represents latency
>> number multiplied by the number of samples.
>> 
>> Add global (p_stage_cyc) and local variant
>> (local_p_stage_cyc) for this sort key. Use the
>> local_p_stage_cyc as default option for "mem" sort mode.
>> Also add this to list of dynamic sort keys.
>> 
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> Reported-by: Namhyung Kim <namhyung@kernel.org>
>> ---
>> tools/perf/util/hist.c |  4 +++-
>> tools/perf/util/hist.h |  3 ++-
>> tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>> tools/perf/util/sort.h |  3 ++-
>> 4 files changed, 32 insertions(+), 12 deletions(-)
>> 
>> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
>> index b776465e04ef..0a8033b09e28 100644
>> --- a/tools/perf/util/hist.c
>> +++ b/tools/perf/util/hist.c
>> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>> 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>> 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>> 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
>> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
>> +
>> 	if (symbol_conf.nanosecs)
>> 		hists__new_col_len(hists, HISTC_TIME, 16);
>> 	else
>> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
>> index 5343b62476e6..2752ce681108 100644
>> --- a/tools/perf/util/hist.h
>> +++ b/tools/perf/util/hist.h
>> @@ -75,7 +75,8 @@ enum hist_column {
>> 	HISTC_MEM_BLOCKED,
>> 	HISTC_LOCAL_INS_LAT,
>> 	HISTC_GLOBAL_INS_LAT,
>> -	HISTC_P_STAGE_CYC,
>> +	HISTC_LOCAL_P_STAGE_CYC,
>> +	HISTC_GLOBAL_P_STAGE_CYC,
>> 	HISTC_NR_COLS, /* Last entry */
>> };
>> 
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index e9216a292a04..e978f7883e07 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>> const char	*parent_pattern = default_parent_pattern;
>> const char	*default_sort_order = "comm,dso,symbol";
>> const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
>> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
>> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>> const char	default_top_sort_order[] = "dso,symbol";
>> const char	default_diff_sort_order[] = "dso,symbol";
>> const char	default_tracepoint_sort_order[] = "trace";
>> @@ -46,8 +46,8 @@ const char	*field_order;
>> regex_t		ignore_callees_regex;
>> int		have_ignore_callees = 0;
>> enum sort_mode	sort__mode = SORT_MODE__NORMAL;
>> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
>> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
>> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> 
> so you also add global ins_lat, right? will this change
> some default behaviour?

Hi Jiri,

By default, if the architecture doesn’t have a dynamic header entry for this sort key (this is handled by the arch_perf_header_entry() function),
it will use the default se_header value in perf report. So the default behaviour will not be changed.

> 
>> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
> 
> nit.. both dynamic_headers and arch_specific_sort_keys could be static right?

Sure, these are used only in util/sort.c.
I will add the change to make these static in the next version.

Thanks
Athira
> 
> thanks,
> jirka
> 
>> 
>> /*
>>  * Replaces all occurrences of a char used with the:
>> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>> };
>> 
>> static int64_t
>> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> {
>> 	return left->p_stage_cyc - right->p_stage_cyc;
>> }
>> 
>> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> +					size_t size, unsigned int width)
>> +{
>> +	return repsep_snprintf(bf, size, "%-*u", width,
>> +			he->p_stage_cyc * he->stat.nr_events);
>> +}
>> +
>> +
>> static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> 					size_t size, unsigned int width)
>> {
>> 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>> }
>> 
>> -struct sort_entry sort_p_stage_cyc = {
>> -	.se_header      = "Pipeline Stage Cycle",
>> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
>> +struct sort_entry sort_local_p_stage_cyc = {
>> +	.se_header      = "Local Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
>> -	.se_width_idx	= HISTC_P_STAGE_CYC,
>> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
>> +};
>> +
>> +struct sort_entry sort_global_p_stage_cyc = {
>> +	.se_header      = "Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
>> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>> };
>> 
>> struct sort_entry sort_mem_daddr_sym = {
>> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>> 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>> 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>> 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
>> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
>> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
>> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>> };
>> 
>> #undef DIM
>> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
>> index 3c7518378d62..83abe5e6812a 100644
>> --- a/tools/perf/util/sort.h
>> +++ b/tools/perf/util/sort.h
>> @@ -235,7 +235,8 @@ enum sort_type {
>> 	SORT_CODE_PAGE_SIZE,
>> 	SORT_LOCAL_INS_LAT,
>> 	SORT_GLOBAL_INS_LAT,
>> -	SORT_PIPELINE_STAGE_CYC,
>> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
>> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>> 
>> 	/* branch stack specific sort keys */
>> 	__SORT_BRANCH_STACK,
>> -- 
>> 2.27.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key
@ 2021-11-30 13:29     ` Athira Rajeev
  0 siblings, 0 replies; 16+ messages in thread
From: Athira Rajeev @ 2021-11-30 13:29 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: maddy, rnsastry, Arnaldo Carvalho de Melo, linux-perf-users,
	Jiri Olsa, kjain, Namhyung Kim, linuxppc-dev



> On 29-Nov-2021, at 10:41 PM, Jiri Olsa <jolsa@redhat.com> wrote:
> 
> On Thu, Nov 25, 2021 at 08:18:50AM +0530, Athira Rajeev wrote:
>> Sort key p_stage_cyc is used to present the latency
>> cycles spent in pipeline stages. perf tool has local
>> p_stage_cyc sort key to display this info. There is no
>> global variant available for this sort key. local variant
>> shows latency in a single sample, whereas, global value
>> will be useful to present the total latency (sum of
>> latencies) in the hist entry. It represents latency
>> number multiplied by the number of samples.
>> 
>> Add global (p_stage_cyc) and local variant
>> (local_p_stage_cyc) for this sort key. Use the
>> local_p_stage_cyc as default option for "mem" sort mode.
>> Also add this to list of dynamic sort keys.
>> 
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> Reported-by: Namhyung Kim <namhyung@kernel.org>
>> ---
>> tools/perf/util/hist.c |  4 +++-
>> tools/perf/util/hist.h |  3 ++-
>> tools/perf/util/sort.c | 34 +++++++++++++++++++++++++---------
>> tools/perf/util/sort.h |  3 ++-
>> 4 files changed, 32 insertions(+), 12 deletions(-)
>> 
>> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
>> index b776465e04ef..0a8033b09e28 100644
>> --- a/tools/perf/util/hist.c
>> +++ b/tools/perf/util/hist.c
>> @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>> 	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
>> 	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
>> 	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
>> -	hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
>> +	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
>> +
>> 	if (symbol_conf.nanosecs)
>> 		hists__new_col_len(hists, HISTC_TIME, 16);
>> 	else
>> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
>> index 5343b62476e6..2752ce681108 100644
>> --- a/tools/perf/util/hist.h
>> +++ b/tools/perf/util/hist.h
>> @@ -75,7 +75,8 @@ enum hist_column {
>> 	HISTC_MEM_BLOCKED,
>> 	HISTC_LOCAL_INS_LAT,
>> 	HISTC_GLOBAL_INS_LAT,
>> -	HISTC_P_STAGE_CYC,
>> +	HISTC_LOCAL_P_STAGE_CYC,
>> +	HISTC_GLOBAL_P_STAGE_CYC,
>> 	HISTC_NR_COLS, /* Last entry */
>> };
>> 
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index e9216a292a04..e978f7883e07 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -37,7 +37,7 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
>> const char	*parent_pattern = default_parent_pattern;
>> const char	*default_sort_order = "comm,dso,symbol";
>> const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
>> -const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
>> +const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc";
>> const char	default_top_sort_order[] = "dso,symbol";
>> const char	default_diff_sort_order[] = "dso,symbol";
>> const char	default_tracepoint_sort_order[] = "trace";
>> @@ -46,8 +46,8 @@ const char	*field_order;
>> regex_t		ignore_callees_regex;
>> int		have_ignore_callees = 0;
>> enum sort_mode	sort__mode = SORT_MODE__NORMAL;
>> -const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
>> -const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
>> +const char	*dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"};
> 
> so you also add global ins_lat, right? will this change
> some default behaviour?

Hi Jiri,

By default, if the architecture doesn’t have a dynamic header entry for this sort key (this is handled by the arch_perf_header_entry() function),
it will use the default se_header value in perf report. So the default behaviour will not be changed.

> 
>> +const char	*arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
> 
> nit.. both dynamic_headers and arch_specific_sort_keys could be static right?

Sure, these are used only in util/sort.c.
I will add the change to make these static in the next version.

Thanks
Athira
> 
> thanks,
> jirka
> 
>> 
>> /*
>>  * Replaces all occurrences of a char used with the:
>> @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = {
>> };
>> 
>> static int64_t
>> -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>> {
>> 	return left->p_stage_cyc - right->p_stage_cyc;
>> }
>> 
>> +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> +					size_t size, unsigned int width)
>> +{
>> +	return repsep_snprintf(bf, size, "%-*u", width,
>> +			he->p_stage_cyc * he->stat.nr_events);
>> +}
>> +
>> +
>> static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>> 					size_t size, unsigned int width)
>> {
>> 	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
>> }
>> 
>> -struct sort_entry sort_p_stage_cyc = {
>> -	.se_header      = "Pipeline Stage Cycle",
>> -	.se_cmp         = sort__global_p_stage_cyc_cmp,
>> +struct sort_entry sort_local_p_stage_cyc = {
>> +	.se_header      = "Local Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> 	.se_snprintf	= hist_entry__p_stage_cyc_snprintf,
>> -	.se_width_idx	= HISTC_P_STAGE_CYC,
>> +	.se_width_idx	= HISTC_LOCAL_P_STAGE_CYC,
>> +};
>> +
>> +struct sort_entry sort_global_p_stage_cyc = {
>> +	.se_header      = "Pipeline Stage Cycle",
>> +	.se_cmp         = sort__p_stage_cyc_cmp,
>> +	.se_snprintf    = hist_entry__global_p_stage_cyc_snprintf,
>> +	.se_width_idx   = HISTC_GLOBAL_P_STAGE_CYC,
>> };
>> 
>> struct sort_entry sort_mem_daddr_sym = {
>> @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = {
>> 	DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>> 	DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>> 	DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
>> -	DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
>> +	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
>> +	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
>> };
>> 
>> #undef DIM
>> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
>> index 3c7518378d62..83abe5e6812a 100644
>> --- a/tools/perf/util/sort.h
>> +++ b/tools/perf/util/sort.h
>> @@ -235,7 +235,8 @@ enum sort_type {
>> 	SORT_CODE_PAGE_SIZE,
>> 	SORT_LOCAL_INS_LAT,
>> 	SORT_GLOBAL_INS_LAT,
>> -	SORT_PIPELINE_STAGE_CYC,
>> +	SORT_LOCAL_PIPELINE_STAGE_CYC,
>> +	SORT_GLOBAL_PIPELINE_STAGE_CYC,
>> 
>> 	/* branch stack specific sort keys */
>> 	__SORT_BRANCH_STACK,
>> -- 
>> 2.27.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-11-30 13:31 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-25  2:48 [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key Athira Rajeev
2021-11-25  2:48 ` Athira Rajeev
2021-11-25  2:48 ` [PATCH 2/2] tools/perf: Update global/local variants for p_stage_cyc in powerpc Athira Rajeev
2021-11-25  2:48   ` Athira Rajeev
2021-11-25  8:10 ` [PATCH 1/2] tools/perf: Include global and local variants for p_stage_cyc sort key Nageswara Sastry
2021-11-25  8:10   ` Nageswara Sastry
2021-11-28 16:34 ` Jiri Olsa
2021-11-28 16:34   ` Jiri Olsa
2021-11-29  9:13   ` Athira Rajeev
2021-11-29  9:13     ` Athira Rajeev
2021-11-29 16:25     ` Jiri Olsa
2021-11-29 16:25       ` Jiri Olsa
2021-11-29 17:11 ` Jiri Olsa
2021-11-29 17:11   ` Jiri Olsa
2021-11-30 13:29   ` Athira Rajeev
2021-11-30 13:29     ` Athira Rajeev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.