All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] perf: Make some functions generic
@ 2014-03-24 19:32 Don Zickus
  2014-03-24 19:32 ` [PATCH 1/4] perf: Allow ability to map cpus to nodes easily Don Zickus
                   ` (3 more replies)
  0 siblings, 4 replies; 22+ messages in thread
From: Don Zickus @ 2014-03-24 19:32 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, Don Zickus

This patch just converts some private functions into global ones
that can be used by other tools like the c2c tool I am trying to merge.

Don Zickus (4):
  perf: Allow ability to map cpus to nodes easily
  perf, kmem: Utilize the new generic cpunode_map
  perf, callchain: Add generic report parse callchain callback function
  perf, report: Use new generic report parse callchain callback

 tools/perf/builtin-kmem.c   |  78 +------------------------
 tools/perf/builtin-report.c |  77 +-----------------------
 tools/perf/util/callchain.c |  83 ++++++++++++++++++++++++++
 tools/perf/util/callchain.h |   1 +
 tools/perf/util/cpumap.c    | 139 ++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/cpumap.h    |  35 +++++++++++
 6 files changed, 262 insertions(+), 151 deletions(-)

-- 
1.7.11.7


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-03-24 19:32 [PATCH 0/4] perf: Make some functions generic Don Zickus
@ 2014-03-24 19:32 ` Don Zickus
  2014-03-29 17:10   ` Jiri Olsa
  2014-04-03  5:48   ` Namhyung Kim
  2014-03-24 19:32 ` [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map Don Zickus
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 22+ messages in thread
From: Don Zickus @ 2014-03-24 19:32 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, Don Zickus

This patch figures out the max number of cpus and nodes that are on the
system and creates a map of cpu to node.  This allows us to provide a cpu
and quickly get the node associated with it.

It was mostly copied from builtin-kmem.c and tweaked slightly to use less memory
(use possible cpus instead of max).  It also calculates the max number of nodes.

V3: simplify function names

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/util/cpumap.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/cpumap.h |  35 ++++++++++++
 2 files changed, 174 insertions(+)

diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 7fe4994..2eb528e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -317,3 +317,142 @@ int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
 {
 	return cpu_map__build_map(cpus, corep, cpu_map__get_core);
 }
+
+/* setup simple routines to easily access node numbers given a cpu number */
+static int __set_max_num(FILE *fp, int *max)
+{
+	int num;
+	char buf[256];
+
+	num = fread(&buf, 1, sizeof(buf), fp);
+	if (!num)
+		return -1;
+
+	buf[num] = '\0';
+
+	/* start on the right, to find highest node num */
+	while (--num) {
+		if ((buf[num] == ',') || (buf[num] == '-')) {
+			num++;
+			break;
+		}
+	}
+	if (sscanf(&buf[num], "%d", max) < 1)
+		return -1;
+
+	/* convert from 0-based to 1-based */
+	(*max)++;
+
+	return 0;
+}
+
+/* Determine highest possible cpu in the system for sparse allocation */
+static void set_max_cpu_num(void)
+{
+	FILE *fp;
+	int ret = -1;
+
+	/* set up default */
+	max_cpu_num = 4096;
+
+	/* get the highest possible cpu number for a sparse allocation */
+	fp = fopen("/sys/devices/system/cpu/possible", "r");
+	if (!fp)
+		goto out;
+
+	ret = __set_max_num(fp, &max_cpu_num);
+
+	fclose(fp);
+
+out:
+	if (ret)
+		pr_err("Failed to read max cpus, using default of %d\n",
+			max_cpu_num);
+	return;
+}
+
+/* Determine highest possible node in the system for sparse allocation */
+static void set_max_node_num(void)
+{
+	FILE *fp;
+	int ret = -1;
+
+	/* set up default */
+	max_node_num = 8;
+
+	/* get the highest possible cpu number for a sparse allocation */
+	fp = fopen("/sys/devices/system/node/possible", "r");
+	if (!fp)
+		goto out;
+
+	ret = __set_max_num(fp, &max_node_num);
+
+	fclose(fp);
+	
+out:
+	if (ret)
+		pr_err("Failed to read max nodes, using default of %d\n",
+			max_node_num);
+	return;
+}
+
+static int init_cpunode_map(void)
+{
+	int i;
+
+	set_max_cpu_num();
+	set_max_node_num();
+
+	cpunode_map = calloc(max_cpu_num, sizeof(int));
+	if (!cpunode_map) {
+		pr_err("%s: calloc failed\n", __func__);
+		goto out;
+	}
+
+	for (i = 0; i < max_cpu_num; i++)
+		cpunode_map[i] = -1;
+
+	return 0;
+out:
+	return -1;
+}
+
+#define PATH_SYS_NODE   "/sys/devices/system/node"
+
+int cpu__setup_cpunode_map(void)
+{
+	struct dirent *dent1, *dent2;
+	DIR *dir1, *dir2;
+	unsigned int cpu, mem;
+	char buf[PATH_MAX];
+
+	/* initialize globals */
+	if (init_cpunode_map())
+		return -1;
+
+	dir1 = opendir(PATH_SYS_NODE);
+	if (!dir1)
+		return 0;
+
+	/* walk tree and setup map */
+	while ((dent1 = readdir(dir1)) != NULL) {
+		if (dent1->d_type != DT_DIR ||
+		    sscanf(dent1->d_name, "node%u", &mem) < 1)
+			continue;
+
+		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
+		dir2 = opendir(buf);
+		if (!dir2)
+			continue;
+		while ((dent2 = readdir(dir2)) != NULL) {
+			if (dent2->d_type != DT_LNK ||
+			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+				continue;
+			cpunode_map[cpu] = mem;
+		}
+		closedir(dir2);
+	}
+	closedir(dir1);
+	return 0;
+}
+
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index b123bb9..ce3c5a0 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -4,6 +4,9 @@
 #include <stdio.h>
 #include <stdbool.h>
 
+#include "perf.h"
+#include "util/debug.h"
+
 struct cpu_map {
 	int nr;
 	int map[];
@@ -46,4 +49,36 @@ static inline bool cpu_map__empty(const struct cpu_map *map)
 	return map ? map->map[0] == -1 : true;
 }
 
+int max_cpu_num;
+int max_node_num;
+int *cpunode_map;
+
+int cpu__setup_cpunode_map(void);
+
+static inline int cpu__max_node(void)
+{
+	if (unlikely(!max_node_num))
+		pr_debug("cpu_map not initiailzed\n");
+
+	return max_node_num;
+}
+
+static inline int cpu__max_cpu(void)
+{
+	if (unlikely(!max_cpu_num))
+		pr_debug("cpu_map not initiailzed\n");
+
+	return max_cpu_num;
+}
+
+static inline int cpu__get_node(int cpu)
+{
+	if (unlikely(cpunode_map == NULL)) {
+		pr_debug("cpu_map not initialized\n");
+		return -1;
+	}
+
+	return cpunode_map[cpu];
+}
+
 #endif /* __PERF_CPUMAP_H */
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map
  2014-03-24 19:32 [PATCH 0/4] perf: Make some functions generic Don Zickus
  2014-03-24 19:32 ` [PATCH 1/4] perf: Allow ability to map cpus to nodes easily Don Zickus
@ 2014-03-24 19:32 ` Don Zickus
  2014-03-29 17:10   ` Jiri Olsa
  2014-03-24 19:32 ` [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function Don Zickus
  2014-03-24 19:32 ` [PATCH 4/4] perf, report: Use new generic report parse callchain callback Don Zickus
  3 siblings, 1 reply; 22+ messages in thread
From: Don Zickus @ 2014-03-24 19:32 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, Don Zickus, Li Zefan

Use the previous patch implementation of cpunode_map for builtin-kmem.c
Should not be any functional difference.

Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/builtin-kmem.c | 78 ++---------------------------------------------
 1 file changed, 3 insertions(+), 75 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 929462a..a61783a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -14,6 +14,7 @@
 #include "util/parse-options.h"
 #include "util/trace-event.h"
 #include "util/data.h"
+#include "util/cpumap.h"
 
 #include "util/debug.h"
 
@@ -31,9 +32,6 @@ static int			caller_lines = -1;
 
 static bool			raw_ip;
 
-static int			*cpunode_map;
-static int			max_cpu_num;
-
 struct alloc_stat {
 	u64	call_site;
 	u64	ptr;
@@ -55,76 +53,6 @@ static struct rb_root root_caller_sorted;
 static unsigned long total_requested, total_allocated;
 static unsigned long nr_allocs, nr_cross_allocs;
 
-#define PATH_SYS_NODE	"/sys/devices/system/node"
-
-static int init_cpunode_map(void)
-{
-	FILE *fp;
-	int i, err = -1;
-
-	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
-	if (!fp) {
-		max_cpu_num = 4096;
-		return 0;
-	}
-
-	if (fscanf(fp, "%d", &max_cpu_num) < 1) {
-		pr_err("Failed to read 'kernel_max' from sysfs");
-		goto out_close;
-	}
-
-	max_cpu_num++;
-
-	cpunode_map = calloc(max_cpu_num, sizeof(int));
-	if (!cpunode_map) {
-		pr_err("%s: calloc failed\n", __func__);
-		goto out_close;
-	}
-
-	for (i = 0; i < max_cpu_num; i++)
-		cpunode_map[i] = -1;
-
-	err = 0;
-out_close:
-	fclose(fp);
-	return err;
-}
-
-static int setup_cpunode_map(void)
-{
-	struct dirent *dent1, *dent2;
-	DIR *dir1, *dir2;
-	unsigned int cpu, mem;
-	char buf[PATH_MAX];
-
-	if (init_cpunode_map())
-		return -1;
-
-	dir1 = opendir(PATH_SYS_NODE);
-	if (!dir1)
-		return 0;
-
-	while ((dent1 = readdir(dir1)) != NULL) {
-		if (dent1->d_type != DT_DIR ||
-		    sscanf(dent1->d_name, "node%u", &mem) < 1)
-			continue;
-
-		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
-		dir2 = opendir(buf);
-		if (!dir2)
-			continue;
-		while ((dent2 = readdir(dir2)) != NULL) {
-			if (dent2->d_type != DT_LNK ||
-			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
-				continue;
-			cpunode_map[cpu] = mem;
-		}
-		closedir(dir2);
-	}
-	closedir(dir1);
-	return 0;
-}
-
 static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
 			     int bytes_req, int bytes_alloc, int cpu)
 {
@@ -235,7 +163,7 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
 	int ret = perf_evsel__process_alloc_event(evsel, sample);
 
 	if (!ret) {
-		int node1 = cpunode_map[sample->cpu],
+		int node1 = cpu__get_node(sample->cpu),
 		    node2 = perf_evsel__intval(evsel, sample, "node");
 
 		if (node1 != node2)
@@ -770,7 +698,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (!strncmp(argv[0], "rec", 3)) {
 		return __cmd_record(argc, argv);
 	} else if (!strcmp(argv[0], "stat")) {
-		if (setup_cpunode_map())
+		if (cpu__setup_cpunode_map())
 			return -1;
 
 		if (list_empty(&caller_sort))
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function
  2014-03-24 19:32 [PATCH 0/4] perf: Make some functions generic Don Zickus
  2014-03-24 19:32 ` [PATCH 1/4] perf: Allow ability to map cpus to nodes easily Don Zickus
  2014-03-24 19:32 ` [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map Don Zickus
@ 2014-03-24 19:32 ` Don Zickus
  2014-03-29 17:11   ` Jiri Olsa
  2014-04-03  5:57   ` Namhyung Kim
  2014-03-24 19:32 ` [PATCH 4/4] perf, report: Use new generic report parse callchain callback Don Zickus
  3 siblings, 2 replies; 22+ messages in thread
From: Don Zickus @ 2014-03-24 19:32 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, Don Zickus

This takes the parse_callchain_opt function and copies it into the
callchain.c file.  Now the c2c tool can use it too without duplicating.

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/util/callchain.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/callchain.h |  1 +
 2 files changed, 84 insertions(+)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8d9db45..2320678 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -25,6 +25,89 @@
 
 __thread struct callchain_cursor callchain_cursor;
 
+int
+report_parse_callchain_opt(const char *arg)
+{
+	//struct report *rep = (struct report *)opt->value;
+	char *tok, *tok2;
+	char *endptr;
+
+	symbol_conf.use_callchain = true;
+
+	if (!arg)
+		return 0;
+
+	tok = strtok((char *)arg, ",");
+	if (!tok)
+		return -1;
+
+	/* get the output mode */
+	if (!strncmp(tok, "graph", strlen(arg)))
+		callchain_param.mode = CHAIN_GRAPH_ABS;
+
+	else if (!strncmp(tok, "flat", strlen(arg)))
+		callchain_param.mode = CHAIN_FLAT;
+
+	else if (!strncmp(tok, "fractal", strlen(arg)))
+		callchain_param.mode = CHAIN_GRAPH_REL;
+
+	else if (!strncmp(tok, "none", strlen(arg))) {
+		callchain_param.mode = CHAIN_NONE;
+		symbol_conf.use_callchain = false;
+
+		return 0;
+	}
+
+	else
+		return -1;
+
+	/* get the min percentage */
+	tok = strtok(NULL, ",");
+	if (!tok)
+		goto setup;
+
+	callchain_param.min_percent = strtod(tok, &endptr);
+	if (tok == endptr)
+		return -1;
+
+	/* get the print limit */
+	tok2 = strtok(NULL, ",");
+	if (!tok2)
+		goto setup;
+
+	if (tok2[0] != 'c') {
+		callchain_param.print_limit = strtoul(tok2, &endptr, 0);
+		tok2 = strtok(NULL, ",");
+		if (!tok2)
+			goto setup;
+	}
+
+	/* get the call chain order */
+	if (!strncmp(tok2, "caller", strlen("caller")))
+		callchain_param.order = ORDER_CALLER;
+	else if (!strncmp(tok2, "callee", strlen("callee")))
+		callchain_param.order = ORDER_CALLEE;
+	else
+		return -1;
+
+	/* Get the sort key */
+	tok2 = strtok(NULL, ",");
+	if (!tok2)
+		goto setup;
+	if (!strncmp(tok2, "function", strlen("function")))
+		callchain_param.key = CCKEY_FUNCTION;
+	else if (!strncmp(tok2, "address", strlen("address")))
+		callchain_param.key = CCKEY_ADDRESS;
+	else
+		return -1;
+setup:
+	if (callchain_register_param(&callchain_param) < 0) {
+		pr_err("Can't register callchain params\n");
+		return -1;
+	}
+	return 0;
+}
+
 static void
 rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 		    enum chain_mode mode)
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 8ad97e9..0a828bb 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -157,4 +157,5 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
 
 extern const char record_callchain_help[];
+int report_parse_callchain_opt(const char *arg);
 #endif	/* __PERF_CALLCHAIN_H */
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/4] perf, report: Use new generic report parse callchain callback
  2014-03-24 19:32 [PATCH 0/4] perf: Make some functions generic Don Zickus
                   ` (2 preceding siblings ...)
  2014-03-24 19:32 ` [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function Don Zickus
@ 2014-03-24 19:32 ` Don Zickus
  3 siblings, 0 replies; 22+ messages in thread
From: Don Zickus @ 2014-03-24 19:32 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, Don Zickus

Use the new routine.

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/builtin-report.c | 77 +--------------------------------------------
 1 file changed, 1 insertion(+), 76 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c8f2113..c87412b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -576,8 +576,6 @@ static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
 	struct report *rep = (struct report *)opt->value;
-	char *tok, *tok2;
-	char *endptr;
 
 	/*
 	 * --no-call-graph
@@ -587,80 +585,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 		return 0;
 	}
 
-	symbol_conf.use_callchain = true;
-
-	if (!arg)
-		return 0;
-
-	tok = strtok((char *)arg, ",");
-	if (!tok)
-		return -1;
-
-	/* get the output mode */
-	if (!strncmp(tok, "graph", strlen(arg)))
-		callchain_param.mode = CHAIN_GRAPH_ABS;
-
-	else if (!strncmp(tok, "flat", strlen(arg)))
-		callchain_param.mode = CHAIN_FLAT;
-
-	else if (!strncmp(tok, "fractal", strlen(arg)))
-		callchain_param.mode = CHAIN_GRAPH_REL;
-
-	else if (!strncmp(tok, "none", strlen(arg))) {
-		callchain_param.mode = CHAIN_NONE;
-		symbol_conf.use_callchain = false;
-
-		return 0;
-	}
-
-	else
-		return -1;
-
-	/* get the min percentage */
-	tok = strtok(NULL, ",");
-	if (!tok)
-		goto setup;
-
-	callchain_param.min_percent = strtod(tok, &endptr);
-	if (tok == endptr)
-		return -1;
-
-	/* get the print limit */
-	tok2 = strtok(NULL, ",");
-	if (!tok2)
-		goto setup;
-
-	if (tok2[0] != 'c') {
-		callchain_param.print_limit = strtoul(tok2, &endptr, 0);
-		tok2 = strtok(NULL, ",");
-		if (!tok2)
-			goto setup;
-	}
-
-	/* get the call chain order */
-	if (!strncmp(tok2, "caller", strlen("caller")))
-		callchain_param.order = ORDER_CALLER;
-	else if (!strncmp(tok2, "callee", strlen("callee")))
-		callchain_param.order = ORDER_CALLEE;
-	else
-		return -1;
-
-	/* Get the sort key */
-	tok2 = strtok(NULL, ",");
-	if (!tok2)
-		goto setup;
-	if (!strncmp(tok2, "function", strlen("function")))
-		callchain_param.key = CCKEY_FUNCTION;
-	else if (!strncmp(tok2, "address", strlen("address")))
-		callchain_param.key = CCKEY_ADDRESS;
-	else
-		return -1;
-setup:
-	if (callchain_register_param(&callchain_param) < 0) {
-		pr_err("Can't register callchain params\n");
-		return -1;
-	}
-	return 0;
+	return report_parse_callchain_opt(arg);
 }
 
 int
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-03-24 19:32 ` [PATCH 1/4] perf: Allow ability to map cpus to nodes easily Don Zickus
@ 2014-03-29 17:10   ` Jiri Olsa
  2014-04-01  2:39     ` Don Zickus
  2014-04-03  5:48   ` Namhyung Kim
  1 sibling, 1 reply; 22+ messages in thread
From: Jiri Olsa @ 2014-03-29 17:10 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jmario, fowles

On Mon, Mar 24, 2014 at 03:32:54PM -0400, Don Zickus wrote:
> This patch figures out the max number of cpus and nodes that are on the
> system and creates a map of cpu to node.  This allows us to provide a cpu
> and quickly get the node associated with it.
> 
> It was mostly copied from builtin-kmem.c and tweaked slightly to use less memory
> (use possible cpus instead of max).  It also calculates the max number of nodes.
> 
> V3: simplify function names

some nits below, but I think Arnaldo should
see/check the namespace ;-)

thanks,
jirka

> 
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/util/cpumap.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++
>  tools/perf/util/cpumap.h |  35 ++++++++++++
>  2 files changed, 174 insertions(+)
> 
> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> index 7fe4994..2eb528e 100644
> --- a/tools/perf/util/cpumap.c
> +++ b/tools/perf/util/cpumap.c
> @@ -317,3 +317,142 @@ int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
>  {
>  	return cpu_map__build_map(cpus, corep, cpu_map__get_core);
>  }
> +
> +/* setup simple routines to easily access node numbers given a cpu number */
> +static int __set_max_num(FILE *fp, int *max)
> +{
> +	int num;
> +	char buf[256];
> +
> +	num = fread(&buf, 1, sizeof(buf), fp);
> +	if (!num)
> +		return -1;
> +
> +	buf[num] = '\0';
> +
> +	/* start on the right, to find highest node num */
> +	while (--num) {
> +		if ((buf[num] == ',') || (buf[num] == '-')) {
> +			num++;
> +			break;
> +		}
> +	}
> +	if (sscanf(&buf[num], "%d", max) < 1)
> +		return -1;
> +
> +	/* convert from 0-based to 1-based */
> +	(*max)++;
> +
> +	return 0;
> +}
> +
> +/* Determine highest possible cpu in the system for sparse allocation */
> +static void set_max_cpu_num(void)
> +{
> +	FILE *fp;
> +	int ret = -1;
> +
> +	/* set up default */
> +	max_cpu_num = 4096;
> +
> +	/* get the highest possible cpu number for a sparse allocation */
> +	fp = fopen("/sys/devices/system/cpu/possible", "r");
> +	if (!fp)
> +		goto out;
> +
> +	ret = __set_max_num(fp, &max_cpu_num);
> +
> +	fclose(fp);
> +
> +out:
> +	if (ret)
> +		pr_err("Failed to read max cpus, using default of %d\n",
> +			max_cpu_num);
> +	return;
> +}
> +
> +/* Determine highest possible node in the system for sparse allocation */
> +static void set_max_node_num(void)
> +{
> +	FILE *fp;
> +	int ret = -1;
> +
> +	/* set up default */
> +	max_node_num = 8;
> +
> +	/* get the highest possible cpu number for a sparse allocation */
> +	fp = fopen("/sys/devices/system/node/possible", "r");
> +	if (!fp)
> +		goto out;

any reason why not put fopen/fclose into __set_max_num ?

also looks like 'get_max_num' would fit better for __set_max_num ;-)

> +
> +	ret = __set_max_num(fp, &max_node_num);
> +
> +	fclose(fp);
> +	

^^^ whitespace

> +out:
> +	if (ret)
> +		pr_err("Failed to read max nodes, using default of %d\n",
> +			max_node_num);
> +	return;
> +}
> +
> +static int init_cpunode_map(void)
> +{
> +	int i;
> +
> +	set_max_cpu_num();
> +	set_max_node_num();
> +
> +	cpunode_map = calloc(max_cpu_num, sizeof(int));
> +	if (!cpunode_map) {
> +		pr_err("%s: calloc failed\n", __func__);
> +		goto out;
> +	}
> +
> +	for (i = 0; i < max_cpu_num; i++)
> +		cpunode_map[i] = -1;
> +
> +	return 0;
> +out:
> +	return -1;
> +}
> +
> +#define PATH_SYS_NODE   "/sys/devices/system/node"
> +
> +int cpu__setup_cpunode_map(void)
> +{
> +	struct dirent *dent1, *dent2;
> +	DIR *dir1, *dir2;
> +	unsigned int cpu, mem;
> +	char buf[PATH_MAX];
> +
> +	/* initialize globals */
> +	if (init_cpunode_map())
> +		return -1;
> +
> +	dir1 = opendir(PATH_SYS_NODE);
> +	if (!dir1)
> +		return 0;
> +
> +	/* walk tree and setup map */
> +	while ((dent1 = readdir(dir1)) != NULL) {
> +		if (dent1->d_type != DT_DIR ||
> +		    sscanf(dent1->d_name, "node%u", &mem) < 1)
> +			continue;
> +
> +		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
> +		dir2 = opendir(buf);
> +		if (!dir2)
> +			continue;
> +		while ((dent2 = readdir(dir2)) != NULL) {
> +			if (dent2->d_type != DT_LNK ||
> +			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
> +				continue;
> +			cpunode_map[cpu] = mem;
> +		}
> +		closedir(dir2);
> +	}
> +	closedir(dir1);
> +	return 0;
> +}
> +

^^^ white space

> diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
> index b123bb9..ce3c5a0 100644
> --- a/tools/perf/util/cpumap.h
> +++ b/tools/perf/util/cpumap.h
> @@ -4,6 +4,9 @@
>  #include <stdio.h>
>  #include <stdbool.h>
>  
> +#include "perf.h"
> +#include "util/debug.h"
> +
>  struct cpu_map {
>  	int nr;
>  	int map[];
> @@ -46,4 +49,36 @@ static inline bool cpu_map__empty(const struct cpu_map *map)
>  	return map ? map->map[0] == -1 : true;
>  }
>  
> +int max_cpu_num;
> +int max_node_num;
> +int *cpunode_map;
> +
> +int cpu__setup_cpunode_map(void);
> +
> +static inline int cpu__max_node(void)
> +{
> +	if (unlikely(!max_node_num))
> +		pr_debug("cpu_map not initiailzed\n");

typo: initialized

> +
> +	return max_node_num;
> +}
> +
> +static inline int cpu__max_cpu(void)
> +{
> +	if (unlikely(!max_cpu_num))
> +		pr_debug("cpu_map not initiailzed\n");

ditto

> +
> +	return max_cpu_num;
> +}
> +
> +static inline int cpu__get_node(int cpu)
> +{
> +	if (unlikely(cpunode_map == NULL)) {
> +		pr_debug("cpu_map not initialized\n");

ditto

> +		return -1;
> +	}
> +
> +	return cpunode_map[cpu];
> +}
> +
>  #endif /* __PERF_CPUMAP_H */
> -- 
> 1.7.11.7
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map
  2014-03-24 19:32 ` [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map Don Zickus
@ 2014-03-29 17:10   ` Jiri Olsa
  2014-04-01  2:42     ` Don Zickus
  0 siblings, 1 reply; 22+ messages in thread
From: Jiri Olsa @ 2014-03-29 17:10 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jmario, fowles, Li Zefan

On Mon, Mar 24, 2014 at 03:32:55PM -0400, Don Zickus wrote:
> Use the previous patch implementation of cpunode_map for builtin-kmem.c
> Should not be any functional difference.
> 
> Cc: Li Zefan <lizf@cn.fujitsu.com>
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/builtin-kmem.c | 78 ++---------------------------------------------
>  1 file changed, 3 insertions(+), 75 deletions(-)
> 
> diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
> index 929462a..a61783a 100644
> --- a/tools/perf/builtin-kmem.c
> +++ b/tools/perf/builtin-kmem.c
> @@ -14,6 +14,7 @@
>  #include "util/parse-options.h"
>  #include "util/trace-event.h"
>  #include "util/data.h"
> +#include "util/cpumap.h"
>  
>  #include "util/debug.h"
>  
> @@ -31,9 +32,6 @@ static int			caller_lines = -1;
>  
>  static bool			raw_ip;
>  
> -static int			*cpunode_map;
> -static int			max_cpu_num;
> -
>  struct alloc_stat {
>  	u64	call_site;
>  	u64	ptr;
> @@ -55,76 +53,6 @@ static struct rb_root root_caller_sorted;
>  static unsigned long total_requested, total_allocated;
>  static unsigned long nr_allocs, nr_cross_allocs;
>  
> -#define PATH_SYS_NODE	"/sys/devices/system/node"
> -
> -static int init_cpunode_map(void)
> -{
> -	FILE *fp;
> -	int i, err = -1;
> -
> -	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");

so the factored code from previous patches now reads
the max_cpu_num value from:
  /sys/devices/system/cpu/possible

is this intentional?

I think we want to have separate patches for code changes
and for changing the file with some comment.

jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function
  2014-03-24 19:32 ` [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function Don Zickus
@ 2014-03-29 17:11   ` Jiri Olsa
  2014-04-03  5:57   ` Namhyung Kim
  1 sibling, 0 replies; 22+ messages in thread
From: Jiri Olsa @ 2014-03-29 17:11 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jmario, fowles

On Mon, Mar 24, 2014 at 03:32:56PM -0400, Don Zickus wrote:
> This takes the parse_callchain_opt function and copies it into the
> callchain.c file.  Now the c2c tool can use it too without duplicating.
> 
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/util/callchain.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
>  tools/perf/util/callchain.h |  1 +
>  2 files changed, 84 insertions(+)
> 
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 8d9db45..2320678 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -25,6 +25,89 @@
>  
>  __thread struct callchain_cursor callchain_cursor;
>  
> +int
> +report_parse_callchain_opt(const char *arg)
> +{
> +	//struct report *rep = (struct report *)opt->value;

please erase ^^^

and I think it's better to merge this with the
actual removal in following patch:
    perf, report: Use new generic report parse callchain callback

jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-03-29 17:10   ` Jiri Olsa
@ 2014-04-01  2:39     ` Don Zickus
  2014-04-06 12:19       ` Jiri Olsa
  0 siblings, 1 reply; 22+ messages in thread
From: Don Zickus @ 2014-04-01  2:39 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: acme, LKML, jmario, fowles

On Sat, Mar 29, 2014 at 06:10:33PM +0100, Jiri Olsa wrote:
> > +/* Determine highest possible node in the system for sparse allocation */
> > +static void set_max_node_num(void)
> > +{
> > +	FILE *fp;
> > +	int ret = -1;
> > +
> > +	/* set up default */
> > +	max_node_num = 8;
> > +
> > +	/* get the highest possible cpu number for a sparse allocation */
> > +	fp = fopen("/sys/devices/system/node/possible", "r");
> > +	if (!fp)
> > +		goto out;
> 
> any reason why not put fopen/fclose into __set_max_num ?

I either had to pass in a path string pointer or a file pointer, I just
chose a file pointer.  Thought it fit with the rest of perf passing FPs
around. :-D  It doesn't matter to me either way.

> 
> also looks like 'get_max_num' would fit better for __set_max_num ;-)

Yeah, I split it out based on your suggestion and didn't realize the
details kinda changed.

Cheers,
Don


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map
  2014-03-29 17:10   ` Jiri Olsa
@ 2014-04-01  2:42     ` Don Zickus
  2014-04-06 12:21       ` Jiri Olsa
  0 siblings, 1 reply; 22+ messages in thread
From: Don Zickus @ 2014-04-01  2:42 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: acme, LKML, jmario, fowles, Li Zefan

On Sat, Mar 29, 2014 at 06:10:47PM +0100, Jiri Olsa wrote:
> On Mon, Mar 24, 2014 at 03:32:55PM -0400, Don Zickus wrote:
> > Use the previous patch implementation of cpunode_map for builtin-kmem.c
> > Should not be any functional difference.
> > 
> > Cc: Li Zefan <lizf@cn.fujitsu.com>
> > Signed-off-by: Don Zickus <dzickus@redhat.com>
> > ---
> >  tools/perf/builtin-kmem.c | 78 ++---------------------------------------------
> >  1 file changed, 3 insertions(+), 75 deletions(-)
> > 
> > diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
> > index 929462a..a61783a 100644
> > --- a/tools/perf/builtin-kmem.c
> > +++ b/tools/perf/builtin-kmem.c
> > @@ -14,6 +14,7 @@
> >  #include "util/parse-options.h"
> >  #include "util/trace-event.h"
> >  #include "util/data.h"
> > +#include "util/cpumap.h"
> >  
> >  #include "util/debug.h"
> >  
> > @@ -31,9 +32,6 @@ static int			caller_lines = -1;
> >  
> >  static bool			raw_ip;
> >  
> > -static int			*cpunode_map;
> > -static int			max_cpu_num;
> > -
> >  struct alloc_stat {
> >  	u64	call_site;
> >  	u64	ptr;
> > @@ -55,76 +53,6 @@ static struct rb_root root_caller_sorted;
> >  static unsigned long total_requested, total_allocated;
> >  static unsigned long nr_allocs, nr_cross_allocs;
> >  
> > -#define PATH_SYS_NODE	"/sys/devices/system/node"
> > -
> > -static int init_cpunode_map(void)
> > -{
> > -	FILE *fp;
> > -	int i, err = -1;
> > -
> > -	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
> 
> so the factored code from previous patches now reads
> the max_cpu_num value from:
>   /sys/devices/system/cpu/possible
> 
> is this intentional?

Yeah, I was trying to save bits.  No need to allocate for 5100 cpus when
only 128 are used.

> 
> I think we want to have separate patches for code changes
> and for changing the file with some comment.

So you want me to split the previous patch into two.  One for code
movement, the other for the path change?

Cheers,
Don

> 
> jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-03-24 19:32 ` [PATCH 1/4] perf: Allow ability to map cpus to nodes easily Don Zickus
  2014-03-29 17:10   ` Jiri Olsa
@ 2014-04-03  5:48   ` Namhyung Kim
  2014-04-06 12:15     ` Jiri Olsa
  1 sibling, 1 reply; 22+ messages in thread
From: Namhyung Kim @ 2014-04-03  5:48 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jolsa, jmario, fowles, bp

Hi Don,

(Adding Boris to CC as he might be interested)

On Mon, 24 Mar 2014 15:32:54 -0400, Don Zickus wrote:
> This patch figures out the max number of cpus and nodes that are on the
> system and creates a map of cpu to node.  This allows us to provide a cpu
> and quickly get the node associated with it.
>
> It was mostly copied from builtin-kmem.c and tweaked slightly to use less memory
> (use possible cpus instead of max).  It also calculates the max number of nodes.
>
> V3: simplify function names
>
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/util/cpumap.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++
>  tools/perf/util/cpumap.h |  35 ++++++++++++
>  2 files changed, 174 insertions(+)
>
> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> index 7fe4994..2eb528e 100644
> --- a/tools/perf/util/cpumap.c
> +++ b/tools/perf/util/cpumap.c
> @@ -317,3 +317,142 @@ int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
>  {
>  	return cpu_map__build_map(cpus, corep, cpu_map__get_core);
>  }
> +
> +/* setup simple routines to easily access node numbers given a cpu number */
> +static int __set_max_num(FILE *fp, int *max)
> +{
> +	int num;
> +	char buf[256];
> +
> +	num = fread(&buf, 1, sizeof(buf), fp);
> +	if (!num)
> +		return -1;
> +
> +	buf[num] = '\0';
> +
> +	/* start on the right, to find highest node num */
> +	while (--num) {
> +		if ((buf[num] == ',') || (buf[num] == '-')) {
> +			num++;
> +			break;
> +		}
> +	}
> +	if (sscanf(&buf[num], "%d", max) < 1)
> +		return -1;
> +
> +	/* convert from 0-based to 1-based */
> +	(*max)++;
> +
> +	return 0;
> +}
> +
> +/* Determine highest possible cpu in the system for sparse allocation */
> +static void set_max_cpu_num(void)
> +{
> +	FILE *fp;
> +	int ret = -1;
> +
> +	/* set up default */
> +	max_cpu_num = 4096;
> +
> +	/* get the highest possible cpu number for a sparse allocation */
> +	fp = fopen("/sys/devices/system/cpu/possible", "r");

More generally, this sysfs access needs to check actual mountpoint using
sysfs__mountpoint() IMHO.

Also this API can be generalized like reading int value from a sysfs
file as the filename itself represents the content in most cases.

So how about changing this way?  It might reside on somewhere in tools/lib/api/fs/.

  max_cpu_num = sysfs__read_int("devices/system/cpu/possible");

  max_node_num = sysfs__read_int("devices/system/node/possible");


Hmm.. looking at the code, perf already has filename__read_{int,str} API
in util/util.c.  Maybe you can just use it instead.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function
  2014-03-24 19:32 ` [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function Don Zickus
  2014-03-29 17:11   ` Jiri Olsa
@ 2014-04-03  5:57   ` Namhyung Kim
  2014-04-04 19:31     ` Don Zickus
  1 sibling, 1 reply; 22+ messages in thread
From: Namhyung Kim @ 2014-04-03  5:57 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jolsa, jmario, fowles

On Mon, 24 Mar 2014 15:32:56 -0400, Don Zickus wrote:
> This takes the parse_callchain_opt function and copies it into the
> callchain.c file.  Now the c2c tool can use it too without duplicating.
>
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/util/callchain.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
>  tools/perf/util/callchain.h |  1 +
>  2 files changed, 84 insertions(+)
>
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 8d9db45..2320678 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -25,6 +25,89 @@
>  
>  __thread struct callchain_cursor callchain_cursor;
>  
> +int
> +report_parse_callchain_opt(const char *arg)

As it eliminated any dependency to the report code, the name can omit
'report' as well.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function
  2014-04-03  5:57   ` Namhyung Kim
@ 2014-04-04 19:31     ` Don Zickus
  2014-04-06 12:11       ` Jiri Olsa
  0 siblings, 1 reply; 22+ messages in thread
From: Don Zickus @ 2014-04-04 19:31 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: acme, LKML, jolsa, jmario, fowles

On Thu, Apr 03, 2014 at 02:57:32PM +0900, Namhyung Kim wrote:
> On Mon, 24 Mar 2014 15:32:56 -0400, Don Zickus wrote:
> > This takes the parse_callchain_opt function and copies it into the
> > callchain.c file.  Now the c2c tool can use it too without duplicating.
> >
> > Signed-off-by: Don Zickus <dzickus@redhat.com>
> > ---
> >  tools/perf/util/callchain.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
> >  tools/perf/util/callchain.h |  1 +
> >  2 files changed, 84 insertions(+)
> >
> > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> > index 8d9db45..2320678 100644
> > --- a/tools/perf/util/callchain.c
> > +++ b/tools/perf/util/callchain.c
> > @@ -25,6 +25,89 @@
> >  
> >  __thread struct callchain_cursor callchain_cursor;
> >  
> > +int
> > +report_parse_callchain_opt(const char *arg)
> 
> As it eliminated any dependency to the report code, the name can omit
> 'report' as well.

Hmm, shrinking this down to 'parse_callchain_opt' seems to conflict with
builtin-top.c's defintion which is a wrapper around
record_parse_callchain_opt.

I can modify builtin-top.c's defintion to top_parse_callchain_opt, but do
we want to make the function parse_callchain_opt name more obvious that it is
for 'report' style parsing (which means it looks for the embedded
callchain flag)?  Vs. the record_parse_callchain_opt function, which is for
setting up the callchain flag?

Cheers,
Don

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function
  2014-04-04 19:31     ` Don Zickus
@ 2014-04-06 12:11       ` Jiri Olsa
  2014-04-07  5:15         ` Namhyung Kim
  0 siblings, 1 reply; 22+ messages in thread
From: Jiri Olsa @ 2014-04-06 12:11 UTC (permalink / raw)
  To: Don Zickus; +Cc: Namhyung Kim, acme, LKML, jmario, fowles

On Fri, Apr 04, 2014 at 03:31:19PM -0400, Don Zickus wrote:
> On Thu, Apr 03, 2014 at 02:57:32PM +0900, Namhyung Kim wrote:
> > On Mon, 24 Mar 2014 15:32:56 -0400, Don Zickus wrote:
> > > This takes the parse_callchain_opt function and copies it into the
> > > callchain.c file.  Now the c2c tool can use it too without duplicating.
> > >
> > > Signed-off-by: Don Zickus <dzickus@redhat.com>
> > > ---
> > >  tools/perf/util/callchain.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
> > >  tools/perf/util/callchain.h |  1 +
> > >  2 files changed, 84 insertions(+)
> > >
> > > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> > > index 8d9db45..2320678 100644
> > > --- a/tools/perf/util/callchain.c
> > > +++ b/tools/perf/util/callchain.c
> > > @@ -25,6 +25,89 @@
> > >  
> > >  __thread struct callchain_cursor callchain_cursor;
> > >  
> > > +int
> > > +report_parse_callchain_opt(const char *arg)
> > 
> > As it eliminated any dependency to the report code, the name can omit
> > 'report' as well.
> 
> Hmm, shrinking this down to 'parse_callchain_opt' seems to conflict with
> builtin-top.c's defintion which is a wrapper around
> record_parse_callchain_opt.
> 
> I can modify builtin-top.c's defintion to top_parse_callchain_opt, but do
> we want to make the function parse_callchain_opt name more obvious that it is
> for 'report' style parsing (which means it looks for the embedded
> callchain flag)?  Vs. the record_parse_callchain_opt function, which is for
> setting up the callchain flag?

that sounds good to me.. (record|report)_parse_callchain_opt

jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-04-03  5:48   ` Namhyung Kim
@ 2014-04-06 12:15     ` Jiri Olsa
  2014-04-07  5:28       ` Namhyung Kim
  0 siblings, 1 reply; 22+ messages in thread
From: Jiri Olsa @ 2014-04-06 12:15 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: Don Zickus, acme, LKML, jmario, fowles, bp

On Thu, Apr 03, 2014 at 02:48:51PM +0900, Namhyung Kim wrote:
> Hi Don,

SNIP

> 
> More generally, this sysfs access needs to check actual mountpoint using
> sysfs__mountpoint() IMHO.
> 
> Also this API can be generalized like reading int value from a sysfs
> file as the filename itself represents the content in most cases.
> 
> So how about changing this way?  It might reside on somewhere in tools/lib/api/fs/.
> 
>   max_cpu_num = sysfs__read_int("devices/system/cpu/possible");
> 
>   max_node_num = sysfs__read_int("devices/system/node/possible");
> 
> 
> Hmm.. looking at the code, perf already has filename__read_{int,str} API
> in util/util.c.  Maybe you can just use it instead.

I think those just read the file, while Don needs to parse
the map to get max cpu number

jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-04-01  2:39     ` Don Zickus
@ 2014-04-06 12:19       ` Jiri Olsa
  0 siblings, 0 replies; 22+ messages in thread
From: Jiri Olsa @ 2014-04-06 12:19 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jmario, fowles

On Mon, Mar 31, 2014 at 10:39:44PM -0400, Don Zickus wrote:
> On Sat, Mar 29, 2014 at 06:10:33PM +0100, Jiri Olsa wrote:
> > > +/* Determine highest possible node in the system for sparse allocation */
> > > +static void set_max_node_num(void)
> > > +{
> > > +	FILE *fp;
> > > +	int ret = -1;
> > > +
> > > +	/* set up default */
> > > +	max_node_num = 8;
> > > +
> > > +	/* get the highest possible cpu number for a sparse allocation */
> > > +	fp = fopen("/sys/devices/system/node/possible", "r");
> > > +	if (!fp)
> > > +		goto out;
> > 
> > any reason why not put fopen/fclose into __set_max_num ?
> 
> I either had to pass in a path string pointer or a file pointer, I just
> chose a file pointer.  Thought it fit with the rest of perf passing FPs
> around. :-D  It doesn't matter to me either way.

well, if there's no need to call it extra then the function can do it

jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map
  2014-04-01  2:42     ` Don Zickus
@ 2014-04-06 12:21       ` Jiri Olsa
  2014-04-07 18:18         ` Don Zickus
  0 siblings, 1 reply; 22+ messages in thread
From: Jiri Olsa @ 2014-04-06 12:21 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jmario, fowles, Li Zefan

On Mon, Mar 31, 2014 at 10:42:30PM -0400, Don Zickus wrote:

SNIP

> > > -#define PATH_SYS_NODE	"/sys/devices/system/node"
> > > -
> > > -static int init_cpunode_map(void)
> > > -{
> > > -	FILE *fp;
> > > -	int i, err = -1;
> > > -
> > > -	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
> > 
> > so the factored code from previous patches now reads
> > the max_cpu_num value from:
> >   /sys/devices/system/cpu/possible
> > 
> > is this intentional?
> 
> Yeah, I was trying to save bits.  No need to allocate for 5100 cpus when
> only 128 are used.
> 
> > 
> > I think we want to have separate patches for code changes
> > and for changing the file with some comment.
> 
> So you want me to split the previous patch into two.  One for code
> movement, the other for the path change?

I think thats the proper way.. single logic change for patch

thanks,
jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function
  2014-04-06 12:11       ` Jiri Olsa
@ 2014-04-07  5:15         ` Namhyung Kim
  2014-04-07  7:01           ` Jiri Olsa
  0 siblings, 1 reply; 22+ messages in thread
From: Namhyung Kim @ 2014-04-07  5:15 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Don Zickus, acme, LKML, jmario, fowles

Hi Jiri and Don,

On Sun, 6 Apr 2014 14:11:29 +0200, Jiri Olsa wrote:
> On Fri, Apr 04, 2014 at 03:31:19PM -0400, Don Zickus wrote:
>> On Thu, Apr 03, 2014 at 02:57:32PM +0900, Namhyung Kim wrote:
>> > On Mon, 24 Mar 2014 15:32:56 -0400, Don Zickus wrote:
>> > > This takes the parse_callchain_opt function and copies it into the
>> > > callchain.c file.  Now the c2c tool can use it too without duplicating.
>> > >
>> > > Signed-off-by: Don Zickus <dzickus@redhat.com>
>> > > ---
>> > >  tools/perf/util/callchain.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
>> > >  tools/perf/util/callchain.h |  1 +
>> > >  2 files changed, 84 insertions(+)
>> > >
>> > > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
>> > > index 8d9db45..2320678 100644
>> > > --- a/tools/perf/util/callchain.c
>> > > +++ b/tools/perf/util/callchain.c
>> > > @@ -25,6 +25,89 @@
>> > >  
>> > >  __thread struct callchain_cursor callchain_cursor;
>> > >  
>> > > +int
>> > > +report_parse_callchain_opt(const char *arg)
>> > 
>> > As it eliminated any dependency to the report code, the name can omit
>> > 'report' as well.
>> 
>> Hmm, shrinking this down to 'parse_callchain_opt' seems to conflict with
>> builtin-top.c's defintion which is a wrapper around
>> record_parse_callchain_opt.
>> 
>> I can modify builtin-top.c's defintion to top_parse_callchain_opt, but do
>> we want to make the function parse_callchain_opt name more obvious that it is
>> for 'report' style parsing (which means it looks for the embedded
>> callchain flag)?  Vs. the record_parse_callchain_opt function, which is for
>> setting up the callchain flag?
>
> that sounds good to me.. (record|report)_parse_callchain_opt

Agreed.  Maybe parse_callchain_(record|report)_opt?

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-04-06 12:15     ` Jiri Olsa
@ 2014-04-07  5:28       ` Namhyung Kim
  2014-04-07 18:20         ` Don Zickus
  0 siblings, 1 reply; 22+ messages in thread
From: Namhyung Kim @ 2014-04-07  5:28 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Don Zickus, acme, LKML, jmario, fowles, bp

On Sun, 6 Apr 2014 14:15:46 +0200, Jiri Olsa wrote:
> On Thu, Apr 03, 2014 at 02:48:51PM +0900, Namhyung Kim wrote:
>> More generally, this sysfs access needs to check actual mountpoint using
>> sysfs__mountpoint() IMHO.
>> 
>> Also this API can be generalized like reading int value from a sysfs
>> file as the filename itself represents the content in most cases.
>> 
>> So how about changing this way?  It might reside on somewhere in tools/lib/api/fs/.
>> 
>>   max_cpu_num = sysfs__read_int("devices/system/cpu/possible");
>> 
>>   max_node_num = sysfs__read_int("devices/system/node/possible");
>> 
>> 
>> Hmm.. looking at the code, perf already has filename__read_{int,str} API
>> in util/util.c.  Maybe you can just use it instead.
>
> I think those just read the file, while Don needs to parse
> the map to get max cpu number

Ah, missed that... sorry for noise.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function
  2014-04-07  5:15         ` Namhyung Kim
@ 2014-04-07  7:01           ` Jiri Olsa
  0 siblings, 0 replies; 22+ messages in thread
From: Jiri Olsa @ 2014-04-07  7:01 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: Don Zickus, acme, LKML, jmario, fowles

On Mon, Apr 07, 2014 at 02:15:21PM +0900, Namhyung Kim wrote:
> Hi Jiri and Don,
> 
> On Sun, 6 Apr 2014 14:11:29 +0200, Jiri Olsa wrote:
> > On Fri, Apr 04, 2014 at 03:31:19PM -0400, Don Zickus wrote:
> >> On Thu, Apr 03, 2014 at 02:57:32PM +0900, Namhyung Kim wrote:
> >> > On Mon, 24 Mar 2014 15:32:56 -0400, Don Zickus wrote:
> >> > > This takes the parse_callchain_opt function and copies it into the
> >> > > callchain.c file.  Now the c2c tool can use it too without duplicating.
> >> > >
> >> > > Signed-off-by: Don Zickus <dzickus@redhat.com>
> >> > > ---
> >> > >  tools/perf/util/callchain.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
> >> > >  tools/perf/util/callchain.h |  1 +
> >> > >  2 files changed, 84 insertions(+)
> >> > >
> >> > > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> >> > > index 8d9db45..2320678 100644
> >> > > --- a/tools/perf/util/callchain.c
> >> > > +++ b/tools/perf/util/callchain.c
> >> > > @@ -25,6 +25,89 @@
> >> > >  
> >> > >  __thread struct callchain_cursor callchain_cursor;
> >> > >  
> >> > > +int
> >> > > +report_parse_callchain_opt(const char *arg)
> >> > 
> >> > As it eliminated any dependency to the report code, the name can omit
> >> > 'report' as well.
> >> 
> >> Hmm, shrinking this down to 'parse_callchain_opt' seems to conflict with
> >> builtin-top.c's defintion which is a wrapper around
> >> record_parse_callchain_opt.
> >> 
> >> I can modify builtin-top.c's defintion to top_parse_callchain_opt, but do
> >> we want to make the function parse_callchain_opt name more obvious that it is
> >> for 'report' style parsing (which means it looks for the embedded
> >> callchain flag)?  Vs. the record_parse_callchain_opt function, which is for
> >> setting up the callchain flag?
> >
> > that sounds good to me.. (record|report)_parse_callchain_opt
> 
> Agreed.  Maybe parse_callchain_(record|report)_opt?

ook, jirka

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map
  2014-04-06 12:21       ` Jiri Olsa
@ 2014-04-07 18:18         ` Don Zickus
  0 siblings, 0 replies; 22+ messages in thread
From: Don Zickus @ 2014-04-07 18:18 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: acme, LKML, jmario, fowles, Li Zefan

On Sun, Apr 06, 2014 at 02:21:26PM +0200, Jiri Olsa wrote:
> On Mon, Mar 31, 2014 at 10:42:30PM -0400, Don Zickus wrote:
> 
> SNIP
> 
> > > > -#define PATH_SYS_NODE	"/sys/devices/system/node"
> > > > -
> > > > -static int init_cpunode_map(void)
> > > > -{
> > > > -	FILE *fp;
> > > > -	int i, err = -1;
> > > > -
> > > > -	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
> > > 
> > > so the factored code from previous patches now reads
> > > the max_cpu_num value from:
> > >   /sys/devices/system/cpu/possible
> > > 
> > > is this intentional?
> > 
> > Yeah, I was trying to save bits.  No need to allocate for 5100 cpus when
> > only 128 are used.
> > 
> > > 
> > > I think we want to have separate patches for code changes
> > > and for changing the file with some comment.
> > 
> > So you want me to split the previous patch into two.  One for code
> > movement, the other for the path change?
> 
> I think thats the proper way.. single logic change for patch

Hmm, the whole change is getting completely messy with the breakdown of
common functions, renames, use of internal functions, etc...  I'll just
break out the small change (kernel_max -> possible) from that mess.

Cheers,
Don

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] perf: Allow ability to map cpus to nodes easily
  2014-04-07  5:28       ` Namhyung Kim
@ 2014-04-07 18:20         ` Don Zickus
  0 siblings, 0 replies; 22+ messages in thread
From: Don Zickus @ 2014-04-07 18:20 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: Jiri Olsa, acme, LKML, jmario, fowles, bp

On Mon, Apr 07, 2014 at 02:28:35PM +0900, Namhyung Kim wrote:
> On Sun, 6 Apr 2014 14:15:46 +0200, Jiri Olsa wrote:
> > On Thu, Apr 03, 2014 at 02:48:51PM +0900, Namhyung Kim wrote:
> >> More generally, this sysfs access needs to check actual mountpoint using
> >> sysfs__mountpoint() IMHO.
> >> 
> >> Also this API can be generalized like reading int value from a sysfs
> >> file as the filename itself represents the content in most cases.
> >> 
> >> So how about changing this way?  It might reside on somewhere in tools/lib/api/fs/.
> >> 
> >>   max_cpu_num = sysfs__read_int("devices/system/cpu/possible");
> >> 
> >>   max_node_num = sysfs__read_int("devices/system/node/possible");
> >> 
> >> 
> >> Hmm.. looking at the code, perf already has filename__read_{int,str} API
> >> in util/util.c.  Maybe you can just use it instead.
> >
> > I think those just read the file, while Don needs to parse
> > the map to get max cpu number
> 
> Ah, missed that... sorry for noise.

Well, I already implemented using filename__read_str.  It is just added
overhead, but the end result is the same, a string needs to be read from
the file.  I just had to remember to free the memory at the end of the
function.

Cheers,
Don

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2014-04-07 18:20 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-24 19:32 [PATCH 0/4] perf: Make some functions generic Don Zickus
2014-03-24 19:32 ` [PATCH 1/4] perf: Allow ability to map cpus to nodes easily Don Zickus
2014-03-29 17:10   ` Jiri Olsa
2014-04-01  2:39     ` Don Zickus
2014-04-06 12:19       ` Jiri Olsa
2014-04-03  5:48   ` Namhyung Kim
2014-04-06 12:15     ` Jiri Olsa
2014-04-07  5:28       ` Namhyung Kim
2014-04-07 18:20         ` Don Zickus
2014-03-24 19:32 ` [PATCH 2/4] perf, kmem: Utilize the new generic cpunode_map Don Zickus
2014-03-29 17:10   ` Jiri Olsa
2014-04-01  2:42     ` Don Zickus
2014-04-06 12:21       ` Jiri Olsa
2014-04-07 18:18         ` Don Zickus
2014-03-24 19:32 ` [PATCH 3/4] perf, callchain: Add generic report parse callchain callback function Don Zickus
2014-03-29 17:11   ` Jiri Olsa
2014-04-03  5:57   ` Namhyung Kim
2014-04-04 19:31     ` Don Zickus
2014-04-06 12:11       ` Jiri Olsa
2014-04-07  5:15         ` Namhyung Kim
2014-04-07  7:01           ` Jiri Olsa
2014-03-24 19:32 ` [PATCH 4/4] perf, report: Use new generic report parse callchain callback Don Zickus

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.