linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events
@ 2013-03-13  9:42 chenggang
  2013-03-13  9:42 ` [PATCH v3 7/8]Perf: changed the method to traverse mmap list chenggang
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

Many applications fork threads on the fly, and these threads may exit before
the main thread does. The perf top tool should notice newly forked threads
while it is profiling a specific application.
If the target process forks a thread, or one of its threads exits, we get a
PERF_RECORD_FORK or PERF_RECORD_EXIT event. The following callback functions
process these events:
1) perf_top__process_event_fork()
   Open a new fd for the newly forked thread, and extend the related data structures.
2) perf_top__process_event_exit()
   Close the fd of the exited thread, and destroy its nodes in the related data structures.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/builtin-top.c |  109 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index cff58e5..a591b96 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -800,7 +800,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	return;
 }
 
-static void perf_top__mmap_read_idx(struct perf_top *top, struct perf_mmap *md)
+static int perf_top__mmap_read_idx(struct perf_top *top, struct perf_mmap *md,
+				    int idx)
 {
 	struct perf_sample sample;
 	struct perf_evsel *evsel;
@@ -825,6 +826,20 @@ static void perf_top__mmap_read_idx(struct perf_top *top, struct perf_mmap *md)
 		if (event->header.type == PERF_RECORD_SAMPLE)
 			++top->samples;
 
+		if (cpu_map__all(top->evlist->cpus) &&
+		    event->header.type == PERF_RECORD_FORK)
+			(&top->tool)->fork(&top->tool, event, &sample, NULL);
+
+		if (cpu_map__all(top->evlist->cpus) &&
+		    event->header.type == PERF_RECORD_EXIT) {
+			int tidx;
+
+			tidx = (&top->tool)->exit(&top->tool, event,
+						  &sample, NULL);
+			if (tidx == idx)
+				return -1;
+		}
+
 		switch (origin) {
 		case PERF_RECORD_MISC_USER:
 			++top->us_samples;
@@ -863,14 +878,18 @@ static void perf_top__mmap_read_idx(struct perf_top *top, struct perf_mmap *md)
 		} else
 			++session->stats.nr_unknown_events;
 	}
+	return 0;
 }
 
 static void perf_top__mmap_read(struct perf_top *top)
 {
 	struct perf_mmap *md;
+	int i = 0;
 
 	for_each_mmap(md, top->evlist) {
-		perf_top__mmap_read_idx(top, md);
+		if (perf_top__mmap_read_idx(top, md, i) == -1)
+			break;
+		i++;
 	}
 }
 
@@ -1025,11 +1044,97 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 	return record_parse_callchain_opt(opt, arg, unset);
 }
 
+static int perf_top__append_thread(struct perf_top *top, pid_t pid)
+{
+	char msg[512];
+	struct perf_evsel *counter, *counter_err;
+	struct perf_evlist *evlist = top->evlist;
+	struct cpu_map *cpus = evlist->cpus;
+
+	counter_err = list_entry(evlist->entries.prev, struct perf_evsel, node);
+
+	list_for_each_entry(counter, &evlist->entries, node) {
+		if (perf_evsel__open_single_thread(counter, cpus, pid) < 0) {
+			if (verbose) {
+				perf_evsel__open_strerror(counter,
+							  &top->record_opts.target,
+							  errno, msg, sizeof(msg));
+				ui__warning("%s\n", msg);
+			}
+			counter_err = counter;
+			goto close_opened_fd;
+		}
+	}
+
+	if (perf_evlist__mmap_thread(evlist, false) < 0)
+		goto close_opened_fd;
+
+	return 0;
+
+close_opened_fd:
+	list_for_each_entry(counter, &evlist->entries, node) {
+		perf_evsel__close_single_thread(counter, cpus->nr, -1);
+		if (counter == counter_err)
+			break;
+	}
+	return -1;
+}
+
+static int perf_top__process_event_fork(struct perf_tool *tool __maybe_unused,
+					union perf_event *event __maybe_unused,
+					struct perf_sample *sample __maybe_unused,
+					struct machine *machine __maybe_unused)
+{
+	pid_t tid = event->fork.tid;
+	struct perf_top *top = container_of(tool, struct perf_top, tool);
+	struct thread_map *threads = top->evlist->threads;
+	int ret;
+
+	ret = thread_map__append(threads, tid); 
+	if (ret != 0)                   
+		return ret;             
+
+	if (perf_top__append_thread(top, tid) < 0)
+		goto free_new_thread;
+
+	return 0; 
+
+free_new_thread:
+	thread_map__remove(threads, -1);
+	return -1;
+}
+
+static int perf_top__process_event_exit(struct perf_tool *tool __maybe_unused,
+					union perf_event *event __maybe_unused,
+					struct perf_sample *sample __maybe_unused,
+					struct machine *machine __maybe_unused)
+{
+	pid_t tid = event->fork.tid;
+	struct perf_top *top = container_of(tool, struct perf_top, tool);
+	struct perf_evsel *evsel;
+	struct thread_map *threads = top->evlist->threads;
+	int tidx = thread_map__get_idx_by_pid(threads, tid);
+
+	if (tidx < 0)
+		return -1;
+
+	perf_evlist__munmap_thread(top->evlist, tidx);
+	list_for_each_entry(evsel, &top->evlist->entries, node) {
+		perf_evsel__close_single_thread(evsel, top->evlist->cpus->nr, tidx);
+	}
+	thread_map__remove(threads, tidx);
+	return tidx;
+}
+
 int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int status;
 	char errbuf[BUFSIZ];
 	struct perf_top top = {
+		.tool = {
+			.fork		= perf_top__process_event_fork,
+			.exit		= perf_top__process_event_exit,
+		},
 		.count_filter	     = 5,
 		.delay_secs	     = 2,
 		.record_opts = {
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 7/8]Perf: changed the method to traverse mmap list
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
@ 2013-03-13  9:42 ` chenggang
  2013-03-13  9:42 ` [PATCH v3 6/8]Perf: Add extend mechanism for mmap & pollfd chenggang
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

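Change the way the evlist->mmap list is traversed. The list is traversed
very frequently, so the traversal needs to be efficient: callers now walk it
with for_each_mmap() and hand the struct perf_mmap itself to
perf_evlist__mmap_read() instead of an index.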

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/builtin-top.c                  |   11 ++++++-----
 tools/perf/tests/mmap-basic.c             |    4 +++-
 tools/perf/tests/open-syscall-tp-fields.c |    7 ++++---
 tools/perf/tests/perf-record.c            |    7 ++++---
 tools/perf/util/evlist.c                  |    4 ++--
 tools/perf/util/evlist.h                  |    3 ++-
 tools/perf/util/python.c                  |    4 +++-
 7 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 72f6eb7..cff58e5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -800,7 +800,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	return;
 }
 
-static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
+static void perf_top__mmap_read_idx(struct perf_top *top, struct perf_mmap *md)
 {
 	struct perf_sample sample;
 	struct perf_evsel *evsel;
@@ -810,7 +810,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 	u8 origin;
 	int ret;
 
-	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
+	while ((event = perf_evlist__mmap_read(top->evlist, md)) != NULL) {
 		ret = perf_evlist__parse_sample(top->evlist, event, &sample);
 		if (ret) {
 			pr_err("Can't parse sample, err = %d\n", ret);
@@ -867,10 +867,11 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 
 static void perf_top__mmap_read(struct perf_top *top)
 {
-	int i;
+	struct perf_mmap *md;
 
-	for (i = 0; i < top->evlist->nr_mmaps; i++)
-		perf_top__mmap_read_idx(top, i);
+	for_each_mmap(md, top->evlist) {
+		perf_top__mmap_read_idx(top, md);
+	}
 }
 
 static int perf_top__start_counters(struct perf_top *top)
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index cdd5075..93639a8 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -19,6 +19,7 @@ int test__basic_mmap(void)
 {
 	int err = -1;
 	union perf_event *event;
+	struct perf_mmap *md;
 	struct thread_map *threads;
 	struct cpu_map *cpus;
 	struct perf_evlist *evlist;
@@ -97,7 +98,8 @@ int test__basic_mmap(void)
 			++foo;
 		}
 
-	while ((event = perf_evlist__mmap_read(evlist, 0)) != NULL) {
+	md = perf_evlist__get_mmap(evlist, 0);
+	while ((event = perf_evlist__mmap_read(evlist, md)) != NULL) {
 		struct perf_sample sample;
 
 		if (event->header.type != PERF_RECORD_SAMPLE) {
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c
index 39eb770..cb12e82 100644
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/open-syscall-tp-fields.c
@@ -20,7 +20,7 @@ int test__syscall_open_tp_fields(void)
 	int flags = O_RDONLY | O_DIRECTORY;
 	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
 	struct perf_evsel *evsel;
-	int err = -1, i, nr_events = 0, nr_polls = 0;
+	int err = -1, nr_events = 0, nr_polls = 0;
 
 	if (evlist == NULL) {
 		pr_debug("%s: perf_evlist__new\n", __func__);
@@ -66,11 +66,12 @@ int test__syscall_open_tp_fields(void)
 
 	while (1) {
 		int before = nr_events;
+		struct perf_mmap *md;
 
-		for (i = 0; i < evlist->nr_mmaps; i++) {
+		for_each_mmap(md, evlist) {
 			union perf_event *event;
 
-			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			while ((event = perf_evlist__mmap_read(evlist, md)) != NULL) {
 				const u32 type = event->header.type;
 				int tp_flags;
 				struct perf_sample sample;
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 1e8e512..8aef6d2 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -56,7 +56,7 @@ int test__PERF_RECORD(void)
 	     found_libc_mmap = false,
 	     found_vdso_mmap = false,
 	     found_ld_mmap = false;
-	int err = -1, errs = 0, i, wakeups = 0;
+	int err = -1, errs = 0, wakeups = 0;
 	u32 cpu;
 	int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
 
@@ -158,11 +158,12 @@ int test__PERF_RECORD(void)
 
 	while (1) {
 		int before = total_events;
+		struct perf_mmap *md;
 
-		for (i = 0; i < evlist->nr_mmaps; i++) {
+		for_each_mmap(md, evlist) {
 			union perf_event *event;
 
-			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			while ((event = perf_evlist__mmap_read(evlist, md)) != NULL) {
 				const u32 type = event->header.type;
 				const char *name = perf_event__name(type);
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 74af9bb..25c3cb9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -362,9 +362,9 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 	return NULL;
 }
 
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist,
+					 struct perf_mmap *md)
 {
-	struct perf_mmap *md = perf_evlist__get_mmap(evlist, idx);
 	unsigned int head = perf_mmap__read_head(md);
 	unsigned int old = md->prev;
 	unsigned char *data = md->base + page_size;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8693c11..1c74fa0 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -80,7 +80,8 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
 
 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
 
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *self,
+					 struct perf_mmap *md);
 
 int perf_evlist__open(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index e3f3f1b..91b00e8 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -789,6 +789,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 {
 	struct perf_evlist *evlist = &pevlist->evlist;
 	union perf_event *event;
+	struct perf_mmap *md;
 	int sample_id_all = 1, cpu;
 	static char *kwlist[] = { "cpu", "sample_id_all", NULL };
 	int err;
@@ -797,7 +798,8 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 					 &cpu, &sample_id_all))
 		return NULL;
 
-	event = perf_evlist__mmap_read(evlist, cpu);
+	md = perf_evlist__get_mmap(evlist, cpu);
+	event = perf_evlist__mmap_read(evlist, md);
 	if (event != NULL) {
 		PyObject *pyevent = pyrf_event__new(event);
 		struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 6/8]Perf: Add extend mechanism for mmap & pollfd.
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
  2013-03-13  9:42 ` [PATCH v3 7/8]Perf: changed the method to traverse mmap list chenggang
@ 2013-03-13  9:42 ` chenggang
  2013-03-13  9:42 ` [PATCH v3 5/8]Perf: add extend mechanism for evsel->id & evsel->fd chenggang
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

Add an extend mechanism for mmap & pollfd, so that both can be adjusted when
threads are forked or exit.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/util/evlist.c |  151 +++++++++++++++++++++++++++++++++++++++++++++-
 tools/perf/util/evlist.h |    3 +
 tools/perf/util/evsel.c  |    7 ++-
 3 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c1cd8f9..74af9bb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -85,7 +85,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
 
 void perf_evlist__exit(struct perf_evlist *evlist)
 {
-	free(evlist->mmap);
+	xyarray__delete(evlist->mmap);
 	free(evlist->pollfd);
 	evlist->mmap = NULL;
 	evlist->pollfd = NULL;
@@ -256,6 +256,32 @@ void perf_evlist__enable(struct perf_evlist *evlist)
 	}
 }
 
+/*
+ * Per-thread mode only: grow evlist->pollfd to threads->nr * nr_entries slots.
+ * Returns 0 on success, -1 if zalloc() fails, 1 if cpu_map__all() is false.
+ */
+static int perf_evlist__extend_pollfd(struct perf_evlist *evlist)
+{
+	int new_nfds;
+
+	if (cpu_map__all(evlist->cpus)) {
+		struct pollfd *pfd;
+
+		new_nfds = evlist->threads->nr * evlist->nr_entries;
+		pfd = zalloc(sizeof(struct pollfd) * new_nfds);
+		if (!pfd)
+			return -1;
+
+		/* Length is in bytes; the old array is left for the caller to free. */
+		memcpy(pfd, evlist->pollfd,
+		       (new_nfds - evlist->nr_entries) * sizeof(struct pollfd));
+		evlist->pollfd = pfd;
+		return 0;
+	}
+
+	return 1;
+}
+
 static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
 {
 	int nfds = cpu_map__nr(evlist->cpus) * evlist->threads->nr * evlist->nr_entries;
@@ -416,6 +442,20 @@ void perf_evlist__munmap(struct perf_evlist *evlist)
 	evlist->mmap = NULL;
 }
 
+static struct perf_mmap * perf_evlist__extend_mmap(struct perf_evlist *evlist)
+{
+	struct perf_mmap **new_mmap = NULL;
+
+	new_mmap = (struct perf_mmap **)xyarray__append(evlist->mmap, NULL);
+
+	if (new_mmap != NULL) {
+		evlist->nr_mmaps++;
+		return *new_mmap;
+	}
+
+	return NULL;
+}
+
 static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 {
 	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
@@ -433,7 +473,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist,
 	pmmap->prev = 0;
 	pmmap->mask = mask;
 	pmmap->base = mmap(NULL, evlist->mmap_len, prot,
-				      MAP_SHARED, fd, 0);
+			   MAP_SHARED, fd, 0);
 	if (pmmap->base == MAP_FAILED) {
 		pmmap->base = NULL;
 		return -1;
@@ -527,6 +567,111 @@ out_unmap:
 	return -1;
 }
 
+int perf_evlist__mmap_thread(struct perf_evlist *evlist, bool overwrite)
+{
+	struct perf_evsel *evsel;
+	int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
+	int mask = evlist->mmap_len - page_size -1;
+	int output = -1;
+	struct pollfd *old_pollfd = evlist->pollfd;
+	struct perf_mmap *pmmap;
+
+	if (!cpu_map__all(evlist->cpus))
+		return 1;
+
+	if ((pmmap = perf_evlist__extend_mmap(evlist)) == NULL)
+		return -ENOMEM;
+
+	if (perf_evlist__extend_pollfd(evlist) < 0)
+		goto free_append_mmap;
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if (evsel->attr.read_format & PERF_FORMAT_ID) {
+			if (perf_evsel__extend_id(evsel) < 0)
+				goto free_append_pollfd;
+		}
+	}
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		int fd = FD(evsel, 0, -1);
+
+		if (output == -1) {
+			output = fd;
+
+			pmmap->prev = 0;
+			pmmap->mask = mask;
+			pmmap->base = mmap(NULL, evlist->mmap_len, prot,
+					   MAP_SHARED, fd, 0);
+
+			if (pmmap->base == MAP_FAILED) {
+				pmmap->base = NULL;
+				goto out_unmap;
+			}
+			perf_evlist__add_pollfd(evlist, fd);
+		} else {
+			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
+				goto out_unmap;
+		}
+		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+		    perf_evlist__id_add_fd(evlist, evsel, 0, -1, fd) < 0)
+			goto out_unmap;
+	}
+
+	free(old_pollfd);
+	return 0;
+out_unmap:
+	pmmap = perf_evlist__get_mmap(evlist, -1);
+
+	if (pmmap->base != NULL) {
+		munmap(pmmap->base, evlist->mmap_len);
+		pmmap->base = NULL;
+	}
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		xyarray__remove(evsel->id, -1);
+		xyarray__remove(evsel->sample_id, -1);
+	}
+
+free_append_pollfd:
+	free(evlist->pollfd);
+	evlist->pollfd = old_pollfd;
+
+free_append_mmap:
+	xyarray__remove(evlist->mmap, -1);
+	return -1;
+}
+
+/* Tear down thread @tidx: unmap its ring buffer, drop its ids and pollfd slots. */
+void perf_evlist__munmap_thread(struct perf_evlist *evlist, int tidx)
+{
+	struct perf_evsel *evsel;
+	struct perf_mmap *pmmap = perf_evlist__get_mmap(evlist, tidx);
+	struct pollfd *slot = evlist->pollfd + tidx * evlist->nr_entries;
+	int tail = (evlist->threads->nr - tidx - 1) * evlist->nr_entries;
+
+	if (pmmap->base != NULL) {
+		munmap(pmmap->base, evlist->mmap_len);
+		evlist->nr_mmaps--;
+		pmmap->base = NULL;
+		xyarray__remove(evlist->mmap, tidx);
+	}
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		perf_evsel__remove_id(evsel, tidx);
+	}
+
+	/*
+	 * Close the gap left by @tidx by sliding the later threads' slots down.
+	 * Shrinking needs no new allocation, so nothing here can fail; memmove()
+	 * because the ranges overlap, and its length is in bytes, not slots.
+	 */
+	memmove(slot, slot + evlist->nr_entries, tail * sizeof(struct pollfd));
+	memset(slot + tail, 0, evlist->nr_entries * sizeof(struct pollfd));
+
+	/* NOTE(review): drops one fd although nr_entries slots went - confirm. */
+	evlist->nr_fds--;
+}
+
 /** perf_evlist__mmap - Create per cpu maps to receive events
  *
  * @evlist - list of events
@@ -580,7 +725,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 	return perf_evlist__mmap_per_cpu(evlist, prot, mask);
 }
 
-struct perf_mmap *perf_evlist__get_mmap(struct perf_evlist *evlist, int idx) 
+struct perf_mmap *perf_evlist__get_mmap(struct perf_evlist *evlist, int idx)
 {
 	return (struct perf_mmap *)xyarray__entry(evlist->mmap, 0, idx);
 }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index eb22e49..8693c11 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -96,6 +96,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 		      bool overwrite);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
+int perf_evlist__mmap_thread(struct perf_evlist *evlist, bool overwrite);
+void perf_evlist__munmap_thread(struct perf_evlist *evlist, int tidx);
+
 void perf_evlist__disable(struct perf_evlist *evlist);
 void perf_evlist__enable(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2eb75f9..5671ee9 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -720,7 +720,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
 	assert(list_empty(&evsel->node));
 	xyarray__delete(evsel->fd);
 	xyarray__delete(evsel->sample_id);
-	free(evsel->id);
+	xyarray__delete(evsel->id);
 }
 
 void perf_evsel__delete(struct perf_evsel *evsel)
@@ -845,7 +845,10 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
 	 */
 	BUG_ON(!leader->fd);
 
-	fd = FD(leader, cpu, thread);
+	if (thread == -1)
+		fd = *(int *)xyarray__entry(leader->fd, cpu, -1);
+	else
+		fd = FD(leader, cpu, thread);
 	BUG_ON(fd == -1);
 
 	return fd;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 5/8]Perf: add extend mechanism for evsel->id & evsel->fd
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
  2013-03-13  9:42 ` [PATCH v3 7/8]Perf: changed the method to traverse mmap list chenggang
  2013-03-13  9:42 ` [PATCH v3 6/8]Perf: Add extend mechanism for mmap & pollfd chenggang
@ 2013-03-13  9:42 ` chenggang
  2013-03-13  9:42 ` [PATCH v3 4/8]perf: Transform evsel->id to xyarray chenggang
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

Add extend mechanism for evsel->id & evsel->fd.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/util/evsel.c      |   76 ++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/evsel.h      |    8 +++++
 tools/perf/util/thread_map.c |    2 +-
 3 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 015321f..2eb75f9 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -599,6 +599,16 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 	return evsel->fd != NULL ? 0 : -ENOMEM;
 }
 
+/*
+ * Return the pointer to new fds (fds for the new thread at all cpus).
+ */
+static int** perf_evsel__extend_fd(struct perf_evsel *evsel)
+{
+	int init_fd = -1;
+
+	return (int**)xyarray__append(evsel->fd, (char *)&init_fd);
+}
+
 int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 			   const char *filter)
 {
@@ -617,6 +627,26 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 	return 0;
 }
 
+int perf_evsel__extend_id(struct perf_evsel *evsel)
+{
+	if (xyarray__append(evsel->sample_id, NULL) == NULL)
+		return -ENOMEM;
+
+	if (xyarray__append(evsel->id, NULL) == NULL) {
+		xyarray__remove(evsel->sample_id, -1);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void perf_evsel__remove_id(struct perf_evsel *evsel, int tidx)
+{
+	xyarray__remove(evsel->id, tidx);
+	evsel->ids--;
+	xyarray__remove(evsel->sample_id, tidx);
+}
+
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
 {
 	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
@@ -937,6 +967,52 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel,
 	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
 }
 
+void perf_evsel__close_single_thread(struct perf_evsel *evsel, int cpu_nr,
+				     int tidx)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < cpu_nr; cpu++) {
+		if (FD(evsel, cpu, tidx) >= 0)
+			close(FD(evsel, cpu, tidx));
+	}
+	xyarray__remove(evsel->fd, tidx); 
+}
+
+int perf_evsel__open_single_thread(struct perf_evsel *evsel,
+                                   struct cpu_map *cpus, int tid)
+{
+	int cpu;
+	int pid = -1;
+	unsigned long flags = 0;
+	int **new_fds;
+
+	if ((new_fds = perf_evsel__extend_fd(evsel)) == NULL)
+		return -1;
+
+	if (evsel->cgrp) {
+		flags = PERF_FLAG_PID_CGROUP;
+		pid = evsel->cgrp->fd;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; cpu++) {
+		int group_fd;
+
+		if (!evsel->cgrp)
+			pid = tid;
+
+		group_fd = get_group_fd(evsel, cpu, -1);
+		evsel->attr.disabled = 0;
+		*new_fds[cpu] = sys_perf_event_open(&evsel->attr, pid,
+						    cpus->map[cpu], group_fd,
+						    flags);
+		if (*new_fds[cpu] < 0)
+			return -errno;
+	}
+
+	return 0;
+}
+
 static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
 				       const union perf_event *event,
 				       struct perf_sample *sample)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 7adb116..ae391d4 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -128,6 +128,9 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 void perf_evsel__id_new(struct perf_evsel *evsel, int nr);
 u64 *perf_evsel__get_id(struct perf_evsel *evsel, int idx);
 
+int perf_evsel__extend_id(struct perf_evsel *evsel);
+void perf_evsel__remove_id(struct perf_evsel *evsel, int tidx);
+
 void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
 				  enum perf_event_sample_format bit);
 void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
@@ -152,6 +155,11 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 		     struct thread_map *threads);
 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);
 
+int perf_evsel__open_single_thread(struct perf_evsel *evsel,
+				   struct cpu_map *cpus, int tid);
+void perf_evsel__close_single_thread(struct perf_evsel *evsel, int cpu_nr,
+				     int tidx);
+
 struct perf_sample;
 
 void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 301f4ce..76f3da7 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -64,7 +64,7 @@ int thread_map__has_pid(struct thread_map *threads, pid_t pid)
 }
 
 /*
- * Append a thread_pid at the last of @threads.
+ * Append a thread_pid at the tail of @threads.
  */
 int thread_map__append(struct thread_map *threads, pid_t pid)
 {
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 4/8]perf: Transform evsel->id to xyarray
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
                   ` (2 preceding siblings ...)
  2013-03-13  9:42 ` [PATCH v3 5/8]Perf: add extend mechanism for evsel->id & evsel->fd chenggang
@ 2013-03-13  9:42 ` chenggang
  2013-03-17 23:45   ` David Ahern
  2013-03-13  9:42 ` [PATCH v3 3/8]Perf: Transform evlist->mmap " chenggang
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

Transform evsel->id into an xyarray, so that it becomes a linked list
instead of an array.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/util/evlist.c |    4 +++-
 tools/perf/util/evsel.c  |   19 +++++++++++++++++--
 tools/perf/util/evsel.h  |    5 ++++-
 tools/perf/util/header.c |   28 ++++++++++++++++++----------
 tools/perf/util/header.h |    3 ++-
 5 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7515651..c1cd8f9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -287,8 +287,10 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist,
 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
 			 int cpu, int thread, u64 id)
 {
+	u64* idp = perf_evsel__get_id(evsel, -1);
 	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
-	evsel->id[evsel->ids++] = id;
+	*idp = id;
+	evsel->ids++;
 }
 
 static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 57c569d..015321f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -623,7 +623,7 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
 	if (evsel->sample_id == NULL)
 		return -ENOMEM;
 
-	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
+	evsel->id = xyarray__new(1, ncpus * nthreads, sizeof(u64));
 	if (evsel->id == NULL) {
 		xyarray__delete(evsel->sample_id);
 		evsel->sample_id = NULL;
@@ -633,6 +633,21 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
 	return 0;
 }
 
+void perf_evsel__id_new(struct perf_evsel *evsel, int nr)
+{
+	if (evsel->id)
+		xyarray__delete(evsel->id);
+
+	evsel->id = NULL;
+
+	evsel->id = xyarray__new(1, nr, sizeof(u64));
+}
+
+u64 *perf_evsel__get_id(struct perf_evsel *evsel, int idx)
+{
+	return (u64 *)xyarray__entry(evsel->id, 0, idx);
+}
+
 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
 {
 	evsel->counts = zalloc((sizeof(*evsel->counts) +
@@ -650,7 +665,7 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
 {
 	xyarray__delete(evsel->sample_id);
 	evsel->sample_id = NULL;
-	free(evsel->id);
+	xyarray__delete(evsel->id);
 	evsel->id = NULL;
 }
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 52021c3..7adb116 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -51,7 +51,7 @@ struct perf_evsel {
 	char			*filter;
 	struct xyarray		*fd;
 	struct xyarray		*sample_id;
-	u64			*id;
+	struct xyarray		*id;
 	struct perf_counts	*counts;
 	struct perf_counts	*prev_raw_counts;
 	int			idx;
@@ -125,6 +125,9 @@ void perf_evsel__free_id(struct perf_evsel *evsel);
 void perf_evsel__free_counts(struct perf_evsel *evsel);
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
+void perf_evsel__id_new(struct perf_evsel *evsel, int nr);
+u64 *perf_evsel__get_id(struct perf_evsel *evsel, int idx);
+
 void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
 				  enum perf_event_sample_format bit);
 void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f4bfd79..d344e61 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1325,19 +1325,18 @@ read_event_desc(struct perf_header *ph, int fd)
 		if (!nr)
 			continue;
 
-		id = calloc(nr, sizeof(*id));
-		if (!id)
-			goto error;
 		evsel->ids = nr;
-		evsel->id = id;
+		perf_evsel__id_new(evsel, nr);
+		if (!evsel->id)
+			goto error;
 
 		for (j = 0 ; j < nr; j++) {
+			id = perf_evsel__get_id(evsel, j);
 			ret = readn(fd, id, sizeof(*id));
 			if (ret != (ssize_t)sizeof(*id))
 				goto error;
 			if (ph->needs_swap)
 				*id = bswap_64(*id);
-			id++;
 		}
 	}
 out:
@@ -1384,7 +1383,8 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
 
 		if (evsel->ids) {
 			fprintf(fp, ", id = {");
-			for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
+			for (j = 0; j < evsel->ids; j++) {
+				id = perf_evsel__get_id(evsel, j);
 				if (j)
 					fputc(',', fp);
 				fprintf(fp, " %"PRIu64, *id);
@@ -2880,12 +2880,15 @@ out_delete_evlist:
 }
 
 int perf_event__synthesize_attr(struct perf_tool *tool,
-				struct perf_event_attr *attr, u32 ids, u64 *id,
+				struct perf_event_attr *attr,
+				struct perf_evsel *evsel,
 				perf_event__handler_t process)
 {
 	union perf_event *ev;
 	size_t size;
 	int err;
+	u32 ids = evsel->ids;
+	u32 i;
 
 	size = sizeof(struct perf_event_attr);
 	size = PERF_ALIGN(size, sizeof(u64));
@@ -2898,7 +2901,12 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
 		return -ENOMEM;
 
 	ev->attr.attr = *attr;
-	memcpy(ev->attr.id, id, ids * sizeof(u64));
+	for (i = 0; i < ids; i++) {
+		u64 *id;
+
+		id = perf_evsel__get_id(evsel, i);
+		ev->attr.id[i] = *id;
+        }
 
 	ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
 	ev->attr.header.size = (u16)size;
@@ -2921,8 +2929,8 @@ int perf_event__synthesize_attrs(struct perf_tool *tool,
 	int err = 0;
 
 	list_for_each_entry(evsel, &session->evlist->entries, node) {
-		err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids,
-						  evsel->id, process);
+		err = perf_event__synthesize_attr(tool, &evsel->attr,
+						  evsel, process);
 		if (err) {
 			pr_debug("failed to create perf header attribute\n");
 			return err;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c9fc55c..60f85ca 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -126,7 +126,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
 int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
 
 int perf_event__synthesize_attr(struct perf_tool *tool,
-				struct perf_event_attr *attr, u32 ids, u64 *id,
+				struct perf_event_attr *attr,
+				struct perf_evsel *evsel,
 				perf_event__handler_t process);
 int perf_event__synthesize_attrs(struct perf_tool *tool,
 				 struct perf_session *session,
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 3/8]Perf: Transform evlist->mmap to xyarray
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
                   ` (3 preceding siblings ...)
  2013-03-13  9:42 ` [PATCH v3 4/8]perf: Transform evsel->id to xyarray chenggang
@ 2013-03-13  9:42 ` chenggang
  2013-03-17 23:42   ` David Ahern
  2013-03-13  9:42 ` [PATCH v3 2/8]Perf: Transform xyarray to linked list chenggang
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

Transform evlist->mmap into an xyarray. Because xyarray is now implemented as
a linked list, evlist->mmap becomes a linked list too.

1) perf_evlist__mmap_thread()
   mmap a new fd for a thread forked on-the-fly.
2) perf_evlist__munmap_thread()
   munmap the fd of a thread that exited on-the-fly.
3) perf_evlist__get_mmap()
   get a perf_mmap struct in the evlist->mmap list by its index.
4) for_each_mmap(md, evlist)
   traverse all perf_mmap structures in the evlist->mmap list.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/Makefile         |    3 ++-
 tools/perf/builtin-record.c |    8 +++----
 tools/perf/util/evlist.c    |   49 ++++++++++++++++++++++++++-----------------
 tools/perf/util/evlist.h    |    8 ++++++-
 4 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index a2108ca..7f3f066 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -209,7 +209,8 @@ BASIC_CFLAGS = \
 	-Iutil \
 	-I. \
 	-I$(TRACE_EVENT_DIR) \
-	-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+	-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
+	-std=gnu99
 
 BASIC_LDFLAGS =
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 774c907..3bca0b2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -363,12 +363,12 @@ static struct perf_event_header finished_round_event = {
 
 static int perf_record__mmap_read_all(struct perf_record *rec)
 {
-	int i;
 	int rc = 0;
+	struct perf_mmap *pmmap = NULL;
 
-	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
-		if (rec->evlist->mmap[i].base) {
-			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
+	for_each_mmap(pmmap, rec->evlist) {
+		if (pmmap->base) {
+			if (perf_record__mmap_read(rec, pmmap) != 0) {
 				rc = -1;
 				goto out;
 			}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d5063d6..7515651 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -336,7 +336,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
-	struct perf_mmap *md = &evlist->mmap[idx];
+	struct perf_mmap *md = perf_evlist__get_mmap(evlist, idx);
 	unsigned int head = perf_mmap__read_head(md);
 	unsigned int old = md->prev;
 	unsigned char *data = md->base + page_size;
@@ -401,16 +401,16 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 
 void perf_evlist__munmap(struct perf_evlist *evlist)
 {
-	int i;
+	struct perf_mmap *pmmap = NULL;
 
-	for (i = 0; i < evlist->nr_mmaps; i++) {
-		if (evlist->mmap[i].base != NULL) {
-			munmap(evlist->mmap[i].base, evlist->mmap_len);
-			evlist->mmap[i].base = NULL;
+	for_each_mmap(pmmap, evlist) {
+		if (pmmap->base != NULL) {
+			munmap(pmmap->base, evlist->mmap_len);
+			pmmap->base = NULL;
 		}
 	}
 
-	free(evlist->mmap);
+	xyarray__delete(evlist->mmap);
 	evlist->mmap = NULL;
 }
 
@@ -419,19 +419,21 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
 	if (cpu_map__all(evlist->cpus))
 		evlist->nr_mmaps = evlist->threads->nr;
-	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+	evlist->mmap = xyarray__new(1, evlist->nr_mmaps, sizeof(struct perf_mmap));
 	return evlist->mmap != NULL ? 0 : -ENOMEM;
 }
 
 static int __perf_evlist__mmap(struct perf_evlist *evlist,
 			       int idx, int prot, int mask, int fd)
 {
-	evlist->mmap[idx].prev = 0;
-	evlist->mmap[idx].mask = mask;
-	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
+	struct perf_mmap *pmmap = perf_evlist__get_mmap(evlist, idx);
+
+	pmmap->prev = 0;
+	pmmap->mask = mask;
+	pmmap->base = mmap(NULL, evlist->mmap_len, prot,
 				      MAP_SHARED, fd, 0);
-	if (evlist->mmap[idx].base == MAP_FAILED) {
-		evlist->mmap[idx].base = NULL;
+	if (pmmap->base == MAP_FAILED) {
+		pmmap->base = NULL;
 		return -1;
 	}
 
@@ -472,9 +474,11 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m
 
 out_unmap:
 	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
-		if (evlist->mmap[cpu].base != NULL) {
-			munmap(evlist->mmap[cpu].base, evlist->mmap_len);
-			evlist->mmap[cpu].base = NULL;
+		struct perf_mmap *pmmap = perf_evlist__get_mmap(evlist, cpu);
+
+		if (pmmap->base != NULL) {
+			munmap(pmmap->base, evlist->mmap_len);
+			pmmap->base = NULL;
 		}
 	}
 	return -1;
@@ -511,9 +515,11 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in
 
 out_unmap:
 	for (thread = 0; thread < evlist->threads->nr; thread++) {
-		if (evlist->mmap[thread].base != NULL) {
-			munmap(evlist->mmap[thread].base, evlist->mmap_len);
-			evlist->mmap[thread].base = NULL;
+		struct perf_mmap *pmmap = perf_evlist__get_mmap(evlist, thread);
+
+		if (pmmap->base != NULL) {
+			munmap(pmmap->base, evlist->mmap_len);
+			pmmap->base = NULL;
 		}
 	}
 	return -1;
@@ -572,6 +578,11 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 	return perf_evlist__mmap_per_cpu(evlist, prot, mask);
 }
 
+struct perf_mmap *perf_evlist__get_mmap(struct perf_evlist *evlist, int idx) 
+{
+	return (struct perf_mmap *)xyarray__entry(evlist->mmap, 0, idx);
+}
+
 int perf_evlist__create_maps(struct perf_evlist *evlist,
 			     struct perf_target *target)
 {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 2dd07bd..eb22e49 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -7,6 +7,7 @@
 #include "event.h"
 #include "evsel.h"
 #include "util.h"
+#include "xyarray.h"
 #include <unistd.h>
 
 struct pollfd;
@@ -37,7 +38,7 @@ struct perf_evlist {
 		pid_t	pid;
 	} workload;
 	bool		 overwrite;
-	struct perf_mmap *mmap;
+	struct xyarray	 *mmap;
 	struct pollfd	 *pollfd;
 	struct thread_map *threads;
 	struct cpu_map	  *cpus;
@@ -131,6 +132,8 @@ void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
 				   struct list_head *list,
 				   int nr_entries);
 
+struct perf_mmap *perf_evlist__get_mmap(struct perf_evlist *evlist, int idx);
+
 static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
 {
 	return list_entry(evlist->entries.next, struct perf_evsel, node);
@@ -163,4 +166,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md,
 	pc->data_tail = tail;
 }
 
+#define for_each_mmap(pmmap, evlist) \
+	xyarray_for_each_content(pmmap, &evlist->mmap->rows[0].head, struct perf_mmap)
+
 #endif /* __PERF_EVLIST_H */
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/8]Perf: Transform xyarray to linked list
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
                   ` (4 preceding siblings ...)
  2013-03-13  9:42 ` [PATCH v3 3/8]Perf: Transform evlist->mmap " chenggang
@ 2013-03-13  9:42 ` chenggang
  2013-03-13  9:42 ` [PATCH v3 1/8]Perf: Transform thread_map " chenggang
  2013-03-13  9:42 ` [PATCH v3 0/8]Perf: Make the 'perf top -p $pid' can perceive the new forked threads chenggang
  7 siblings, 0 replies; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

The 2-dimensional array cannot easily expand and shrink, which we need in order
to perceive thread fork and exit events on-the-fly.
We transform xyarray into a 2-dimensional linked list. The x dimension is cpus and
is still an array. The y dimension is the threads of interest and is transformed
into a linked list.
Interfaces to extend and shrink an existing xyarray are provided.
1) xyarray__append()
   append a column to all rows.
2) xyarray__remove()
   remove a column from all rows.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/util/xyarray.c |  125 +++++++++++++++++++++++++++++++++++++++++++--
 tools/perf/util/xyarray.h |   68 ++++++++++++++++++++++--
 2 files changed, 185 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
index 22afbf6..ddb3bff 100644
--- a/tools/perf/util/xyarray.c
+++ b/tools/perf/util/xyarray.c
@@ -1,20 +1,135 @@
 #include "xyarray.h"
 #include "util.h"
 
+/*
+ * Add a column for all rows;
+ * @init_cont stores the initialize value for new entries.
+ * The return value is the array of new contents.
+ */
+char** xyarray__append(struct xyarray *xy, char *init_cont)
+{
+	struct xyentry *new_entry;
+	unsigned int x;
+	char **new_conts;
+
+	new_conts = zalloc(sizeof(char *) * xy->row_count);
+	if (new_conts == NULL)
+		return NULL;
+
+	for (x = 0; x < xy->row_count; x++) {
+		new_entry = zalloc(sizeof(*new_entry));
+		if (new_entry == NULL) {
+			free(new_conts);
+			return NULL;
+		}
+
+		new_entry->contents = zalloc(xy->entry_size);
+		if (new_entry->contents == NULL) {
+			free(new_entry);
+			free(new_conts);
+			return NULL;
+		}
+
+		if (init_cont)
+			memcpy(new_entry->contents, init_cont, xy->entry_size);
+
+		new_conts[x] = new_entry->contents;
+
+		list_add_tail(&new_entry->next, &xy->rows[x].head);
+	}
+
+	return new_conts;
+}
+
 struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
 {
-	size_t row_size = ylen * entry_size;
-	struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
+	struct xyarray *xy = zalloc(sizeof(*xy) + xlen * sizeof(struct row));
+	int i;
+
+	if (xy == NULL)
+		return NULL;
+
+	xy->row_count = xlen;
+	xy->entry_size = entry_size;
 
-	if (xy != NULL) {
-		xy->entry_size = entry_size;
-		xy->row_size   = row_size;
+	for (i = 0; i < xlen; i++)
+		INIT_LIST_HEAD(&xy->rows[i].head);
+
+	for (i = 0; i< ylen; i++) {
+		if (xyarray__append(xy, NULL) == NULL) {
+			xyarray__delete(xy);
+			return NULL;
+		}
 	}
 
 	return xy;
 }
 
+static inline int xyarray__remove_last(struct xyarray *xy)
+{
+	struct xyentry *entry;
+	unsigned int x;
+
+	if (xy == NULL)
+		return -1; 
+
+	for (x = 0; x < xy->row_count; x++) {
+		if (!list_empty(&xy->rows[x].head)) {
+			entry = list_entry(xy->rows[x].head.prev,
+					   struct xyentry, next);
+			list_del(&entry->next);
+			free(entry);
+		}
+	}
+
+	return 0;
+}
+
+/*              
+ * remove a column for all rows;
+ */
+int xyarray__remove(struct xyarray *xy, int y)
+{
+	struct xyentry *entry, *tmp;
+	unsigned int x;
+	int count;
+
+	if (xy == NULL)
+		return -1;
+
+	if (y == -1)
+		return xyarray__remove_last(xy);
+
+	for (x = 0; x < xy->row_count; x++) {
+		count = 0;
+		list_for_each_entry_safe(entry, tmp, &xy->rows[x].head, next) {
+			if (count++ == y) {
+				list_del(&entry->next);
+				free(entry);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * delete @xy and all its nodes.
+ */
 void xyarray__delete(struct xyarray *xy)
 {
+	unsigned int i;
+	struct xyentry *entry, *tmp;
+
+	if (!xy)
+		return;
+
+	for (i = 0; i < xy->row_count; i++) {
+		list_for_each_entry_safe(entry, tmp, &xy->rows[i].head, next) {
+			list_del(&entry->next);
+			free(entry);
+		}
+	}
+
 	free(xy);
 }
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
index c488a07..b7a0819 100644
--- a/tools/perf/util/xyarray.h
+++ b/tools/perf/util/xyarray.h
@@ -2,19 +2,81 @@
 #define _PERF_XYARRAY_H_ 1
 
 #include <sys/types.h>
+#include <linux/list.h>
+
+struct row {
+	struct list_head head;
+};
+
+struct xyentry {
+	struct list_head next;
+	char *contents;
+};
 
 struct xyarray {
-	size_t row_size;
+	size_t row_count;
 	size_t entry_size;
-	char contents[];
+	struct row rows[];
 };
 
 struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
 void xyarray__delete(struct xyarray *xy);
+char** xyarray__append(struct xyarray *xy, char *init_cont);
+int xyarray__remove(struct xyarray *xy, int y);
+
+/*
+ * Avoid list walk while only the last entry is needed.
+ */
+static inline void *xyarray__last_entry(struct xyarray *xy, int x)
+{
+	struct xyentry *entry;
+
+	if (!list_empty(&xy->rows[x].head)) {
+		entry = list_entry(xy->rows[x].head.prev, struct xyentry, next);
+		return entry->contents;
+	}
+
+	return NULL;
+}
 
 static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
 {
-	return &xy->contents[x * xy->row_size + y * xy->entry_size];
+	struct xyentry *entry;
+	int columns = 0;
+
+	if (y == -1)
+		return xyarray__last_entry(xy, x);
+
+	list_for_each_entry(entry, &xy->rows[x].head, next) {
+		if (columns++ == y)
+			return entry->contents;
+	}
+
+	return NULL;
 }
 
+static inline int xyarray__columns(struct xyarray *xy)
+{
+	struct xyentry *entry;
+	int columns = 0;
+
+	list_for_each_entry(entry, &xy->rows[0].head, next)
+		columns ++;
+
+	return columns;
+}
+
+/*
+ * @p points to xyentry->contents
+ * @head points to xyarray->rows[].head
+ * @type is the type of *p
+ */
+#define xyarray_for_each_content(pe, head, type) \
+	for (char **p = &(((struct xyentry*)list_entry((head)->next, struct xyentry, next))->contents); \
+	     &(((struct xyentry *)(container_of(p, struct xyentry, contents)))->next) != (head) \
+	     && (pe = (type *)(*p)); \
+	     p = (char **)container_of(p, struct xyentry, contents), \
+	     p = &(((struct xyentry *)list_entry(((struct xyentry *)p)->next.next, \
+		 struct xyentry, next))->contents))
+
 #endif /* _PERF_XYARRAY_H_ */
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 1/8]Perf: Transform thread_map to linked list
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
                   ` (5 preceding siblings ...)
  2013-03-13  9:42 ` [PATCH v3 2/8]Perf: Transform xyarray to linked list chenggang
@ 2013-03-13  9:42 ` chenggang
  2013-03-17 23:37   ` David Ahern
  2013-03-13  9:42 ` [PATCH v3 0/8]Perf: Make the 'perf top -p $pid' can perceive the new forked threads chenggang
  7 siblings, 1 reply; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

The size of thread_map is fixed at initialization time according to the
files in /proc/{$pid}. It cannot be expanded or shrunk, which we need in order
to perceive thread fork and exit events.
We transform the thread_map structure into a linked list, and implement some
interfaces to expand and shrink it. To stay compatible with the existing
code, a thread can still be looked up by its index in the thread_map.
1) thread_map__append()
   Append a new thread to the thread_map according to the new thread's pid.
2) thread_map__remove()
   Remove an existing thread from the thread_map according to the index of the
   thread in the thread_map.
3) thread_map__init()
   Allocate a thread_map and initialize it. The thread_map is empty after
   this function is called; call thread_map__append() to insert
   threads.
4) thread_map__delete()
   Delete an existing thread_map.
5) thread_map__set_pid()
   Set the pid of a thread by its index in the thread_map.
6) thread_map__get_pid()
   Get a thread's pid by its index in the thread_map.
7) thread_map__get_idx_by_pid()
   Get a thread's index in the thread_map according to its pid.
   When we get a PERF_RECORD_EXIT event, we only know the pid of the thread.
8) thread_map__empty_thread_map()
   Return an empty thread_map that contains only a dummy thread.
   This function replaces the global variable empty_thread_map.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

---
 tools/perf/builtin-stat.c                 |    2 +-
 tools/perf/tests/open-syscall-tp-fields.c |    2 +-
 tools/perf/util/event.c                   |   12 +-
 tools/perf/util/evlist.c                  |    2 +-
 tools/perf/util/evsel.c                   |   16 +-
 tools/perf/util/python.c                  |    2 +-
 tools/perf/util/thread_map.c              |  281 ++++++++++++++++++++++-------
 tools/perf/util/thread_map.h              |   17 +-
 8 files changed, 244 insertions(+), 90 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9984876..293b09c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -401,7 +401,7 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 		}
 
 		if (perf_target__none(&target))
-			evsel_list->threads->map[0] = child_pid;
+			thread_map__set_pid(evsel_list->threads, 0, child_pid);
 
 		/*
 		 * Wait for the child to be ready to exec.
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c
index 1c52fdc..39eb770 100644
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/open-syscall-tp-fields.c
@@ -43,7 +43,7 @@ int test__syscall_open_tp_fields(void)
 
 	perf_evsel__config(evsel, &opts);
 
-	evlist->threads->map[0] = getpid();
+	thread_map__append(evlist->threads, getpid());
 
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 5cd13d7..d093460 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -326,9 +326,11 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 
 	err = 0;
 	for (thread = 0; thread < threads->nr; ++thread) {
+		pid_t pid = thread_map__get_pid(threads, thread);
+
 		if (__event__synthesize_thread(comm_event, mmap_event,
-					       threads->map[thread], 0,
-					       process, tool, machine)) {
+					       pid, 0, process, tool, 
+					       machine)) {
 			err = -1;
 			break;
 		}
@@ -337,12 +339,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 		 * comm.pid is set to thread group id by
 		 * perf_event__synthesize_comm
 		 */
-		if ((int) comm_event->comm.pid != threads->map[thread]) {
+		if ((int) comm_event->comm.pid != pid) {
 			bool need_leader = true;
 
 			/* is thread group leader in thread_map? */
 			for (j = 0; j < threads->nr; ++j) {
-				if ((int) comm_event->comm.pid == threads->map[j]) {
+				pid_t pidj = thread_map__get_pid(threads, j);
+
+				if ((int) comm_event->comm.pid == pidj) {
 					need_leader = false;
 					break;
 				}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index bc4ad79..d5063d6 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -793,7 +793,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 	}
 
 	if (perf_target__none(&opts->target))
-		evlist->threads->map[0] = evlist->workload.pid;
+		thread_map__append(evlist->threads, evlist->workload.pid);
 
 	close(child_ready_pipe[1]);
 	close(go_pipe[0]);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9c82f98f..57c569d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -835,7 +835,7 @@ retry_sample_id:
 			int group_fd;
 
 			if (!evsel->cgrp)
-				pid = threads->map[thread];
+				pid = thread_map__get_pid(threads, thread);
 
 			group_fd = get_group_fd(evsel, cpu, thread);
 
@@ -894,14 +894,6 @@ static struct {
 	.cpus	= { -1, },
 };
 
-static struct {
-	struct thread_map map;
-	int threads[1];
-} empty_thread_map = {
-	.map.nr	 = 1,
-	.threads = { -1, },
-};
-
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 		     struct thread_map *threads)
 {
@@ -911,7 +903,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 	}
 
 	if (threads == NULL)
-		threads = &empty_thread_map.map;
+		threads = thread_map__empty_thread_map();
 
 	return __perf_evsel__open(evsel, cpus, threads);
 }
@@ -919,7 +911,9 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
 			     struct cpu_map *cpus)
 {
-	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+	struct thread_map *empty_thread_map = thread_map__empty_thread_map();
+
+	return __perf_evsel__open(evsel, cpus, empty_thread_map);
 }
 
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 925e0c3..e3f3f1b 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -458,7 +458,7 @@ static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
 	if (i >= pthreads->threads->nr)
 		return NULL;
 
-	return Py_BuildValue("i", pthreads->threads->map[i]);
+	return Py_BuildValue("i", thread_map__get_pid(pthreads->threads, i));
 }
 
 static PySequenceMethods pyrf_thread_map__sequence_methods = {
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 9b5f856..301f4ce 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -19,9 +19,116 @@ static int filter(const struct dirent *dir)
 		return 1;
 }
 
-struct thread_map *thread_map__new_by_pid(pid_t pid)
+/*
+ * Initialize a thread_map, but the thread_map is empty after calling this
+ * function. thread_pid should be inserted into it by thread_map__append().
+ */
+struct thread_map *thread_map__init(void)
 {
 	struct thread_map *threads;
+
+	threads = malloc(sizeof(*threads));
+	if (threads == NULL)
+		return NULL;
+
+	threads->nr = 0;
+	INIT_LIST_HEAD(&threads->head);
+	return threads;
+}
+
+/*
+ * Delete @threads, include its all thread_pids.
+ */
+void thread_map__delete(struct thread_map *threads)
+{
+	struct thread_pid *tp, *tmp;
+
+	list_for_each_entry_safe(tp, tmp, &threads->head, next) {
+		list_del(&tp->next);
+		free(tp);
+	}
+
+	free(threads);
+}
+
+int thread_map__has_pid(struct thread_map *threads, pid_t pid)
+{
+        struct thread_pid *tp;
+
+        list_for_each_entry(tp, &threads->head, next) {
+                if (tp->pid == pid)
+                        return 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Append a thread_pid at the last of @threads.
+ */
+int thread_map__append(struct thread_map *threads, pid_t pid)
+{
+	struct thread_pid *tp;
+
+	if (threads == NULL)
+                return -1;
+
+	if (thread_map__has_pid(threads, pid) != 0)
+		return 1;
+
+	tp = malloc(sizeof(*tp));
+	if (tp == NULL)
+		return -1;
+
+	tp->pid = pid;
+	list_add_tail(&tp->next, &threads->head);
+	threads->nr++;
+
+	return 0;
+}
+
+static inline int thread_map__remove_last(struct thread_map *threads)
+{       
+        struct thread_pid *tp;
+        
+        if (!list_empty(&threads->head)) {
+                tp = list_entry(threads->head.prev, struct thread_pid, next);
+                list_del(&tp->next);
+                free(tp);
+                threads->nr--;
+                return 0;
+        }
+        
+        return -1;
+}       
+
+/*
+ * remove a thread_pid from threads, the thread_pid is indexed by @idx.
+ * if @idx is -1, the last thread_pid is removed.
+ */
+int thread_map__remove(struct thread_map *threads, int idx)
+{
+	struct thread_pid *tp, *tmp;
+	int count = 0;
+
+	if (idx == -1)
+		return thread_map__remove_last(threads);
+
+	list_for_each_entry_safe(tp, tmp, &threads->head, next) {
+		if (count++ == idx) {
+			list_del(&tp->next);
+			free(tp);
+			threads->nr--;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+struct thread_map *thread_map__new_by_pid(pid_t pid)
+{
+	struct thread_map *threads = NULL;
 	char name[256];
 	int items;
 	struct dirent **namelist = NULL;
@@ -32,11 +139,14 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
 	if (items <= 0)
 		return NULL;
 
-	threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+	threads = thread_map__init();
 	if (threads != NULL) {
-		for (i = 0; i < items; i++)
-			threads->map[i] = atoi(namelist[i]->d_name);
-		threads->nr = items;
+		for (i = 0; i < items; i++) {
+			pid_t pid_num = atoi(namelist[i]->d_name);
+
+			if (thread_map__append(threads, pid_num) == -1)
+				goto out_free_threads;
+		}
 	}
 
 	for (i=0; i<items; i++)
@@ -44,28 +154,38 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
 	free(namelist);
 
 	return threads;
+
+out_free_threads:
+	thread_map__delete(threads);
+	return NULL;
 }
 
 struct thread_map *thread_map__new_by_tid(pid_t tid)
 {
-	struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+	struct thread_map *threads = NULL;
 
+	threads = thread_map__init();
 	if (threads != NULL) {
-		threads->map[0] = tid;
-		threads->nr	= 1;
+		if (thread_map__append(threads, tid) == -1)
+			goto out_free_threads;
 	}
 
 	return threads;
+
+out_free_threads:
+	thread_map__delete(threads);
+	return NULL;
 }
 
 struct thread_map *thread_map__new_by_uid(uid_t uid)
 {
 	DIR *proc;
-	int max_threads = 32, items, i;
+	int items, i;
 	char path[256];
 	struct dirent dirent, *next, **namelist = NULL;
-	struct thread_map *threads = malloc(sizeof(*threads) +
-					    max_threads * sizeof(pid_t));
+	struct thread_map *threads = NULL;
+
+	threads = thread_map__init();
 	if (threads == NULL)
 		goto out;
 
@@ -73,11 +193,8 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
 	if (proc == NULL)
 		goto out_free_threads;
 
-	threads->nr = 0;
-
 	while (!readdir_r(proc, &dirent, &next) && next) {
 		char *end;
-		bool grow = false;
 		struct stat st;
 		pid_t pid = strtol(dirent.d_name, &end, 10);
 
@@ -97,30 +214,14 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
 		if (items <= 0)
 			goto out_free_closedir;
 
-		while (threads->nr + items >= max_threads) {
-			max_threads *= 2;
-			grow = true;
-		}
-
-		if (grow) {
-			struct thread_map *tmp;
-
-			tmp = realloc(threads, (sizeof(*threads) +
-						max_threads * sizeof(pid_t)));
-			if (tmp == NULL)
+		for (i = 0; i < items; i++) {
+			if (thread_map__append(threads, atoi(namelist[i]->d_name) < 0))
 				goto out_free_namelist;
-
-			threads = tmp;
 		}
 
 		for (i = 0; i < items; i++)
-			threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
-
-		for (i = 0; i < items; i++)
 			free(namelist[i]);
 		free(namelist);
-
-		threads->nr += items;
 	}
 
 out_closedir:
@@ -129,7 +230,7 @@ out:
 	return threads;
 
 out_free_threads:
-	free(threads);
+	thread_map__delete(threads);
 	return NULL;
 
 out_free_namelist:
@@ -138,7 +239,7 @@ out_free_namelist:
 	free(namelist);
 
 out_free_closedir:
-	free(threads);
+	thread_map__delete(threads);
 	threads = NULL;
 	goto out_closedir;
 }
@@ -156,11 +257,11 @@ struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
 
 static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
 {
-	struct thread_map *threads = NULL, *nt;
+	struct thread_map *threads = NULL;
 	char name[256];
-	int items, total_tasks = 0;
+	int items;
 	struct dirent **namelist = NULL;
-	int i, j = 0;
+	int i;
 	pid_t pid, prev_pid = INT_MAX;
 	char *end_ptr;
 	struct str_node *pos;
@@ -169,6 +270,10 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
 	if (!slist)
 		return NULL;
 
+	threads = thread_map__init();
+	if (threads == NULL)
+		return NULL;
+
 	strlist__for_each(pos, slist) {
 		pid = strtol(pos->s, &end_ptr, 10);
 
@@ -184,19 +289,13 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
 		if (items <= 0)
 			goto out_free_threads;
 
-		total_tasks += items;
-		nt = realloc(threads, (sizeof(*threads) +
-				       sizeof(pid_t) * total_tasks));
-		if (nt == NULL)
-			goto out_free_namelist;
-
-		threads = nt;
-
 		for (i = 0; i < items; i++) {
-			threads->map[j++] = atoi(namelist[i]->d_name);
-			free(namelist[i]);
+			if (thread_map__append(threads, atoi(namelist[i]->d_name)) < 0)
+				goto out_free_namelist;
 		}
-		threads->nr = total_tasks;
+
+		for (i = 0; i < items; i++)
+			free(namelist[i]);
 		free(namelist);
 	}
 
@@ -210,15 +309,14 @@ out_free_namelist:
 	free(namelist);
 
 out_free_threads:
-	free(threads);
+	thread_map__delete(threads);
 	threads = NULL;
 	goto out;
 }
 
 static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
 {
-	struct thread_map *threads = NULL, *nt;
-	int ntasks = 0;
+	struct thread_map *threads = NULL;
 	pid_t tid, prev_tid = INT_MAX;
 	char *end_ptr;
 	struct str_node *pos;
@@ -226,14 +324,16 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
 
 	/* perf-stat expects threads to be generated even if tid not given */
 	if (!tid_str) {
-		threads = malloc(sizeof(*threads) + sizeof(pid_t));
-		if (threads != NULL) {
-			threads->map[0] = -1;
-			threads->nr	= 1;
-		}
+		threads = thread_map__init();
+		if (threads != NULL)
+			thread_map__append(threads, -1);
 		return threads;
 	}
 
+	threads = thread_map__init();
+	if (!threads)
+		goto out;
+
 	slist = strlist__new(false, tid_str);
 	if (!slist)
 		return NULL;
@@ -248,21 +348,14 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
 		if (tid == prev_tid)
 			continue;
 
-		ntasks++;
-		nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
-
-		if (nt == NULL)
+		if (thread_map__append(threads, tid) == -1)
 			goto out_free_threads;
-
-		threads = nt;
-		threads->map[ntasks - 1] = tid;
-		threads->nr		 = ntasks;
 	}
 out:
 	return threads;
 
 out_free_threads:
-	free(threads);
+	thread_map__delete(threads);
 	threads = NULL;
 	goto out;
 }
@@ -279,18 +372,66 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid,
 	return thread_map__new_by_tid_str(tid);
 }
 
-void thread_map__delete(struct thread_map *threads)
+int thread_map__get_idx_by_pid(struct thread_map *threads, pid_t pid)
 {
-	free(threads);
+	struct thread_pid *tp;
+	int count = 0;
+
+	list_for_each_entry(tp, &threads->head, next) {
+		if (tp->pid == pid)
+			return count;
+		count++;
+	}
+
+	return -1; 
+}
+
+struct thread_map *thread_map__empty_thread_map(void)
+{
+	struct thread_map *empty_thread_map = NULL;
+
+	empty_thread_map = thread_map__init();
+	if (empty_thread_map)
+		thread_map__append(empty_thread_map, -1);
+
+	return empty_thread_map;
+}
+
+int thread_map__set_pid(struct thread_map *threads, int idx, pid_t pid)
+{
+	struct thread_pid *tp;
+	int count = 0;
+
+	list_for_each_entry(tp, &threads->head, next) {
+		if (count++ == idx) {
+			tp->pid = pid;
+			return 0;
+		}
+	}
+	return -1;
+}
+
+int thread_map__get_pid(struct thread_map *threads, int idx)
+{
+	struct thread_pid *tp;
+	int count = 0;                 
+
+	list_for_each_entry(tp, &threads->head, next) {
+		if (count++ == idx)
+			return tp->pid;
+	}
+
+	return -1;
 }
 
 size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
 {
-	int i;
+	int i = 0;
+	struct thread_pid *tp;
 	size_t printed = fprintf(fp, "%d thread%s: ",
 				 threads->nr, threads->nr > 1 ? "s" : "");
-	for (i = 0; i < threads->nr; ++i)
-		printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]);
+	list_for_each_entry(tp, &threads->head, next)
+		printed += fprintf(fp, "%s%d", i++ ? ", " : "", tp->pid);
 
 	return printed + fprintf(fp, "\n");
 }
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index f718df8..f6691ad 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -3,20 +3,35 @@
 
 #include <sys/types.h>
 #include <stdio.h>
+#include <linux/list.h>
+
+struct thread_pid {
+        struct list_head next;
+        pid_t pid;
+};
 
 struct thread_map {
 	int nr;
-	pid_t map[];
+	struct list_head head;
 };
 
+struct thread_map *thread_map__init(void);
 struct thread_map *thread_map__new_by_pid(pid_t pid);
 struct thread_map *thread_map__new_by_tid(pid_t tid);
 struct thread_map *thread_map__new_by_uid(uid_t uid);
 struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct thread_map *thread_map__empty_thread_map(void);
 
 struct thread_map *thread_map__new_str(const char *pid,
 		const char *tid, uid_t uid);
 
+int thread_map__append(struct thread_map *threads, pid_t pid);
+int thread_map__remove(struct thread_map *threads, int idx);
+int thread_map__set_pid(struct thread_map *threads, int idx, pid_t pid);
+int thread_map__get_pid(struct thread_map *threads, int index);
+int thread_map__get_idx_by_pid(struct thread_map *threads, pid_t pid);
+int thread_map__has_pid(struct thread_map *threadsd, pid_t pid);
+
 void thread_map__delete(struct thread_map *threads);
 
 size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 0/8]Perf: Make the 'perf top -p $pid' can perceive the new forked threads.
  2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
                   ` (6 preceding siblings ...)
  2013-03-13  9:42 ` [PATCH v3 1/8]Perf: Transform thread_map " chenggang
@ 2013-03-13  9:42 ` chenggang
  7 siblings, 0 replies; 12+ messages in thread
From: chenggang @ 2013-03-13  9:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: chenggang, David Ahern, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

From: chenggang <chenggang.qcg@taobao.com>

This patch set is based on the 3.8-rc7 kernel.

This is version 3; I optimized the performance and structure in this version.

This patch set makes 'perf top -p $pid' able to perceive the new threads that are
forked by the target processes. 'perf top{record} -p $pid' can perceive the threads
that were forked before perf was started, but it cannot perceive new threads that are
forked after perf has started. This is an important defect in perf, because many
applications fork new threads on-the-fly.
For performance reasons, the event inherit mechanism is forbidden while we use per-task
counters. Some internal data structures, such as thread_map, evlist->mmap, evsel->fd,
evsel->id and evsel->sample_id, are implemented as arrays at the initialization phase.
Their size is fixed, and they cannot be extended easily when we want to expand them
for newly forked threads.

So, we have done the following work:
1) Transformed thread_map to a linked list.
   Implemented the interfaces to extend and shrink an existing thread_map.
2) Transformed xyarray to a linked list. Implemented the interfaces to extend and shrink
   an existing xyarray.
   The xyarray is a 2-dimensional structure.
   The x-dimension is cpus, and the x-dimension is still an array.
   The y-dimension is threads of interest, and the y-dimension is a linked list.
3) Implemented evlist->mmap, evsel->fd, evsel->id and evsel->sample_id with the new xyarray.
   Implemented interfaces to expand and shrink these structures.
4) Added 2 callback functions to top->perf_tool; they are called when PERF_RECORD_FORK
   and PERF_RECORD_EXIT events are received.
   When a PERF_RECORD_FORK event is received, all related data structures are expanded,
   and a new fd and mmap are opened.
   When a PERF_RECORD_EXIT event is received, all nodes in the related data structures are
   removed.

The linked list is flexible: list_add & list_del can be used easily. Additionally, the
performance penalty (especially the CPU utilization) is low.

At the end of this cover letter I have attached a test program and its Makefile. When it
is executed, it prints its pid. Then use this command:
'perf top -p *pid*'
perf top will then show the functions that are called by the threads forked on-the-fly.
We can use the 'top' tool to monitor the overhead of 'perf'. The result shows that the CPU
overhead of this patch set is less than 3%. I think this overhead is acceptable.

My test environment is as follows:
# ========
# captured on: Wed Mar 13 15:23:55 2013
# perf version : 3.8.rc7.ga39f52
# arch : x86_64
# nrcpus online : 2
# nrcpus avail : 2
# cpudesc : Intel(R) Core(TM)2 Duo CPU P8700 @ 2.53GHz
# cpuid : GenuineIntel,6,23,10
# total memory : 3034932 kB
#========

This feature is already implemented for 'perf top -p $pid' in patch [8/8] of this
patch set. As a next step, 'perf record -p $pid' should be modified in the same way.

Thanks for David Ahern's suggestion.

Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Yanmin Zhang <yanmin.zhang@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chenggang Qin <chenggang.qcg@taobao.com>

chenggang (8):
  changed thread_map to list
  changed xyarray to list
  changed mmap to xyarray
  changed evsel->id to xyarray
  extend mechanism for evsel->id & evsel->fd
  add some operations for mmap
  changed the method to traverse mmap list
  fork & exit event perceived

 tools/perf/Makefile                       |    3 +-
 tools/perf/builtin-record.c               |    8 +-
 tools/perf/builtin-stat.c                 |    2 +-
 tools/perf/builtin-top.c                  |  116 ++++++++++++-
 tools/perf/tests/mmap-basic.c             |    4 +-
 tools/perf/tests/open-syscall-tp-fields.c |    9 +-
 tools/perf/tests/perf-record.c            |    7 +-
 tools/perf/util/event.c                   |   12 +-
 tools/perf/util/evlist.c                  |  206 +++++++++++++++++++---
 tools/perf/util/evlist.h                  |   14 +-
 tools/perf/util/evsel.c                   |  118 +++++++++++--
 tools/perf/util/evsel.h                   |   13 +-
 tools/perf/util/header.c                  |   28 +--
 tools/perf/util/header.h                  |    3 +-
 tools/perf/util/python.c                  |    6 +-
 tools/perf/util/thread_map.c              |  265 +++++++++++++++++++++--------
 tools/perf/util/thread_map.h              |   16 +-
 tools/perf/util/xyarray.c                 |  125 +++++++++++++-
 tools/perf/util/xyarray.h                 |   68 +++++++-
 19 files changed, 866 insertions(+), 157 deletions(-)

---
Here is a program to test the patch set.

---
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#include <fcntl.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <linux/unistd.h>

#define CHILDREN_NUM 15000
#define UINT_MAX        (~0U)

/*
 * Return a pseudo-random value in [min, max], scaled from one unsigned int
 * read from /dev/urandom.
 *
 * If /dev/urandom cannot be opened or read in full, the sample stays 0 and
 * the function returns min, so the result is always inside the range.
 */
unsigned int new_rand(unsigned int min, unsigned int max)
{
	unsigned int n = 0;
	int fd = open("/dev/urandom", O_RDONLY);

	/* 0 is a valid descriptor (e.g. when stdin is closed); only < 0 is failure. */
	if (fd >= 0) {
		/* A short or failed read must not leave a partially filled sample. */
		if (read(fd, &n, sizeof(n)) != (ssize_t)sizeof(n))
			n = 0;
		/* Close only what was actually opened. */
		close(fd);
	}

	return (unsigned int)((double)n / UINT_MAX * (max - min) + min);
}

/* Return the calling thread's id as reported by the raw SYS_gettid syscall. */
pid_t gettid(void)
{
	long tid = syscall(SYS_gettid);

	return (pid_t)tid;
}

/* Read CLOCK_MONOTONIC and return the reading as one nanosecond count. */
static inline unsigned long long rdclock(void)
{
	struct timespec now;
	unsigned long long nsec;

	clock_gettime(CLOCK_MONOTONIC, &now);

	/* seconds -> nanoseconds, then add the sub-second part */
	nsec = now.tv_sec * 1000000000ULL;
	nsec += now.tv_nsec;

	return nsec;
}

/*
 * Thread workload: print the thread's identifiers, then repeatedly approximate
 * pi with a midpoint-rule sum of 4 / (1 + x^2) over 5000 intervals until a
 * randomly chosen run time (between 50 and 10000000 microseconds) has elapsed.
 *
 * p is the caller-supplied thread index; it is only printed.
 * Always returns 0.
 */
int do_pi(int p)
{
	/*
	 * The result is never consumed; storing it through a volatile object
	 * keeps an optimizing build from discarding the computation, which is
	 * the whole point of this workload.
	 */
	volatile double mypi;
	double h, sum, x;
	long long n, i;

	double cost_time;
	unsigned int exec_time;
	unsigned long long start, end;

	printf("new thread[%d]: %d tid: %d ppid: %d\n", getpid(), p, gettid(), getppid());

	exec_time = new_rand(50, 10000000);
	start = rdclock();

	while (1) {
		n = 5000;
		h = 1.0 / n;
		sum = 0.0;

		for (i = 1; i <= n; i += 1) {
			x = h * (i - 0.5);
			sum += 4.0 / (1.0 + pow(x, 2));
		}

		mypi = h * sum;

		end = rdclock();

		/* rdclock() counts nanoseconds; exec_time is in microseconds */
		cost_time = (double)(end - start) / 1e3;
		if (cost_time > (double)exec_time)
			break;
	}

	return 0;
}

/*
 * pthread start routine with the signature pthread_create() requires.
 * Unpacks the thread index that main() packed into the argument pointer and
 * runs do_pi() with it; the thread's exit value is unused.
 */
static void *do_pi_thread(void *arg)
{
	do_pi((int)(intptr_t)arg);
	return NULL;
}

/*
 * Print the pid, wait 8 seconds, then start CHILDREN_NUM threads
 * running do_pi(), pausing a random 500-1000 microseconds between
 * creations, and finally join all of them.
 *
 * Returns 0 on success, 1 if a thread could not be created.
 */
int main()
{
	int ret;
	int j;

	pthread_t id[CHILDREN_NUM];

	printf("pid: %d\n", getpid());

	sleep(8);

	for (j = 0; j < CHILDREN_NUM; j++) {
		/* pass the index by value inside the pointer-sized argument */
		ret = pthread_create(id + j, NULL, do_pi_thread, (void *)(intptr_t)j);
		if (ret) {
			printf("Create pthread error!\n");
			return 1;
		}
		usleep(new_rand(500, 1000));
	}

	for (j = 0; j < CHILDREN_NUM; j++)
		pthread_join(id[j], NULL);

	return 0;
}

---
If the program above is saved as "thread.c", the following is the Makefile for it.

---
# Builds the "thread" test program from thread.c.
EXEC = thread

OBJS = thread.o

HEADERS =

CC = gcc

INC = -I. -I/usr/include

# Compile-time flags only; libraries belong on the link line.
CFLAGS = ${INC} -g

LIBDIRS = -L/usr/lib/x86_64-linux-gnu

# -lm is required because the program calls pow().
LDLIBS = -lpthread -lm -ldl -lrt

all:${EXEC}
${EXEC} : ${OBJS}
	${CC} -o $@ ${OBJS} ${LDFLAGS} ${LIBDIRS} ${LDLIBS}

${OBJS} : ${HEADERS}

.PHONY : clean
clean :
	rm -f ${OBJS} ${EXEC}

-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/8]Perf: Transform thread_map to linked list
  2013-03-13  9:42 ` [PATCH v3 1/8]Perf: Transform thread_map " chenggang
@ 2013-03-17 23:37   ` David Ahern
  0 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2013-03-17 23:37 UTC (permalink / raw)
  To: chenggang
  Cc: linux-kernel, chenggang, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

Hi:

On 3/13/13 3:42 AM, chenggang wrote:
> ---
>   tools/perf/builtin-stat.c                 |    2 +-
>   tools/perf/tests/open-syscall-tp-fields.c |    2 +-
>   tools/perf/util/event.c                   |   12 +-
>   tools/perf/util/evlist.c                  |    2 +-
>   tools/perf/util/evsel.c                   |   16 +-
>   tools/perf/util/python.c                  |    2 +-
>   tools/perf/util/thread_map.c              |  281 ++++++++++++++++++++++-------
>   tools/perf/util/thread_map.h              |   17 +-
>   8 files changed, 244 insertions(+), 90 deletions(-)

You need to target smaller changes per patch. Think small, bisectable 
changes that evolve the code to where you want it to be.

For example, start with a patch that introduces the API 
thread_map__set_pid_by_idx:

/*
 * Store pid in slot idx of the thread map.
 *
 * Returns 0 on success, or -EINVAL when idx does not name an existing slot
 * (idx < 0 or idx >= map->nr).
 */
int thread_map__set_pid_by_idx(struct thread_map *map, int idx, pid_t pid)
{
	if (idx < 0 || idx >= map->nr)
		return -EINVAL;

	/* the pid array is the 'map' member; 'map[idx]' would index the struct pointer */
	map->map[idx] = pid;

	return 0;
}

and use that method for the perf-stat change, 
tests/open-syscall-tp-fields.c and util/evlist.c. That's patch 1 -- 
small and focused.

>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 9984876..293b09c 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -401,7 +401,7 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
>   		}
>
>   		if (perf_target__none(&target))
> -			evsel_list->threads->map[0] = child_pid;
> +			thread_map__set_pid(evsel_list->threads, 0, child_pid);
>
>   		/*
>   		 * Wait for the child to be ready to exec.
> diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c
> index 1c52fdc..39eb770 100644
> --- a/tools/perf/tests/open-syscall-tp-fields.c
> +++ b/tools/perf/tests/open-syscall-tp-fields.c
> @@ -43,7 +43,7 @@ int test__syscall_open_tp_fields(void)
>
>   	perf_evsel__config(evsel, &opts);
>
> -	evlist->threads->map[0] = getpid();
> +	thread_map__append(evlist->threads, getpid());
>
>   	err = perf_evlist__open(evlist);
>   	if (err < 0) {


The second small patch introduces the method thread_map__get_pid_by_idx 
which is the counterpart to thread_map__set_pid_by_idx -- this time 
returning the pid for a given index. This new method fixes this use in 
util/event.c and the one in python.c below.

> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index 5cd13d7..d093460 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -326,9 +326,11 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
>
>   	err = 0;
>   	for (thread = 0; thread < threads->nr; ++thread) {
> +		pid_t pid = thread_map__get_pid(threads, thread);
> +
>   		if (__event__synthesize_thread(comm_event, mmap_event,
> -					       threads->map[thread], 0,
> -					       process, tool, machine)) {
> +					       pid, 0, process, tool,
> +					       machine)) {
>   			err = -1;
>   			break;
>   		}
> @@ -337,12 +339,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
>   		 * comm.pid is set to thread group id by
>   		 * perf_event__synthesize_comm
>   		 */
> -		if ((int) comm_event->comm.pid != threads->map[thread]) {
> +		if ((int) comm_event->comm.pid != pid) {
>   			bool need_leader = true;
>
>   			/* is thread group leader in thread_map? */
>   			for (j = 0; j < threads->nr; ++j) {
> -				if ((int) comm_event->comm.pid == threads->map[j]) {
> +				pid_t pidj = thread_map__get_pid(threads, j);
> +
> +				if ((int) comm_event->comm.pid == pidj) {
>   					need_leader = false;
>   					break;
>   				}
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index bc4ad79..d5063d6 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -793,7 +793,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
>   	}
>
>   	if (perf_target__none(&opts->target))
> -		evlist->threads->map[0] = evlist->workload.pid;
> +		thread_map__append(evlist->threads, evlist->workload.pid);

Here you can use thread_map__set_pid_by_idx. When you convert the 
xyarray implementation you can come back to this call and change it to 
thread_map__append or have set_pid_by_idx do the append internally if 
the idx == threads->nr

>
>   	close(child_ready_pipe[1]);
>   	close(go_pipe[0]);
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 9c82f98f..57c569d 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -835,7 +835,7 @@ retry_sample_id:
>   			int group_fd;
>
>   			if (!evsel->cgrp)
> -				pid = threads->map[thread];
> +				pid = thread_map__get_pid(threads, thread);
>
>   			group_fd = get_group_fd(evsel, cpu, thread);
>

This part here can be a separate stand-alone patch. In thread_map.c 
introduce the method:

/*
 * Return a pointer to a function-local static thread map that has nr == 1
 * and a single entry of -1. Every call returns the same object.
 */
struct thread_map *thread_map__empty_thread_map(void)
{
	static struct {
		struct thread_map map;
		int threads[1];
	} dummy = {
		.map	 = { .nr = 1 },
		.threads = { -1 },
	};
	struct thread_map *result = &dummy.map;

	return result;
}

In a follow up patch you can change the implementation of this method 
but for now you want a small bisectable change.

> @@ -894,14 +894,6 @@ static struct {
>   	.cpus	= { -1, },
>   };
>
> -static struct {
> -	struct thread_map map;
> -	int threads[1];
> -} empty_thread_map = {
> -	.map.nr	 = 1,
> -	.threads = { -1, },
> -};
> -

keep the above code removal and fix the 2 below fixes.

>   int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>   		     struct thread_map *threads)
>   {
> @@ -911,7 +903,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>   	}
>
>   	if (threads == NULL)
> -		threads = &empty_thread_map.map;
> +		threads = thread_map__empty_thread_map();
>
>   	return __perf_evsel__open(evsel, cpus, threads);
>   }
> @@ -919,7 +911,9 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>   int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
>   			     struct cpu_map *cpus)
>   {
> -	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
> +	struct thread_map *empty_thread_map = thread_map__empty_thread_map();
> +
> +	return __perf_evsel__open(evsel, cpus, empty_thread_map);
>   }
>
>   int perf_evsel__open_per_thread(struct perf_evsel *evsel,
> diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
> index 925e0c3..e3f3f1b 100644
> --- a/tools/perf/util/python.c
> +++ b/tools/perf/util/python.c
> @@ -458,7 +458,7 @@ static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
>   	if (i >= pthreads->threads->nr)
>   		return NULL;
>
> -	return Py_BuildValue("i", pthreads->threads->map[i]);
> +	return Py_BuildValue("i", thread_map__get_pid(pthreads->threads, i));
>   }
>
>   static PySequenceMethods pyrf_thread_map__sequence_methods = {


Once existing uses of threads->map are consolidated you can create a 
patch to convert the thread_map code to xyarray and introduce new 
methods needed.

> diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
> index 9b5f856..301f4ce 100644
> --- a/tools/perf/util/thread_map.c
> +++ b/tools/perf/util/thread_map.c
> @@ -19,9 +19,116 @@ static int filter(const struct dirent *dir)
>   		return 1;
>   }
>

Does that make sense?

David


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 3/8]Perf: Transform evlist->mmap to xyarray
  2013-03-13  9:42 ` [PATCH v3 3/8]Perf: Transform evlist->mmap " chenggang
@ 2013-03-17 23:42   ` David Ahern
  0 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2013-03-17 23:42 UTC (permalink / raw)
  To: chenggang
  Cc: linux-kernel, chenggang, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

On 3/13/13 3:42 AM, chenggang wrote:
> diff --git a/tools/perf/Makefile b/tools/perf/Makefile
> index a2108ca..7f3f066 100644
> --- a/tools/perf/Makefile
> +++ b/tools/perf/Makefile
> @@ -209,7 +209,8 @@ BASIC_CFLAGS = \
>   	-Iutil \
>   	-I. \
>   	-I$(TRACE_EVENT_DIR) \
> -	-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
> +	-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
> +	-std=gnu99

Why is this needed? at best it should be a separate patch just changing 
the Makefile.


>
>   BASIC_LDFLAGS =
>

Take my comments from your patch 1 of this version and apply here as 
well. e.g., in smaller patches introduce APIs to consolidate existing 
references. Once done change the implementation.


David

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 4/8]perf: Transform evsel->id to xyarray
  2013-03-13  9:42 ` [PATCH v3 4/8]perf: Transform evsel->id to xyarray chenggang
@ 2013-03-17 23:45   ` David Ahern
  0 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2013-03-17 23:45 UTC (permalink / raw)
  To: chenggang
  Cc: linux-kernel, chenggang, Peter Zijlstra, Paul Mackerras,
	Ingo Molnar, Arnaldo Carvalho de Melo, Arjan van de Ven,
	Namhyung Kim, Yanmin Zhang, Wu Fengguang, Mike Galbraith,
	Andrew Morton

On 3/13/13 3:42 AM, chenggang wrote:

>   tools/perf/util/evlist.c |    4 +++-
>   tools/perf/util/evsel.c  |   19 +++++++++++++++++--
>   tools/perf/util/evsel.h  |    5 ++++-
>   tools/perf/util/header.c |   28 ++++++++++++++++++----------
>   tools/perf/util/header.h |    3 ++-
>   5 files changed, 44 insertions(+), 15 deletions(-)

Same comment here too -- small patches that move one reference at a time 
and then a final one that flips the implementation.

David


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-03-17 23:45 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-13  9:42 [PATCH v3 8/8]Perf: Add some callback functions to process fork & exit events chenggang
2013-03-13  9:42 ` [PATCH v3 7/8]Perf: changed the method to traverse mmap list chenggang
2013-03-13  9:42 ` [PATCH v3 6/8]Perf: Add extend mechanism for mmap & pollfd chenggang
2013-03-13  9:42 ` [PATCH v3 5/8]Perf: add extend mechanism for evsel->id & evsel->fd chenggang
2013-03-13  9:42 ` [PATCH v3 4/8]perf: Transform evsel->id to xyarray chenggang
2013-03-17 23:45   ` David Ahern
2013-03-13  9:42 ` [PATCH v3 3/8]Perf: Transform evlist->mmap " chenggang
2013-03-17 23:42   ` David Ahern
2013-03-13  9:42 ` [PATCH v3 2/8]Perf: Transform xyarray to linked list chenggang
2013-03-13  9:42 ` [PATCH v3 1/8]Perf: Transform thread_map " chenggang
2013-03-17 23:37   ` David Ahern
2013-03-13  9:42 ` [PATCH v3 0/8]Perf: Make the 'perf top -p $pid' can perceive the new forked threads chenggang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).