linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [GIT PULL] perf fixes
@ 2011-01-24 13:34 Ingo Molnar
  2011-01-24 19:48 ` Linus Torvalds
  0 siblings, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2011-01-24 13:34 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

Linus,

Please pull the latest perf-fixes-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf-fixes-for-linus

The diffstat is larger than usual - in good part due to the PRIx64 format string 
fixes to tools/perf/.

 Thanks,

	Ingo

------------------>
Anton Blanchard (1):
      powerpc, perf: Fix frequency calculation for overflowing counters (FSL version)

Arnaldo Carvalho de Melo (5):
      perf test: Fix build on older glibcs
      perf tools: Fix 64 bit integer format strings
      perf tools: Add missing header, fixes build
      perf tools: Fix build when using gcc 3.4.6
      perf tools: Fix build by checking if extra warnings are supported

Dr. David Alan Gilbert (1):
      perf symbols: Fix annotation of thumb code

Han Pingtian (1):
      perf test: Use cpu_map->[cpu] when setting affinity

Oleg Nesterov (3):
      perf: Fix find_get_context() vs perf_event_exit_task() race
      perf: Fix perf_event_init_task()/perf_event_free_task() interaction
      perf: perf_event_exit_task_context: s/rcu_dereference/rcu_dereference_raw/

Peter Zijlstra (1):
      perf: Annotate cpuctx->ctx.mutex to avoid a lockdep splat

Thomas Renninger (1):
      perf tools: Fix time function double declaration with glibc


 arch/powerpc/kernel/perf_event_fsl_emb.c |    1 +
 kernel/perf_event.c                      |   46 +++++++++++++++----------
 tools/perf/Makefile                      |    9 ++++-
 tools/perf/builtin-annotate.c            |    6 ++--
 tools/perf/builtin-kmem.c                |    4 +-
 tools/perf/builtin-lock.c                |    6 ++--
 tools/perf/builtin-record.c              |    2 +-
 tools/perf/builtin-report.c              |    2 +-
 tools/perf/builtin-sched.c               |   20 +++++-----
 tools/perf/builtin-script.c              |    6 ++--
 tools/perf/builtin-stat.c                |    4 +-
 tools/perf/builtin-test.c                |   54 +++++++++++++++++------------
 tools/perf/builtin-top.c                 |    9 +++--
 tools/perf/util/event.c                  |    5 ++-
 tools/perf/util/header.c                 |    4 +-
 tools/perf/util/hist.c                   |   17 +++++----
 tools/perf/util/include/linux/bitops.h   |    1 +
 tools/perf/util/map.c                    |    3 +-
 tools/perf/util/parse-events.c           |    2 +-
 tools/perf/util/parse-events.h           |    2 +-
 tools/perf/util/probe-event.c            |    2 +-
 tools/perf/util/session.c                |   28 ++++++++-------
 tools/perf/util/svghelper.c              |    9 +++--
 tools/perf/util/symbol.c                 |   16 +++++++--
 tools/perf/util/types.h                  |   10 +++--
 tools/perf/util/ui/browsers/hists.c      |    2 +-
 tools/perf/util/ui/browsers/map.c        |    5 ++-
 tools/perf/util/values.c                 |   10 +++---
 28 files changed, 164 insertions(+), 121 deletions(-)

diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 4dcf5f8..b0dc8f7 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -596,6 +596,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			if (left <= 0)
 				left = period;
 			record = 1;
+			event->hw.last_period = event->hw.sample_period;
 		}
 		if (left < 0x80000000LL)
 			val = 0x80000000LL - left;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 84522c7..126a302 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2201,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
-	/*
-	 * Can't attach events to a dying task.
-	 */
-	err = -ESRCH;
-	if (task->flags & PF_EXITING)
-		goto errout;
-
 	/* Reuse ptrace permission checks for now. */
 	err = -EACCES;
 	if (!ptrace_may_access(task, PTRACE_MODE_READ))
@@ -2268,14 +2261,27 @@ retry:
 
 		get_ctx(ctx);
 
-		if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) {
-			/*
-			 * We raced with some other task; use
-			 * the context they set.
-			 */
+		err = 0;
+		mutex_lock(&task->perf_event_mutex);
+		/*
+		 * If it has already passed perf_event_exit_task().
+		 * we must see PF_EXITING, it takes this mutex too.
+		 */
+		if (task->flags & PF_EXITING)
+			err = -ESRCH;
+		else if (task->perf_event_ctxp[ctxn])
+			err = -EAGAIN;
+		else
+			rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
+		mutex_unlock(&task->perf_event_mutex);
+
+		if (unlikely(err)) {
 			put_task_struct(task);
 			kfree(ctx);
-			goto retry;
+
+			if (err == -EAGAIN)
+				goto retry;
+			goto errout;
 		}
 	}
 
@@ -5374,6 +5380,8 @@ free_dev:
 	goto out;
 }
 
+static struct lock_class_key cpuctx_mutex;
+
 int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
 	int cpu, ret;
@@ -5422,6 +5430,7 @@ skip_type:
 
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		__perf_event_init_context(&cpuctx->ctx);
+		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
 		cpuctx->ctx.type = cpu_context;
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
@@ -6127,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 * scheduled, so we are now safe from rescheduling changing
 	 * our context.
 	 */
-	child_ctx = child->perf_event_ctxp[ctxn];
+	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
 	task_ctx_sched_out(child_ctx, EVENT_ALL);
 
 	/*
@@ -6440,11 +6449,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	unsigned long flags;
 	int ret = 0;
 
-	child->perf_event_ctxp[ctxn] = NULL;
-
-	mutex_init(&child->perf_event_mutex);
-	INIT_LIST_HEAD(&child->perf_event_list);
-
 	if (likely(!parent->perf_event_ctxp[ctxn]))
 		return 0;
 
@@ -6533,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child)
 {
 	int ctxn, ret;
 
+	memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
+	mutex_init(&child->perf_event_mutex);
+	INIT_LIST_HEAD(&child->perf_event_list);
+
 	for_each_task_context_nr(ctxn) {
 		ret = perf_event_init_context(child, ctxn);
 		if (ret)
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 2b5387d..7141c42 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -204,13 +204,11 @@ EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wshadow
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Winit-self
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wpacked
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wredundant-decls
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstack-protector
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-aliasing=3
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-default
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-enum
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wno-system-headers
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wundef
-EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wvolatile-register-var
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wwrite-strings
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wbad-function-cast
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wmissing-declarations
@@ -294,6 +292,13 @@ ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
 	CFLAGS := $(CFLAGS) -fstack-protector-all
 endif
 
+ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -Wstack-protector),y)
+       CFLAGS := $(CFLAGS) -Wstack-protector
+endif
+
+ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -Wvolatile-register-var),y)
+       CFLAGS := $(CFLAGS) -Wvolatile-register-var
+endif
 
 ### --- END CONFIGURATION SECTION ---
 
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index c056cdc..8879463 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -212,7 +212,7 @@ get_source_line(struct hist_entry *he, int len, const char *filename)
 			continue;
 
 		offset = start + i;
-		sprintf(cmd, "addr2line -e %s %016llx", filename, offset);
+		sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
 		fp = popen(cmd, "r");
 		if (!fp)
 			continue;
@@ -270,9 +270,9 @@ static void hist_entry__print_hits(struct hist_entry *self)
 
 	for (offset = 0; offset < len; ++offset)
 		if (h->ip[offset] != 0)
-			printf("%*Lx: %Lu\n", BITS_PER_LONG / 2,
+			printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
 			       sym->start + offset, h->ip[offset]);
-	printf("%*s: %Lu\n", BITS_PER_LONG / 2, "h->sum", h->sum);
+	printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
 }
 
 static int hist_entry__tty_annotate(struct hist_entry *he)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index def7ddc..d97256d 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -371,10 +371,10 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
 			addr = data->ptr;
 
 		if (sym != NULL)
-			snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
+			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
 				 addr - map->unmap_ip(map, sym->start));
 		else
-			snprintf(buf, sizeof(buf), "%#Lx", addr);
+			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
 		printf(" %-34s |", buf);
 
 		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index b9c6e54..2b36def 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -782,9 +782,9 @@ static void print_result(void)
 		pr_info("%10u ", st->nr_acquired);
 		pr_info("%10u ", st->nr_contended);
 
-		pr_info("%15llu ", st->wait_time_total);
-		pr_info("%15llu ", st->wait_time_max);
-		pr_info("%15llu ", st->wait_time_min == ULLONG_MAX ?
+		pr_info("%15" PRIu64 " ", st->wait_time_total);
+		pr_info("%15" PRIu64 " ", st->wait_time_max);
+		pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ?
 		       0 : st->wait_time_min);
 		pr_info("\n");
 	}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index fcd29e8..b2f729f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -817,7 +817,7 @@ static int __cmd_record(int argc, const char **argv)
 	 * Approximate RIP event size: 24 bytes.
 	 */
 	fprintf(stderr,
-		"[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
+		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
 		(double)bytes_written / 1024.0 / 1024.0,
 		output_name,
 		bytes_written / 24);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 75183a4..c27e31f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -197,7 +197,7 @@ static int process_read_event(event_t *event, struct sample_data *sample __used,
 					   event->read.value);
 	}
 
-	dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid,
+	dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
 		    attr ? __event_name(attr->type, attr->config) : "FAIL",
 		    event->read.value);
 
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 29e7ffd..29acb89 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -193,7 +193,7 @@ static void calibrate_run_measurement_overhead(void)
 	}
 	run_measurement_overhead = min_delta;
 
-	printf("run measurement overhead: %Ld nsecs\n", min_delta);
+	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
 }
 
 static void calibrate_sleep_measurement_overhead(void)
@@ -211,7 +211,7 @@ static void calibrate_sleep_measurement_overhead(void)
 	min_delta -= 10000;
 	sleep_measurement_overhead = min_delta;
 
-	printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
+	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
 }
 
 static struct sched_atom *
@@ -617,13 +617,13 @@ static void test_calibrations(void)
 	burn_nsecs(1e6);
 	T1 = get_nsecs();
 
-	printf("the run test took %Ld nsecs\n", T1-T0);
+	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);
 
 	T0 = get_nsecs();
 	sleep_nsecs(1e6);
 	T1 = get_nsecs();
 
-	printf("the sleep test took %Ld nsecs\n", T1-T0);
+	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
 }
 
 #define FILL_FIELD(ptr, field, event, data)	\
@@ -816,10 +816,10 @@ replay_switch_event(struct trace_switch_event *switch_event,
 		delta = 0;
 
 	if (delta < 0)
-		die("hm, delta: %Ld < 0 ?\n", delta);
+		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
 
 	if (verbose) {
-		printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
+		printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
 			switch_event->prev_comm, switch_event->prev_pid,
 			switch_event->next_comm, switch_event->next_pid,
 			delta);
@@ -1048,7 +1048,7 @@ latency_switch_event(struct trace_switch_event *switch_event,
 		delta = 0;
 
 	if (delta < 0)
-		die("hm, delta: %Ld < 0 ?\n", delta);
+		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
 
 
 	sched_out = perf_session__findnew(session, switch_event->prev_pid);
@@ -1221,7 +1221,7 @@ static void output_lat_thread(struct work_atoms *work_list)
 
 	avg = work_list->total_lat / work_list->nb_atoms;
 
-	printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
+	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
 	      (double)work_list->total_runtime / 1e6,
 		 work_list->nb_atoms, (double)avg / 1e6,
 		 (double)work_list->max_lat / 1e6,
@@ -1423,7 +1423,7 @@ map_switch_event(struct trace_switch_event *switch_event,
 		delta = 0;
 
 	if (delta < 0)
-		die("hm, delta: %Ld < 0 ?\n", delta);
+		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
 
 
 	sched_out = perf_session__findnew(session, switch_event->prev_pid);
@@ -1713,7 +1713,7 @@ static void __cmd_lat(void)
 	}
 
 	printf(" -----------------------------------------------------------------------------------------\n");
-	printf("  TOTAL:                |%11.3f ms |%9Ld |\n",
+	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
 		(double)all_runtime/1e6, all_count);
 
 	printf(" ---------------------------------------------------\n");
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 150a606..b766c2a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -77,8 +77,8 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
 	if (session->sample_type & PERF_SAMPLE_RAW) {
 		if (debug_mode) {
 			if (sample->time < last_timestamp) {
-				pr_err("Samples misordered, previous: %llu "
-					"this: %llu\n", last_timestamp,
+				pr_err("Samples misordered, previous: %" PRIu64
+					" this: %" PRIu64 "\n", last_timestamp,
 					sample->time);
 				nr_unordered++;
 			}
@@ -126,7 +126,7 @@ static int __cmd_script(struct perf_session *session)
 	ret = perf_session__process_events(session, &event_ops);
 
 	if (debug_mode)
-		pr_err("Misordered timestamps: %llu\n", nr_unordered);
+		pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
 
 	return ret;
 }
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0ff11d9..a482a19 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -206,8 +206,8 @@ static int read_counter_aggr(struct perf_evsel *counter)
 		update_stats(&ps->res_stats[i], count[i]);
 
 	if (verbose) {
-		fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
-				count[0], count[1], count[2]);
+		fprintf(stderr, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+			event_name(counter), count[0], count[1], count[2]);
 	}
 
 	/*
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index ed56961..5dcdba6 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -146,7 +146,7 @@ next_pair:
 				if (llabs(skew) < page_size)
 					continue;
 
-				pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n",
+				pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
 					 sym->start, sym->name, sym->end, pair->end);
 			} else {
 				struct rb_node *nnd;
@@ -168,11 +168,11 @@ detour:
 					goto detour;
 				}
 
-				pr_debug("%#Lx: diff name v: %s k: %s\n",
+				pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
 					 sym->start, sym->name, pair->name);
 			}
 		} else
-			pr_debug("%#Lx: %s not on kallsyms\n", sym->start, sym->name);
+			pr_debug("%#" PRIx64 ": %s not on kallsyms\n", sym->start, sym->name);
 
 		err = -1;
 	}
@@ -211,10 +211,10 @@ detour:
 
 		if (pair->start == pos->start) {
 			pair->priv = 1;
-			pr_info(" %Lx-%Lx %Lx %s in kallsyms as",
+			pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
 				pos->start, pos->end, pos->pgoff, pos->dso->name);
 			if (pos->pgoff != pair->pgoff || pos->end != pair->end)
-				pr_info(": \n*%Lx-%Lx %Lx",
+				pr_info(": \n*%" PRIx64 "-%" PRIx64 " %" PRIx64 "",
 					pair->start, pair->end, pair->pgoff);
 			pr_info(" %s\n", pair->dso->name);
 			pair->priv = 1;
@@ -307,7 +307,7 @@ static int test__open_syscall_event(void)
 	}
 
 	if (evsel->counts->cpu[0].val != nr_open_calls) {
-		pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %Ld\n",
+		pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
 			 nr_open_calls, evsel->counts->cpu[0].val);
 		goto out_close_fd;
 	}
@@ -332,8 +332,7 @@ static int test__open_syscall_event_on_all_cpus(void)
 	struct perf_evsel *evsel;
 	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
-	cpu_set_t *cpu_set;
-	size_t cpu_set_size;
+	cpu_set_t cpu_set;
 	int id = trace_event__id("sys_enter_open");
 
 	if (id < 0) {
@@ -353,13 +352,8 @@ static int test__open_syscall_event_on_all_cpus(void)
 		return -1;
 	}
 
-	cpu_set = CPU_ALLOC(cpus->nr);
 
-	if (cpu_set == NULL)
-		goto out_thread_map_delete;
-
-	cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
-	CPU_ZERO_S(cpu_set_size, cpu_set);
+	CPU_ZERO(&cpu_set);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.type = PERF_TYPE_TRACEPOINT;
@@ -367,7 +361,7 @@ static int test__open_syscall_event_on_all_cpus(void)
 	evsel = perf_evsel__new(&attr, 0);
 	if (evsel == NULL) {
 		pr_debug("perf_evsel__new\n");
-		goto out_cpu_free;
+		goto out_thread_map_delete;
 	}
 
 	if (perf_evsel__open(evsel, cpus, threads) < 0) {
@@ -379,14 +373,29 @@ static int test__open_syscall_event_on_all_cpus(void)
 
 	for (cpu = 0; cpu < cpus->nr; ++cpu) {
 		unsigned int ncalls = nr_open_calls + cpu;
+		/*
+		 * XXX eventually lift this restriction in a way that
+		 * keeps perf building on older glibc installations
+		 * without CPU_ALLOC. 1024 cpus in 2010 still seems
+		 * a reasonable upper limit tho :-)
+		 */
+		if (cpus->map[cpu] >= CPU_SETSIZE) {
+			pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
+			continue;
+		}
 
-		CPU_SET(cpu, cpu_set);
-		sched_setaffinity(0, cpu_set_size, cpu_set);
+		CPU_SET(cpus->map[cpu], &cpu_set);
+		if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+			pr_debug("sched_setaffinity() failed on CPU %d: %s ",
+				 cpus->map[cpu],
+				 strerror(errno));
+			goto out_close_fd;
+		}
 		for (i = 0; i < ncalls; ++i) {
 			fd = open("/etc/passwd", O_RDONLY);
 			close(fd);
 		}
-		CPU_CLR(cpu, cpu_set);
+		CPU_CLR(cpus->map[cpu], &cpu_set);
 	}
 
 	/*
@@ -402,6 +411,9 @@ static int test__open_syscall_event_on_all_cpus(void)
 	for (cpu = 0; cpu < cpus->nr; ++cpu) {
 		unsigned int expected;
 
+		if (cpus->map[cpu] >= CPU_SETSIZE)
+			continue;
+
 		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
 			pr_debug("perf_evsel__open_read_on_cpu\n");
 			goto out_close_fd;
@@ -409,8 +421,8 @@ static int test__open_syscall_event_on_all_cpus(void)
 
 		expected = nr_open_calls + cpu;
 		if (evsel->counts->cpu[cpu].val != expected) {
-			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
-				 expected, cpu, evsel->counts->cpu[cpu].val);
+			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
+				 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
 			goto out_close_fd;
 		}
 	}
@@ -420,8 +432,6 @@ out_close_fd:
 	perf_evsel__close_fd(evsel, 1, threads->nr);
 out_evsel_delete:
 	perf_evsel__delete(evsel);
-out_cpu_free:
-	CPU_FREE(cpu_set);
 out_thread_map_delete:
 	thread_map__delete(threads);
 	return err;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 05344c6..b6998e0 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -40,6 +40,7 @@
 #include <stdio.h>
 #include <termios.h>
 #include <unistd.h>
+#include <inttypes.h>
 
 #include <errno.h>
 #include <time.h>
@@ -214,7 +215,7 @@ static int parse_source(struct sym_entry *syme)
 	len = sym->end - sym->start;
 
 	sprintf(command,
-		"objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s",
+		"objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s",
 		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
 		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
 
@@ -308,7 +309,7 @@ static void lookup_sym_source(struct sym_entry *syme)
 	struct source_line *line;
 	char pattern[PATTERN_LEN + 1];
 
-	sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4,
+	sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4,
 		map__rip_2objdump(syme->map, symbol->start));
 
 	pthread_mutex_lock(&syme->src->lock);
@@ -537,7 +538,7 @@ static void print_sym_table(void)
 	if (nr_counters == 1 || !display_weighted) {
 		struct perf_evsel *first;
 		first = list_entry(evsel_list.next, struct perf_evsel, node);
-		printf("%Ld", first->attr.sample_period);
+		printf("%" PRIu64, (uint64_t)first->attr.sample_period);
 		if (freq)
 			printf("Hz ");
 		else
@@ -640,7 +641,7 @@ static void print_sym_table(void)
 
 		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
 		if (verbose)
-			printf(" %016llx", sym->start);
+			printf(" %016" PRIx64, sym->start);
 		printf(" %-*.*s", sym_width, sym_width, sym->name);
 		printf(" %-*.*s\n", dso_width, dso_width,
 		       dso_width >= syme->map->dso->long_name_len ?
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 2302ec0..1478ab4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -459,7 +459,8 @@ int event__process_comm(event_t *self, struct sample_data *sample __used,
 int event__process_lost(event_t *self, struct sample_data *sample __used,
 			struct perf_session *session)
 {
-	dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
+	dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
+		    self->lost.id, self->lost.lost);
 	session->hists.stats.total_lost += self->lost.lost;
 	return 0;
 }
@@ -575,7 +576,7 @@ int event__process_mmap(event_t *self, struct sample_data *sample __used,
 	u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	int ret = 0;
 
-	dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n",
+	dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
 			self->mmap.pid, self->mmap.tid, self->mmap.start,
 			self->mmap.len, self->mmap.pgoff, self->mmap.filename);
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 989fa2d..f6a929e7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -798,8 +798,8 @@ static int perf_file_section__process(struct perf_file_section *self,
 				      int feat, int fd)
 {
 	if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) {
-		pr_debug("Failed to lseek to %Ld offset for feature %d, "
-			 "continuing...\n", self->offset, feat);
+		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+			  "%d, continuing...\n", self->offset, feat);
 		return 0;
 	}
 
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c749ba6..32f4f1f 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -636,13 +636,13 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
 			}
 		}
 	} else
-		ret = snprintf(s, size, sep ? "%lld" : "%12lld ", period);
+		ret = snprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period);
 
 	if (symbol_conf.show_nr_samples) {
 		if (sep)
-			ret += snprintf(s + ret, size - ret, "%c%lld", *sep, period);
+			ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period);
 		else
-			ret += snprintf(s + ret, size - ret, "%11lld", period);
+			ret += snprintf(s + ret, size - ret, "%11" PRIu64, period);
 	}
 
 	if (pair_hists) {
@@ -971,7 +971,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip)
 	sym_size = sym->end - sym->start;
 	offset = ip - sym->start;
 
-	pr_debug3("%s: ip=%#Lx\n", __func__, self->ms.map->unmap_ip(self->ms.map, ip));
+	pr_debug3("%s: ip=%#" PRIx64 "\n", __func__, self->ms.map->unmap_ip(self->ms.map, ip));
 
 	if (offset >= sym_size)
 		return 0;
@@ -980,8 +980,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip)
 	h->sum++;
 	h->ip[offset]++;
 
-	pr_debug3("%#Lx %s: period++ [ip: %#Lx, %#Lx] => %Ld\n", self->ms.sym->start,
-		  self->ms.sym->name, ip, ip - self->ms.sym->start, h->ip[offset]);
+	pr_debug3("%#" PRIx64 " %s: period++ [ip: %#" PRIx64 ", %#" PRIx64
+		  "] => %" PRIu64 "\n", self->ms.sym->start, self->ms.sym->name,
+		  ip, ip - self->ms.sym->start, h->ip[offset]);
 	return 0;
 }
 
@@ -1132,7 +1133,7 @@ fallback:
 		goto out_free_filename;
 	}
 
-	pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__,
+	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
 		 filename, sym->name, map->unmap_ip(map, sym->start),
 		 map->unmap_ip(map, sym->end));
 
@@ -1142,7 +1143,7 @@ fallback:
 		 dso, dso->long_name, sym, sym->name);
 
 	snprintf(command, sizeof(command),
-		 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
+		 "objdump --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
 		 map__rip_2objdump(map, sym->start),
 		 map__rip_2objdump(map, sym->end),
 		 symfs_filename, filename);
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index 8be0b96..305c848 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -2,6 +2,7 @@
 #define _PERF_LINUX_BITOPS_H_
 
 #include <linux/kernel.h>
+#include <linux/compiler.h>
 #include <asm/hweight.h>
 
 #define BITS_PER_LONG __WORDSIZE
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 3a7eb6e..a16ecab 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -1,5 +1,6 @@
 #include "symbol.h"
 #include <errno.h>
+#include <inttypes.h>
 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
@@ -195,7 +196,7 @@ int map__overlap(struct map *l, struct map *r)
 
 size_t map__fprintf(struct map *self, FILE *fp)
 {
-	return fprintf(fp, " %Lx-%Lx %Lx %s\n",
+	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
 		       self->start, self->end, self->pgoff, self->dso->name);
 }
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index bc2732e..135f69b 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -279,7 +279,7 @@ const char *__event_name(int type, u64 config)
 	static char buf[32];
 
 	if (type == PERF_TYPE_RAW) {
-		sprintf(buf, "raw 0x%llx", config);
+		sprintf(buf, "raw 0x%" PRIx64, config);
 		return buf;
 	}
 
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index b82cafb..458e3ec 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -23,7 +23,7 @@ struct tracepoint_path {
 };
 
 extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
-extern bool have_tracepoints(struct list_head *evsel_list);
+extern bool have_tracepoints(struct list_head *evlist);
 
 extern int			nr_counters;
 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 128aaab..6e29d9c 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -172,7 +172,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
 	sym = __find_kernel_function_by_name(tp->symbol, &map);
 	if (sym) {
 		addr = map->unmap_ip(map, sym->start + tp->offset);
-		pr_debug("try to find %s+%ld@%llx\n", tp->symbol,
+		pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol,
 			 tp->offset, addr);
 		ret = find_perf_probe_point((unsigned long)addr, pp);
 	}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 313dac2..105f00b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -652,10 +652,11 @@ static void callchain__printf(struct sample_data *sample)
 {
 	unsigned int i;
 
-	printf("... chain: nr:%Lu\n", sample->callchain->nr);
+	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);
 
 	for (i = 0; i < sample->callchain->nr; i++)
-		printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
+		printf("..... %2d: %016" PRIx64 "\n",
+		       i, sample->callchain->ips[i]);
 }
 
 static void perf_session__print_tstamp(struct perf_session *session,
@@ -672,7 +673,7 @@ static void perf_session__print_tstamp(struct perf_session *session,
 		printf("%u ", sample->cpu);
 
 	if (session->sample_type & PERF_SAMPLE_TIME)
-		printf("%Lu ", sample->time);
+		printf("%" PRIu64 " ", sample->time);
 }
 
 static void dump_event(struct perf_session *session, event_t *event,
@@ -681,16 +682,16 @@ static void dump_event(struct perf_session *session, event_t *event,
 	if (!dump_trace)
 		return;
 
-	printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
-	       event->header.type);
+	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
+	       file_offset, event->header.size, event->header.type);
 
 	trace_event(event);
 
 	if (sample)
 		perf_session__print_tstamp(session, event, sample);
 
-	printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
-	       event__get_event_name(event->header.type));
+	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
+	       event->header.size, event__get_event_name(event->header.type));
 }
 
 static void dump_sample(struct perf_session *session, event_t *event,
@@ -699,8 +700,9 @@ static void dump_sample(struct perf_session *session, event_t *event,
 	if (!dump_trace)
 		return;
 
-	printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-	       sample->pid, sample->tid, sample->ip, sample->period);
+	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
+	       event->header.misc, sample->pid, sample->tid, sample->ip,
+	       sample->period);
 
 	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
 		callchain__printf(sample);
@@ -843,8 +845,8 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
 {
 	if (ops->lost == event__process_lost &&
 	    session->hists.stats.total_lost != 0) {
-		ui__warning("Processed %Lu events and LOST %Lu!\n\n"
-			    "Check IO/CPU overload!\n\n",
+		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
+			    "!\n\nCheck IO/CPU overload!\n\n",
 			    session->hists.stats.total_period,
 			    session->hists.stats.total_lost);
 	}
@@ -918,7 +920,7 @@ more:
 
 	if (size == 0 ||
 	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
-		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
+		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
 			    head, event.header.size, event.header.type);
 		/*
 		 * assume we lost track of the stream, check alignment, and
@@ -1023,7 +1025,7 @@ more:
 
 	if (size == 0 ||
 	    perf_session__process_event(session, event, ops, file_pos) < 0) {
-		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
+		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
 			    file_offset + head, event->header.size,
 			    event->header.type);
 		/*
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index b3637db..fb737fe 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -12,6 +12,7 @@
  * of the License.
  */
 
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -43,11 +44,11 @@ static double cpu2y(int cpu)
 	return cpu2slot(cpu) * SLOT_MULT;
 }
 
-static double time2pixels(u64 time)
+static double time2pixels(u64 __time)
 {
 	double X;
 
-	X = 1.0 * svg_page_width * (time - first_time) / (last_time - first_time);
+	X = 1.0 * svg_page_width * (__time - first_time) / (last_time - first_time);
 	return X;
 }
 
@@ -94,7 +95,7 @@ void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end)
 
 	total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT;
 	fprintf(svgfile, "<?xml version=\"1.0\" standalone=\"no\"?> \n");
-	fprintf(svgfile, "<svg width=\"%i\" height=\"%llu\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", svg_page_width, total_height);
+	fprintf(svgfile, "<svg width=\"%i\" height=\"%" PRIu64 "\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", svg_page_width, total_height);
 
 	fprintf(svgfile, "<defs>\n  <style type=\"text/css\">\n    <![CDATA[\n");
 
@@ -483,7 +484,7 @@ void svg_time_grid(void)
 			color = 128;
 		}
 
-		fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%llu\" style=\"stroke:rgb(%i,%i,%i);stroke-width:%1.3f\"/>\n",
+		fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%" PRIu64 "\" style=\"stroke:rgb(%i,%i,%i);stroke-width:%1.3f\"/>\n",
 			time2pixels(i), SLOT_MULT/2, time2pixels(i), total_height, color, color, color, thickness);
 
 		i += 10000000;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 15ccfba..7821d0e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -11,6 +11,7 @@
 #include <sys/param.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <inttypes.h>
 #include "build-id.h"
 #include "debug.h"
 #include "symbol.h"
@@ -153,7 +154,7 @@ static struct symbol *symbol__new(u64 start, u64 len, u8 binding,
 	self->binding = binding;
 	self->namelen = namelen - 1;
 
-	pr_debug4("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end);
+	pr_debug4("%s: %s %#" PRIx64 "-%#" PRIx64 "\n", __func__, name, start, self->end);
 
 	memcpy(self->name, name, namelen);
 
@@ -167,7 +168,7 @@ void symbol__delete(struct symbol *self)
 
 static size_t symbol__fprintf(struct symbol *self, FILE *fp)
 {
-	return fprintf(fp, " %llx-%llx %c %s\n",
+	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
 		       self->start, self->end,
 		       self->binding == STB_GLOBAL ? 'g' :
 		       self->binding == STB_LOCAL  ? 'l' : 'w',
@@ -1161,6 +1162,13 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 
 		section_name = elf_sec__name(&shdr, secstrs);
 
+		/* On ARM, symbols for thumb functions have 1 added to
+		 * the symbol address as a flag - remove it */
+		if ((ehdr.e_machine == EM_ARM) &&
+		    (map->type == MAP__FUNCTION) &&
+		    (sym.st_value & 1))
+			--sym.st_value;
+
 		if (self->kernel != DSO_TYPE_USER || kmodule) {
 			char dso_name[PATH_MAX];
 
@@ -1208,8 +1216,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 		}
 
 		if (curr_dso->adjust_symbols) {
-			pr_debug4("%s: adjusting symbol: st_value: %#Lx "
-				  "sh_addr: %#Lx sh_offset: %#Lx\n", __func__,
+			pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+				  "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
 				  (u64)sym.st_value, (u64)shdr.sh_addr,
 				  (u64)shdr.sh_offset);
 			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h
index 7d6b833..5f3689a 100644
--- a/tools/perf/util/types.h
+++ b/tools/perf/util/types.h
@@ -1,12 +1,14 @@
 #ifndef __PERF_TYPES_H
 #define __PERF_TYPES_H
 
+#include <stdint.h>
+
 /*
- * We define u64 as unsigned long long for every architecture
- * so that we can print it with %Lx without getting warnings.
+ * We define u64 as uint64_t for every architecture
+ * so that we can print it with "%"PRIx64 without getting warnings.
  */
-typedef unsigned long long u64;
-typedef signed long long   s64;
+typedef uint64_t	   u64;
+typedef int64_t		   s64;
 typedef unsigned int	   u32;
 typedef signed int	   s32;
 typedef unsigned short	   u16;
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index ebda8c3..60c463c 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -350,7 +350,7 @@ static char *callchain_list__sym_name(struct callchain_list *self,
 	if (self->ms.sym)
 		return self->ms.sym->name;
 
-	snprintf(bf, bfsize, "%#Lx", self->ip);
+	snprintf(bf, bfsize, "%#" PRIx64, self->ip);
 	return bf;
 }
 
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index e35437d..e515836 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -1,5 +1,6 @@
 #include "../libslang.h"
 #include <elf.h>
+#include <inttypes.h>
 #include <sys/ttydefaults.h>
 #include <ctype.h>
 #include <string.h>
@@ -57,7 +58,7 @@ static void map_browser__write(struct ui_browser *self, void *nd, int row)
 	int width;
 
 	ui_browser__set_percent_color(self, 0, current_entry);
-	slsmg_printf("%*llx %*llx %c ",
+	slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ",
 		     mb->addrlen, sym->start, mb->addrlen, sym->end,
 		     sym->binding == STB_GLOBAL ? 'g' :
 		     sym->binding == STB_LOCAL  ? 'l' : 'w');
@@ -150,6 +151,6 @@ int map__browse(struct map *self)
 		++mb.b.nr_entries;
 	}
 
-	mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr);
+	mb.addrlen = snprintf(tmp, sizeof(tmp), "%" PRIx64, maxaddr);
 	return map_browser__run(&mb);
 }
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index cfa55d6..bdd3347 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -150,7 +150,7 @@ static void perf_read_values__display_pretty(FILE *fp,
 		if (width > tidwidth)
 			tidwidth = width;
 		for (j = 0; j < values->counters; j++) {
-			width = snprintf(NULL, 0, "%Lu", values->value[i][j]);
+			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
 			if (width > counterwidth[j])
 				counterwidth[j] = width;
 		}
@@ -165,7 +165,7 @@ static void perf_read_values__display_pretty(FILE *fp,
 		fprintf(fp, "  %*d  %*d", pidwidth, values->pid[i],
 			tidwidth, values->tid[i]);
 		for (j = 0; j < values->counters; j++)
-			fprintf(fp, "  %*Lu",
+			fprintf(fp, "  %*" PRIu64,
 				counterwidth[j], values->value[i][j]);
 		fprintf(fp, "\n");
 	}
@@ -196,13 +196,13 @@ static void perf_read_values__display_raw(FILE *fp,
 		width = strlen(values->countername[j]);
 		if (width > namewidth)
 			namewidth = width;
-		width = snprintf(NULL, 0, "%llx", values->counterrawid[j]);
+		width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
 		if (width > rawwidth)
 			rawwidth = width;
 	}
 	for (i = 0; i < values->threads; i++) {
 		for (j = 0; j < values->counters; j++) {
-			width = snprintf(NULL, 0, "%Lu", values->value[i][j]);
+			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
 			if (width > countwidth)
 				countwidth = width;
 		}
@@ -214,7 +214,7 @@ static void perf_read_values__display_raw(FILE *fp,
 		countwidth, "Count");
 	for (i = 0; i < values->threads; i++)
 		for (j = 0; j < values->counters; j++)
-			fprintf(fp, "  %*d  %*d  %*s  %*llx  %*Lu\n",
+			fprintf(fp, "  %*d  %*d  %*s  %*" PRIx64 "  %*" PRIu64,
 				pidwidth, values->pid[i],
 				tidwidth, values->tid[i],
 				namewidth, values->countername[j],

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 13:34 [GIT PULL] perf fixes Ingo Molnar
@ 2011-01-24 19:48 ` Linus Torvalds
  2011-01-24 20:07   ` Ingo Molnar
  2011-01-24 20:14   ` Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 16+ messages in thread
From: Linus Torvalds @ 2011-01-24 19:48 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

On Mon, Jan 24, 2011 at 11:34 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
> Please pull the latest perf-fixes-for-linus git tree from:
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf-fixes-for-linus

I'm not sure if this was true before too, but when I do

    perf report -agf sleep 10

while compiling the kernel to get a system profile on x86-32, the
resulting pef.data file will cause "perf report" to just hang.

Is it just me?

                Linus

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 19:48 ` Linus Torvalds
@ 2011-01-24 20:07   ` Ingo Molnar
  2011-01-24 20:11     ` Ingo Molnar
  2011-01-24 20:17     ` Linus Torvalds
  2011-01-24 20:14   ` Arnaldo Carvalho de Melo
  1 sibling, 2 replies; 16+ messages in thread
From: Ingo Molnar @ 2011-01-24 20:07 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Mon, Jan 24, 2011 at 11:34 PM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> > Please pull the latest perf-fixes-for-linus git tree from:
> >
> >   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf-fixes-for-linus
> 
> I'm not sure if this was true before too, but when I do
> 
>     perf report -agf sleep 10

(that's perf record i guess?)

> 
> while compiling the kernel to get a system profile on x86-32, the
> resulting pef.data file will cause "perf report" to just hang.
> 
> Is it just me?

We used to have such bugs recently so i'm quite sure what you see is real.

The above test is almost the same what i did before sending you the pull request, so 
it's not occuring all the time and on all boxes.

The distro version on that box you are using would be helpful, plus the build output 
you get when you build 'tools/perf'. (I.e. which libraries are there. If it comes up 
empty with no complaints you have all the devel libraries.)

We'll try to reproduce it locally before asking more debug data from you, 
perf.data's can be pretty large to send via email :)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:07   ` Ingo Molnar
@ 2011-01-24 20:11     ` Ingo Molnar
  2011-01-24 20:17       ` Ingo Molnar
  2011-01-24 20:17     ` Linus Torvalds
  1 sibling, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2011-01-24 20:11 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton


* Ingo Molnar <mingo@elte.hu> wrote:

> The distro version on that box you are using would be helpful, plus the build 
> output you get when you build 'tools/perf'. (I.e. which libraries are there. If it 
> comes up empty with no complaints you have all the devel libraries.)

and a gdb backtrace of the perf report lockup site would be helpful too i suspect.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 19:48 ` Linus Torvalds
  2011-01-24 20:07   ` Ingo Molnar
@ 2011-01-24 20:14   ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 16+ messages in thread
From: Arnaldo Carvalho de Melo @ 2011-01-24 20:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

Em Tue, Jan 25, 2011 at 05:48:01AM +1000, Linus Torvalds escreveu:
> On Mon, Jan 24, 2011 at 11:34 PM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> > Please pull the latest perf-fixes-for-linus git tree from:
> >
> >   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf-fixes-for-linus
> 
> I'm not sure if this was true before too, but when I do
> 
>     perf report -agf sleep 10
> 
> while compiling the kernel to get a system profile on x86-32, the
> resulting pef.data file will cause "perf report" to just hang.
> 
> Is it just me?

s/report/record/ :-)

I'm building perf/urgent tooling then kernel on my Acer netbook with a:

model name	: Intel(R) Atom(TM) CPU N270   @ 1.60GHz

To check. It now has 2.6.38-rc1 with my perf/core branch tool/kernel
combo, where I couldn't reproduce the problem.

- Arnaldo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:07   ` Ingo Molnar
  2011-01-24 20:11     ` Ingo Molnar
@ 2011-01-24 20:17     ` Linus Torvalds
  2011-01-24 20:27       ` Linus Torvalds
  2011-01-24 20:37       ` [GIT PULL] perf fixes Davidlohr Bueso
  1 sibling, 2 replies; 16+ messages in thread
From: Linus Torvalds @ 2011-01-24 20:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

On Tue, Jan 25, 2011 at 6:07 AM, Ingo Molnar <mingo@elte.hu> wrote:
>>
>>     perf report -agf sleep 10
>
> (that's perf record i guess?)

Eh. Yes,

>> Is it just me?
>
> We used to have such bugs recently so i'm quite sure what you see is real.
>
> The above test is almost the same what i did before sending you the pull request, so
> it's not occuring all the time and on all boxes.

It only happens for me with "g". With "perf record -af sleep 10" it worked.

> The distro version on that box you are using would be helpful, plus the build output
> you get when you build 'tools/perf'. (I.e. which libraries are there. If it comes up
> empty with no complaints you have all the devel libraries.)

It's up-to-date Fedora 14 on x86-32.

And there's no build output at all, except for the trivial:

  [torvalds@eeepc perf]$ make
  PERF_VERSION = 2.6.38.rc2.62.gec30f3.dirty
      GEN common-cmds.h
      * new build flags or prefix
      CC perf.o
      CC builtin-annotate.o
      CC builtin-bench.o
      ...

so I have everything installed.

                    Linus

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:11     ` Ingo Molnar
@ 2011-01-24 20:17       ` Ingo Molnar
  0 siblings, 0 replies; 16+ messages in thread
From: Ingo Molnar @ 2011-01-24 20:17 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton


* Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > The distro version on that box you are using would be helpful, plus the build 
> > output you get when you build 'tools/perf'. (I.e. which libraries are there. If it 
> > comes up empty with no complaints you have all the devel libraries.)
> 
> and a gdb backtrace of the perf report lockup site would be helpful too i suspect.

There's two areas that would be prime suspects: the ELF symbol lookup code, or the 
call-chain code.

Neither is expected to lock up, on arbitrary input of perf.data.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:17     ` Linus Torvalds
@ 2011-01-24 20:27       ` Linus Torvalds
  2011-01-24 20:38         ` Arnaldo Carvalho de Melo
  2011-01-24 20:37       ` [GIT PULL] perf fixes Davidlohr Bueso
  1 sibling, 1 reply; 16+ messages in thread
From: Linus Torvalds @ 2011-01-24 20:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

On Tue, Jan 25, 2011 at 6:17 AM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> It only happens for me with "g". With "perf record -af sleep 10" it worked.

Actually, it's something subtler than that. It must depend on the
actual data, because now when I tried it again, it worked with 'g'
too. I hadn't saved the old perf.data that caused the lockup (it got
overwritten by the non-g test), and now when I try to re-create it it
doesn't hang on the result.

So it's probably some very specific data pattern that causes it.

(And I don't know if it's a hard hang - it could just be something
_very_ slow. But we're talking half a minute kind of slow).

                     Linus

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:17     ` Linus Torvalds
  2011-01-24 20:27       ` Linus Torvalds
@ 2011-01-24 20:37       ` Davidlohr Bueso
  1 sibling, 0 replies; 16+ messages in thread
From: Davidlohr Bueso @ 2011-01-24 20:37 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, linux-kernel, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Frédéric Weisbecker,
	Steven Rostedt, Thomas Gleixner, Andrew Morton

On Tue, 2011-01-25 at 06:17 +1000, Linus Torvalds wrote:
> On Tue, Jan 25, 2011 at 6:07 AM, Ingo Molnar <mingo@elte.hu> wrote:
> >>
> >>     perf report -agf sleep 10
> >
> > (that's perf record i guess?)
> 
> Eh. Yes,
> 
> >> Is it just me?
> >
> > We used to have such bugs recently so i'm quite sure what you see is real.
> >
> > The above test is almost the same what i did before sending you the pull request, so
> > it's not occuring all the time and on all boxes.
> 
> It only happens for me with "g". With "perf record -af sleep 10" it worked.
> 
> > The distro version on that box you are using would be helpful, plus the build output
> > you get when you build 'tools/perf'. (I.e. which libraries are there. If it comes up
> > empty with no complaints you have all the devel libraries.)
> 
> It's up-to-date Fedora 14 on x86-32.
> 
> And there's no build output at all, except for the trivial:
> 
>   [torvalds@eeepc perf]$ make
>   PERF_VERSION = 2.6.38.rc2.62.gec30f3.dirty
>       GEN common-cmds.h
>       * new build flags or prefix
>       CC perf.o
>       CC builtin-annotate.o
>       CC builtin-bench.o
>       ...
> 

For what it's worth I cant reproduce this either, at least on x86_64
using 2.6.38.rc2.37.g9320a7.

> so I have everything installed.
> 
>                     Linus
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:27       ` Linus Torvalds
@ 2011-01-24 20:38         ` Arnaldo Carvalho de Melo
  2011-01-24 21:13           ` Linus Torvalds
  2011-01-24 21:25           ` Ingo Molnar
  0 siblings, 2 replies; 16+ messages in thread
From: Arnaldo Carvalho de Melo @ 2011-01-24 20:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

Em Tue, Jan 25, 2011 at 06:27:31AM +1000, Linus Torvalds escreveu:
> On Tue, Jan 25, 2011 at 6:17 AM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > It only happens for me with "g". With "perf record -af sleep 10" it worked.
> 
> Actually, it's something subtler than that. It must depend on the
> actual data, because now when I tried it again, it worked with 'g'
> too. I hadn't saved the old perf.data that caused the lockup (it got
> overwritten by the non-g test), and now when I try to re-create it it
> doesn't hang on the result.
> 
> So it's probably some very specific data pattern that causes it.
> 
> (And I don't know if it's a hard hang - it could just be something
> _very_ slow. But we're talking half a minute kind of slow).

Was this on a freshly installed machine? Or on a freshly updated one?

Probably its the build-id collecting at the end of a session, on the
first run you had a cold cache and it had to figure out which binaries
to cache on ~/.debug, second time it was already cached so it was fast.

So one way to try to reproduce would be to:

rm -rf ~/.debug

and then try it again.

To double check, you can try to disable the build-id cache with:

--no-buildid-cache or -N

i.e.:

perf record -afN sleep 10

- Arnaldo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:38         ` Arnaldo Carvalho de Melo
@ 2011-01-24 21:13           ` Linus Torvalds
  2011-01-24 21:25           ` Ingo Molnar
  1 sibling, 0 replies; 16+ messages in thread
From: Linus Torvalds @ 2011-01-24 21:13 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ingo Molnar, linux-kernel, Peter Zijlstra,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

On Tue, Jan 25, 2011 at 6:38 AM, Arnaldo Carvalho de Melo
<acme@redhat.com> wrote:
>
> Was this on a freshly installed machine? Or on a freshly updated one?

It is. And the machine is slow too.

> Probably its the build-id collecting at the end of a session, on the
> first run you had a cold cache and it had to figure out which binaries
> to cache on ~/.debug, second time it was already cached so it was fast.

That sounds like a reasonable explanation.

So just ignore this report,

             Linus

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 20:38         ` Arnaldo Carvalho de Melo
  2011-01-24 21:13           ` Linus Torvalds
@ 2011-01-24 21:25           ` Ingo Molnar
  2011-01-24 22:00             ` Arnaldo Carvalho de Melo
  1 sibling, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2011-01-24 21:25 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton


* Arnaldo Carvalho de Melo <acme@redhat.com> wrote:

> Em Tue, Jan 25, 2011 at 06:27:31AM +1000, Linus Torvalds escreveu:
> > On Tue, Jan 25, 2011 at 6:17 AM, Linus Torvalds
> > <torvalds@linux-foundation.org> wrote:
> > >
> > > It only happens for me with "g". With "perf record -af sleep 10" it worked.
> > 
> > Actually, it's something subtler than that. It must depend on the
> > actual data, because now when I tried it again, it worked with 'g'
> > too. I hadn't saved the old perf.data that caused the lockup (it got
> > overwritten by the non-g test), and now when I try to re-create it it
> > doesn't hang on the result.
> > 
> > So it's probably some very specific data pattern that causes it.
> > 
> > (And I don't know if it's a hard hang - it could just be something
> > _very_ slow. But we're talking half a minute kind of slow).
> 
> Was this on a freshly installed machine? Or on a freshly updated one?
> 
> Probably its the build-id collecting at the end of a session, on the
> first run you had a cold cache and it had to figure out which binaries
> to cache on ~/.debug, second time it was already cached so it was fast.

Hm, it would be nice to not surprise users with an unlimited-timeout, up to half a 
minute 'frozen' app. Can we possibly display a more finegrained progress indicator?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 21:25           ` Ingo Molnar
@ 2011-01-24 22:00             ` Arnaldo Carvalho de Melo
  2011-01-25  0:16               ` Ingo Molnar
  0 siblings, 1 reply; 16+ messages in thread
From: Arnaldo Carvalho de Melo @ 2011-01-24 22:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

Em Mon, Jan 24, 2011 at 10:25:26PM +0100, Ingo Molnar escreveu:
> * Arnaldo Carvalho de Melo <acme@redhat.com> wrote:
> > Was this on a freshly installed machine? Or on a freshly updated one?
> > 
> > Probably its the build-id collecting at the end of a session, on the
> > first run you had a cold cache and it had to figure out which binaries
> > to cache on ~/.debug, second time it was already cached so it was fast.
> 
> Hm, it would be nice to not surprise users with an unlimited-timeout, up to half a 
> minute 'frozen' app. Can we possibly display a more finegrained progress indicator?

Definetely, adding this to the todo list.

In fact perf_session__process_events already has an ui_progress stuff,
its just that it works only on report/TUI. Need to make it work with
some /-|/- spinning text progress indicator after a "collecting
build-ids:" string, something like that.

- Arnaldo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [GIT PULL] perf fixes
  2011-01-24 22:00             ` Arnaldo Carvalho de Melo
@ 2011-01-25  0:16               ` Ingo Molnar
  2011-01-25 16:31                 ` [PATCH] x86,percpu: fix percpu_xchg_op() Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2011-01-25  0:16 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Frédéric Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton


* Arnaldo Carvalho de Melo <acme@redhat.com> wrote:

> Em Mon, Jan 24, 2011 at 10:25:26PM +0100, Ingo Molnar escreveu:
> > * Arnaldo Carvalho de Melo <acme@redhat.com> wrote:
> > > Was this on a freshly installed machine? Or on a freshly updated one?
> > > 
> > > Probably its the build-id collecting at the end of a session, on the
> > > first run you had a cold cache and it had to figure out which binaries
> > > to cache on ~/.debug, second time it was already cached so it was fast.
> > 
> > Hm, it would be nice to not surprise users with an unlimited-timeout, up to half a 
> > minute 'frozen' app. Can we possibly display a more finegrained progress indicator?
> 
> Definetely, adding this to the todo list.
> 
> In fact perf_session__process_events already has an ui_progress stuff,
> its just that it works only on report/TUI. Need to make it work with
> some /-|/- spinning text progress indicator after a "collecting
> build-ids:" string, something like that.

Yeah, something like that would awesome!

No need for it to be particularly pretty or complex - just _some_ minimal feedback 
to the user gives us most of the bang for the buck already.

( And, of course, the best solution is to make it all run even faster. When it comes
  to debuginfo data structures i can not prevent myself from thinking 'bloat!' ;-)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] x86,percpu: fix percpu_xchg_op()
  2011-01-25  0:16               ` Ingo Molnar
@ 2011-01-25 16:31                 ` Eric Dumazet
  2011-01-26  7:26                   ` [tip:x86/urgent] percpu, x86: Fix percpu_xchg_op() tip-bot for Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2011-01-25 16:31 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Christoph Lameter
  Cc: Arnaldo Carvalho de Melo, Linus Torvalds, linux-kernel,
	Frederic Weisbecker, Steven Rostedt, Thomas Gleixner,
	Andrew Morton

Hi guys

So I wanted to play again with perf ;)

I had several crashes on a x86_64 machine, while "perf top" was running,
on latest linux-2.6 tree.

Crash was in irq_work(), calling a NULL entry->func()


Code: Bad RIP value.
RIP [<       (null)>]   (null)
 RSP <ffff8800df203e50>
CR2: 0000000000000000
---[ end trace fd7ad949f6766354 ]---
Kernel panic - not syncing: Fatal exception in interrupt
Pid: 0, comm: swapper Tainted: G     D  2.6.38-rc2-00181-gef71723 #413
Call Trace:
<IRQ> [<ffffffff810465b5>] ? panic
    ? kmsg_dump
    ? kmsg_dump
    ? oops_end
    ? no_context
    ? __bad_area_nosemaphore
    ? perf_output_begin
    ? bad_area_nosemaphore
    ? do_page_fault
    ? __task_pid_nr_ns
    ? perf_event_tid
    ? __perf_event_header__init_id
    ? validate_chain
    ? perf_output_sample
    ? trace_hardirqs_off
    ? page_fault
    ? irq_work_run
    ? update_process_times
    ? tick_sched_timer
    ? tick_sched_timer
    ? __run_hrtimer
    ? hrtimer_interrupt
    ? account_system_vtime
    ? smp_apic_timer_interrupt
    ? apic_timer_interrupt
<EOI>  ? restore_args
     ? poll_idle
     ? poll_idle
     ? menu_select
     ? cpuidle_call
 ...

Looking at assembly code, I found 

list = this_cpu_xchg(irq_work_list, NULL);

gives this wrong code : (gcc-4.1.2 cross compiler)

ffffffff810bc45e:
	mov    %gs:0xead0,%rax
	cmpxchg %rax,%gs:0xead0
	jne    ffffffff810bc45e <irq_work_run+0x3e>
	test   %rax,%rax
	je     ffffffff810bc4aa <irq_work_run+0x8a>	

Following patch cures the problem for me

Thanks

[PATCH] x86,percpu: fix percpu_xchg_op()

Tell gcc we dirty eax/rax register in percpu_xchg_op()

Compiler must use another register to store pxo_new__ 

We also dont need to reload percpu value after a jump,
since a 'failed' cmpxchg already updated eax/rax

Wrong generated code was :
	xor     %rax,%rax   /* load 0 into %rax */
1:	mov     %gs:0xead0,%rax
	cmpxchg %rax,%gs:0xead0
	jne     1b
	test    %rax,%rax

After patch :

	xor     %rdx,%rdx   /* load 0 into %rdx */
	mov     %gs:0xead0,%rax
1:	cmpxchg %rdx,%gs:0xead0
	jne     1b:
	test    %rax,%rax

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/percpu.h |   24 ++++++++++++------------
 1 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3788f46..7e17295 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -273,34 +273,34 @@ do {									\
 	typeof(var) pxo_new__ = (nval);					\
 	switch (sizeof(var)) {						\
 	case 1:								\
-		asm("\n1:mov "__percpu_arg(1)",%%al"			\
-		    "\n\tcmpxchgb %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%al"			\
+		    "\n1:\tcmpxchgb %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "q" (pxo_new__)				\
 			    : "memory");				\
 		break;							\
 	case 2:								\
-		asm("\n1:mov "__percpu_arg(1)",%%ax"			\
-		    "\n\tcmpxchgw %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%ax"			\
+		    "\n1:\tcmpxchgw %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "r" (pxo_new__)				\
 			    : "memory");				\
 		break;							\
 	case 4:								\
-		asm("\n1:mov "__percpu_arg(1)",%%eax"			\
-		    "\n\tcmpxchgl %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%eax"			\
+		    "\n1:\tcmpxchgl %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "r" (pxo_new__)				\
 			    : "memory");				\
 		break;							\
 	case 8:								\
-		asm("\n1:mov "__percpu_arg(1)",%%rax"			\
-		    "\n\tcmpxchgq %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%rax"			\
+		    "\n1:\tcmpxchgq %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "r" (pxo_new__)				\
 			    : "memory");				\
 		break;							\



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [tip:x86/urgent] percpu, x86: Fix percpu_xchg_op()
  2011-01-25 16:31                 ` [PATCH] x86,percpu: fix percpu_xchg_op() Eric Dumazet
@ 2011-01-26  7:26                   ` tip-bot for Eric Dumazet
  0 siblings, 0 replies; 16+ messages in thread
From: tip-bot for Eric Dumazet @ 2011-01-26  7:26 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, hpa, mingo, eric.dumazet, a.p.zijlstra,
	torvalds, fweisbec, rostedt, tj, tglx, mingo, cl

Commit-ID:  889a7a6a5d5e64063effd40056bdc7b8fb336bd1
Gitweb:     http://git.kernel.org/tip/889a7a6a5d5e64063effd40056bdc7b8fb336bd1
Author:     Eric Dumazet <eric.dumazet@gmail.com>
AuthorDate: Tue, 25 Jan 2011 17:31:54 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 26 Jan 2011 08:10:49 +0100

percpu, x86: Fix percpu_xchg_op()

These recent percpu commits:

  2485b6464cf8: x86,percpu: Move out of place 64 bit ops into X86_64 section
  8270137a0d50: cpuops: Use cmpxchg for xchg to avoid lock semantics

Caused this 'perf top' crash:

 Kernel panic - not syncing: Fatal exception in interrupt
 Pid: 0, comm: swapper Tainted: G     D
 2.6.38-rc2-00181-gef71723 #413 Call Trace: <IRQ> [<ffffffff810465b5>]
    ? panic
    ? kmsg_dump
    ? kmsg_dump
    ? oops_end
    ? no_context
    ? __bad_area_nosemaphore
    ? perf_output_begin
    ? bad_area_nosemaphore
    ? do_page_fault
    ? __task_pid_nr_ns
    ? perf_event_tid
    ? __perf_event_header__init_id
    ? validate_chain
    ? perf_output_sample
    ? trace_hardirqs_off
    ? page_fault
    ? irq_work_run
    ? update_process_times
    ? tick_sched_timer
    ? tick_sched_timer
    ? __run_hrtimer
    ? hrtimer_interrupt
    ? account_system_vtime
    ? smp_apic_timer_interrupt
    ? apic_timer_interrupt
 ...

Looking at assembly code, I found:

list = this_cpu_xchg(irq_work_list, NULL);

gives this wrong code : (gcc-4.1.2 cross compiler)

ffffffff810bc45e:
	mov    %gs:0xead0,%rax
	cmpxchg %rax,%gs:0xead0
	jne    ffffffff810bc45e <irq_work_run+0x3e>
	test   %rax,%rax
	je     ffffffff810bc4aa <irq_work_run+0x8a>

Tell gcc we dirty eax/rax register in percpu_xchg_op()

Compiler must use another register to store pxo_new__

We also dont need to reload percpu value after a jump,
since a 'failed' cmpxchg already updated eax/rax

Wrong generated code was :
	xor     %rax,%rax   /* load 0 into %rax */
1:	mov     %gs:0xead0,%rax
	cmpxchg %rax,%gs:0xead0
	jne     1b
	test    %rax,%rax

After patch :

	xor     %rdx,%rdx   /* load 0 into %rdx */
	mov     %gs:0xead0,%rax
1:	cmpxchg %rdx,%gs:0xead0
	jne     1b:
	test    %rax,%rax

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Tejun Heo <tj@kernel.org>
LKML-Reference: <1295973114.3588.312.camel@edumazet-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/percpu.h |   24 ++++++++++++------------
 1 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3788f46..7e17295 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -273,34 +273,34 @@ do {									\
 	typeof(var) pxo_new__ = (nval);					\
 	switch (sizeof(var)) {						\
 	case 1:								\
-		asm("\n1:mov "__percpu_arg(1)",%%al"			\
-		    "\n\tcmpxchgb %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%al"			\
+		    "\n1:\tcmpxchgb %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "q" (pxo_new__)				\
 			    : "memory");				\
 		break;							\
 	case 2:								\
-		asm("\n1:mov "__percpu_arg(1)",%%ax"			\
-		    "\n\tcmpxchgw %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%ax"			\
+		    "\n1:\tcmpxchgw %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "r" (pxo_new__)				\
 			    : "memory");				\
 		break;							\
 	case 4:								\
-		asm("\n1:mov "__percpu_arg(1)",%%eax"			\
-		    "\n\tcmpxchgl %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%eax"			\
+		    "\n1:\tcmpxchgl %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "r" (pxo_new__)				\
 			    : "memory");				\
 		break;							\
 	case 8:								\
-		asm("\n1:mov "__percpu_arg(1)",%%rax"			\
-		    "\n\tcmpxchgq %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%rax"			\
+		    "\n1:\tcmpxchgq %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-			    : "=a" (pxo_ret__), "+m" (var)		\
+			    : "=&a" (pxo_ret__), "+m" (var)		\
 			    : "r" (pxo_new__)				\
 			    : "memory");				\
 		break;							\

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2011-01-26  7:27 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-24 13:34 [GIT PULL] perf fixes Ingo Molnar
2011-01-24 19:48 ` Linus Torvalds
2011-01-24 20:07   ` Ingo Molnar
2011-01-24 20:11     ` Ingo Molnar
2011-01-24 20:17       ` Ingo Molnar
2011-01-24 20:17     ` Linus Torvalds
2011-01-24 20:27       ` Linus Torvalds
2011-01-24 20:38         ` Arnaldo Carvalho de Melo
2011-01-24 21:13           ` Linus Torvalds
2011-01-24 21:25           ` Ingo Molnar
2011-01-24 22:00             ` Arnaldo Carvalho de Melo
2011-01-25  0:16               ` Ingo Molnar
2011-01-25 16:31                 ` [PATCH] x86,percpu: fix percpu_xchg_op() Eric Dumazet
2011-01-26  7:26                   ` [tip:x86/urgent] percpu, x86: Fix percpu_xchg_op() tip-bot for Eric Dumazet
2011-01-24 20:37       ` [GIT PULL] perf fixes Davidlohr Bueso
2011-01-24 20:14   ` Arnaldo Carvalho de Melo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).