* [GIT PULL] perfcounters fixes
@ 2009-08-25 18:05 Ingo Molnar
0 siblings, 0 replies; 8+ messages in thread
From: Ingo Molnar @ 2009-08-25 18:05 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, Peter Zijlstra, Paul Mackerras
Linus,
Please pull the latest perfcounters-fixes-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
Thanks,
Ingo
------------------>
Peter Zijlstra (2):
perf tools: Check perf.data owner
perf_counter: Fix typo in read() output generation
kernel/perf_counter.c | 2 +-
tools/perf/builtin-annotate.c | 7 +++++++
tools/perf/builtin-report.c | 7 +++++++
3 files changed, 15 insertions(+), 1 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 36f65e2..f274e19 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1791,7 +1791,7 @@ static int perf_counter_read_group(struct perf_counter *counter,
size += err;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- err = perf_counter_read_entry(counter, read_format,
+ err = perf_counter_read_entry(sub, read_format,
buf + size);
if (err < 0)
return err;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 343e7b1..5e17de9 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -31,6 +31,7 @@ static char *vmlinux = "vmlinux";
static char default_sort_order[] = "comm,symbol";
static char *sort_order = default_sort_order;
+static int force;
static int input;
static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
@@ -1334,6 +1335,11 @@ static int __cmd_annotate(void)
exit(-1);
}
+ if (!force && (stat.st_uid != geteuid())) {
+ fprintf(stderr, "file: %s not owned by current user\n", input_name);
+ exit(-1);
+ }
+
if (!stat.st_size) {
fprintf(stderr, "zero-sized file, nothing to do!\n");
exit(0);
@@ -1439,6 +1445,7 @@ static const struct option options[] = {
"input file name"),
OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
"symbol to annotate"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b53a60f..8b2ec88 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -38,6 +38,7 @@ static char *dso_list_str, *comm_list_str, *sym_list_str,
static struct strlist *dso_list, *comm_list, *sym_list;
static char *field_sep;
+static int force;
static int input;
static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
@@ -1856,6 +1857,11 @@ static int __cmd_report(void)
exit(-1);
}
+ if (!force && (stat.st_uid != geteuid())) {
+ fprintf(stderr, "file: %s not owned by current user\n", input_name);
+ exit(-1);
+ }
+
if (!stat.st_size) {
fprintf(stderr, "zero-sized file, nothing to do!\n");
exit(0);
@@ -2064,6 +2070,7 @@ static const struct option options[] = {
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_BOOLEAN('m', "modules", &modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [GIT PULL] perfcounters fixes
@ 2009-09-04 8:49 Ingo Molnar
0 siblings, 0 replies; 8+ messages in thread
From: Ingo Molnar @ 2009-09-04 8:49 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, Peter Zijlstra, Paul Mackerras
Linus,
Please pull the latest perfcounters-fixes-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
Would be nice if you could pull this into .31-final, due to the
security and the stability fix.
Thanks,
Ingo
------------------>
Ingo Molnar (1):
perf_counters: Increase paranoia level
Paul Mackerras (1):
perf_counter/powerpc: Fix cache event codes for POWER7
Peter Zijlstra (1):
perf_counter: Fix /0 bug in swcounters
arch/powerpc/kernel/power7-pmu.c | 6 +++---
kernel/perf_counter.c | 3 ++-
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 388cf57..018d094 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -317,7 +317,7 @@ static int power7_generic_events[] = {
*/
static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x400f0, 0xc880 },
+ [C(OP_READ)] = { 0xc880, 0x400f0 },
[C(OP_WRITE)] = { 0, 0x300f0 },
[C(OP_PREFETCH)] = { 0xd8b8, 0 },
},
@@ -327,8 +327,8 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(OP_PREFETCH)] = { 0x408a, 0 },
},
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x6080, 0x6084 },
- [C(OP_WRITE)] = { 0x6082, 0x6086 },
+ [C(OP_READ)] = { 0x16080, 0x26080 },
+ [C(OP_WRITE)] = { 0x16082, 0x26082 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f274e19..d7cbc57 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -50,7 +50,7 @@ static atomic_t nr_task_counters __read_mostly;
* 1 - disallow cpu counters to unpriv
* 2 - disallow kernel profiling to unpriv
*/
-int sysctl_perf_counter_paranoid __read_mostly;
+int sysctl_perf_counter_paranoid __read_mostly = 1;
static inline bool perf_paranoid_cpu(void)
{
@@ -4066,6 +4066,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
hwc->sample_period = attr->sample_period;
if (attr->freq && attr->sample_freq)
hwc->sample_period = 1;
+ hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [GIT PULL] perfcounters fixes
@ 2009-08-17 21:39 Ingo Molnar
0 siblings, 0 replies; 8+ messages in thread
From: Ingo Molnar @ 2009-08-17 21:39 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, Peter Zijlstra, Paul Mackerras
Linus,
Please pull the latest perfcounters-fixes-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
Thanks,
Ingo
------------------>
Arnaldo Carvalho de Melo (1):
perf record: Fix typo in pid_synthesize_comm_event
Carlos R. Mafra (1):
perf: Rename perf-examples.txt to examples.txt
Paul Mackerras (1):
perf_counter: Check task on counter read IPI
kernel/perf_counter.c | 11 +++++++++++
.../{perf-examples.txt => examples.txt} | 0
tools/perf/builtin-record.c | 2 +-
3 files changed, 12 insertions(+), 1 deletions(-)
rename tools/perf/Documentation/{perf-examples.txt => examples.txt} (100%)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 534e20d..b8fe739 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1503,10 +1503,21 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
*/
static void __perf_counter_read(void *info)
{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_counter *counter = info;
struct perf_counter_context *ctx = counter->ctx;
unsigned long flags;
+ /*
+ * If this is a task context, we need to check whether it is
+ * the current task context of this cpu. If not it has been
+ * scheduled out before the smp call arrived. In that case
+ * counter->count would have been updated to a recent sample
+ * when the counter was scheduled out.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
local_irq_save(flags);
if (ctx->is_active)
update_context_time(ctx);
diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/examples.txt
similarity index 100%
rename from tools/perf/Documentation/perf-examples.txt
rename to tools/perf/Documentation/examples.txt
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3d051b9..89a5ddc 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -219,7 +219,7 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full)
snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
fp = fopen(filename, "r");
- if (fd == NULL) {
+ if (fp == NULL) {
/*
* We raced with a task exiting - just return:
*/
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [GIT PULL] perfcounters fixes
2009-08-13 19:27 ` Linus Torvalds
@ 2009-08-13 20:10 ` Ingo Molnar
0 siblings, 0 replies; 8+ messages in thread
From: Ingo Molnar @ 2009-08-13 20:10 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, Peter Zijlstra, Paul Mackerras
* Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Thu, 13 Aug 2009, Ingo Molnar wrote:
> >
> > Please pull the latest perfcounters-fixes-for-linus git tree
> > from:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
>
> Is this really generally fixing things that should be fixed after
> -rc?
>
> I'm getting very tired of the perfcounters tree. It's not calming
> down in -rc. I've pulled this, but quite frankly, I think there
> are commits there that you should not have sent me, and I'm
> getting fed up to the point that I suspect next time around I
> won't pull even the valid things just because you're not being
> careful enough.
Sorry about that. You are right and i'll cool the tree down
(massively) and be much more careful.
Ingo
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [GIT PULL] perfcounters fixes
2009-08-13 19:04 Ingo Molnar
@ 2009-08-13 19:27 ` Linus Torvalds
2009-08-13 20:10 ` Ingo Molnar
0 siblings, 1 reply; 8+ messages in thread
From: Linus Torvalds @ 2009-08-13 19:27 UTC (permalink / raw)
To: Ingo Molnar; +Cc: linux-kernel, Peter Zijlstra, Paul Mackerras
On Thu, 13 Aug 2009, Ingo Molnar wrote:
>
> Please pull the latest perfcounters-fixes-for-linus git tree from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
Is this really generally fixing things that should be fixed after -rc?
I'm getting very tired of the perfcounters tree. It's not calming down in
-rc. I've pulled this, but quite frankly, I think there are commits there
that you should not have sent me, and I'm getting fed up to the point that
I suspect next time around I won't pull even the valid things just because
you're not being careful enough.
Linus
^ permalink raw reply [flat|nested] 8+ messages in thread
* [GIT PULL] perfcounters fixes
@ 2009-08-13 19:04 Ingo Molnar
2009-08-13 19:27 ` Linus Torvalds
0 siblings, 1 reply; 8+ messages in thread
From: Ingo Molnar @ 2009-08-13 19:04 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, Peter Zijlstra, Paul Mackerras
Linus,
Please pull the latest perfcounters-fixes-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
Thanks,
Ingo
------------------>
Arnaldo Carvalho de Melo (6):
perf record: Fix .tid and .pid fill-in when synthesizing events
perf report: Show the tid too in -D
perf tools: Fix fallback to cplus_demangle() when bfd_demangle() is not available
perf tools: Fix dso__new handle() to handle deleted DSOs
perf list: Fix large list output by using the pager
perf report: Don't show unresolved DSOs and symbols when -S/-d is used
Frederic Weisbecker (2):
perf tools: Add a per tracepoint counter attribute to get raw sample
perf tools: Add a general option to enable raw sample records
Ingo Molnar (4):
perf_counter, x86: Fix lapic printk message
perf_counter, x86: Fix generic cache events on P6-mobile CPUs
perf_counter, x86: Fix/improve apic fallback
perf_counter: Provide hw_perf_counter_setup_online() APIs
Jens Axboe (1):
perf record: Add missing -C option support for specifying profile cpu
Peter Zijlstra (4):
perf_counter: Fix swcounter context invariance
perf: Rework/fix the whole read vs group stuff
perf_counter: Fix an ipi-deadlock
perf_counter: Report the cloning task as parent on perf_counter_fork()
arch/x86/Kconfig | 2 +-
arch/x86/kernel/cpu/perf_counter.c | 40 ++++-
include/linux/perf_counter.h | 49 ++++--
kernel/perf_counter.c | 338 +++++++++++++++++++++++++-----------
tools/perf/Makefile | 29 ++--
tools/perf/builtin-list.c | 3 +-
tools/perf/builtin-record.c | 95 ++++++----
tools/perf/builtin-report.c | 12 +-
tools/perf/util/parse-events.c | 10 +
tools/perf/util/symbol.c | 17 +--
tools/perf/util/symbol.h | 24 +++
11 files changed, 429 insertions(+), 190 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6..13ffa5d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
+ select HAVE_PERF_COUNTERS if (!M386 && !M486)
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
- select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC
def_bool y
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f9..900332b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
+ int apic;
u64 max_period;
u64 intel_ctrl;
};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
static bool reserve_pmc_hardware(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
int i;
if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
+#endif
return true;
+#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
enable_lapic_nmi_watchdog();
return false;
+#endif
}
static void release_pmc_hardware(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
+#endif
}
static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
+ } else {
+ /*
+ * If we have a PMU initialized but no APIC
+ * interrupts, we cannot sample hardware
+ * counters (user-space has to fall back and
+ * sample via a hrtimer based software counter):
+ */
+ if (!x86_pmu.apic)
+ return -EOPNOTSUPP;
}
counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
void set_perf_counter_pending(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+#endif
}
void perf_counters_lapic_init(void)
{
- if (!x86_pmu_initialized())
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (!x86_pmu.apic || !x86_pmu_initialized())
return;
/*
* Always use NMI for PMU
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
}
static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;
+#ifdef CONFIG_X86_LOCAL_APIC
apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
/*
* Can't rely on the handled return value to say it was our NMI, two
* counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
.event_map = p6_pmu_event_map,
.raw_event = p6_pmu_raw_event,
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
+ .apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ .apic = 1,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
+ .apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
};
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
return -ENODEV;
}
+ x86_pmu = p6_pmu;
+
if (!cpu_has_apic) {
- pr_info("no Local APIC, try rebooting with lapic");
- return -ENODEV;
+ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+ pr_info("no hardware sampling interrupt available.\n");
+ x86_pmu.apic = 0;
}
- x86_pmu = p6_pmu;
-
return 0;
}
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a9d823a..b53f700 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
PERF_SAMPLE_TID = 1U << 1,
PERF_SAMPLE_TIME = 1U << 2,
PERF_SAMPLE_ADDR = 1U << 3,
- PERF_SAMPLE_GROUP = 1U << 4,
+ PERF_SAMPLE_READ = 1U << 4,
PERF_SAMPLE_CALLCHAIN = 1U << 5,
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
@@ -127,16 +127,32 @@ enum perf_counter_sample_format {
};
/*
- * Bits that can be set in attr.read_format to request that
- * reads on the counter should return the indicated quantities,
- * in increasing order of bit value, after the counter value.
+ * The format of the data returned by read() on a perf counter fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ *
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
+ * };
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
- PERF_FORMAT_MAX = 1U << 3, /* non-ABI */
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -343,10 +359,8 @@ enum perf_event_type {
* struct {
* struct perf_event_header header;
* u32 pid, tid;
- * u64 value;
- * { u64 time_enabled; } && PERF_FORMAT_ENABLED
- * { u64 time_running; } && PERF_FORMAT_RUNNING
- * { u64 parent_id; } && PERF_FORMAT_ID
+ *
+ * struct read_format values;
* };
*/
PERF_EVENT_READ = 8,
@@ -364,11 +378,22 @@ enum perf_event_type {
* { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 period; } && PERF_SAMPLE_PERIOD
*
- * { u64 nr;
- * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
+ * { struct read_format values; } && PERF_SAMPLE_READ
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises wrt to
+ * # the stability of its content, it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
+ *
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
* };
@@ -694,6 +719,8 @@ struct perf_sample_data {
extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
+extern void perf_counter_output(struct perf_counter *counter, int nmi,
+ struct perf_sample_data *data);
/*
* Return 1 for a software counter, 0 for a hardware counter
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b0b20a0..534e20d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
void __weak hw_perf_counter_setup(int cpu) { barrier(); }
+void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
int __weak
hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
return;
counter->state = PERF_COUNTER_STATE_INACTIVE;
+ if (counter->pending_disable) {
+ counter->pending_disable = 0;
+ counter->state = PERF_COUNTER_STATE_OFF;
+ }
counter->tstamp_stopped = ctx->time;
counter->pmu->disable(counter);
counter->oncpu = -1;
@@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
-static u64 perf_counter_read_tree(struct perf_counter *counter)
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+ nr += counter->group_leader->nr_siblings;
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+
+ return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
{
struct perf_counter *child;
u64 total = 0;
@@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
return total;
}
+static int perf_counter_read_entry(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ int n = 0, count = 0;
+ u64 values[2];
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ count = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, count))
+ return -EFAULT;
+
+ return count;
+}
+
+static int perf_counter_read_group(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ int n = 0, size = 0, err = -EFAULT;
+ u64 values[3];
+
+ values[n++] = 1 + leader->nr_siblings;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = leader->total_time_enabled +
+ atomic64_read(&leader->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = leader->total_time_running +
+ atomic64_read(&leader->child_total_time_running);
+ }
+
+ size = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, size))
+ return -EFAULT;
+
+ err = perf_counter_read_entry(leader, read_format, buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ err = perf_counter_read_entry(counter, read_format,
+ buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+ }
+
+ return size;
+}
+
+static int perf_counter_read_one(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ if (copy_to_user(buf, values, n * sizeof(u64)))
+ return -EFAULT;
+
+ return n * sizeof(u64);
+}
+
/*
* Read the performance counter - simple non blocking version for now
*/
static ssize_t
perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
{
- u64 values[4];
- int n;
+ u64 read_format = counter->attr.read_format;
+ int ret;
/*
* Return end-of-file for a read on a counter that is in
@@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
if (counter->state == PERF_COUNTER_STATE_ERROR)
return 0;
+ if (count < perf_counter_read_size(counter))
+ return -ENOSPC;
+
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->child_mutex);
- values[0] = perf_counter_read_tree(counter);
- n = 1;
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = counter->total_time_enabled +
- atomic64_read(&counter->child_total_time_enabled);
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = counter->total_time_running +
- atomic64_read(&counter->child_total_time_running);
- if (counter->attr.read_format & PERF_FORMAT_ID)
- values[n++] = primary_counter_id(counter);
+ if (read_format & PERF_FORMAT_GROUP)
+ ret = perf_counter_read_group(counter, read_format, buf);
+ else
+ ret = perf_counter_read_one(counter, read_format, buf);
mutex_unlock(&counter->child_mutex);
- if (count < n * sizeof(u64))
- return -EINVAL;
- count = n * sizeof(u64);
-
- if (copy_to_user(buf, values, count))
- return -EFAULT;
-
- return count;
+ return ret;
}
static ssize_t
@@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
if (counter->pending_disable) {
counter->pending_disable = 0;
- perf_counter_disable(counter);
+ __perf_counter_disable(counter);
}
if (counter->pending_wakeup) {
@@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
return task_pid_nr_ns(p, counter->ns);
}
-static void perf_counter_output(struct perf_counter *counter, int nmi,
+static void perf_output_read_one(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ u64 read_format = counter->attr.read_format;
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = atomic64_read(&counter->count);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+}
+
+/*
+ * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
+ */
+static void perf_output_read_group(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ u64 read_format = counter->attr.read_format;
+ u64 values[5];
+ int n = 0;
+
+ values[n++] = 1 + leader->nr_siblings;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ values[n++] = leader->total_time_enabled;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ values[n++] = leader->total_time_running;
+
+ if (leader != counter)
+ leader->pmu->read(leader);
+
+ values[n++] = atomic64_read(&leader->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(leader);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ n = 0;
+
+ if (sub != counter)
+ sub->pmu->read(sub);
+
+ values[n++] = atomic64_read(&sub->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(sub);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+ }
+}
+
+static void perf_output_read(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ if (counter->attr.read_format & PERF_FORMAT_GROUP)
+ perf_output_read_group(handle, counter);
+ else
+ perf_output_read_one(handle, counter);
+}
+
+void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data)
{
int ret;
@@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
struct {
u32 pid, tid;
} tid_entry;
- struct {
- u64 id;
- u64 counter;
- } group_entry;
struct perf_callchain_entry *callchain = NULL;
int callchain_size = 0;
u64 time;
@@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
header.size += sizeof(u64);
- if (sample_type & PERF_SAMPLE_GROUP) {
- header.size += sizeof(u64) +
- counter->nr_siblings * sizeof(group_entry);
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ header.size += perf_counter_read_size(counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
callchain = perf_callchain(data->regs);
@@ -2759,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
perf_output_put(&handle, data->period);
- /*
- * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
- */
- if (sample_type & PERF_SAMPLE_GROUP) {
- struct perf_counter *leader, *sub;
- u64 nr = counter->nr_siblings;
-
- perf_output_put(&handle, nr);
-
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->pmu->read(sub);
-
- group_entry.id = primary_counter_id(sub);
- group_entry.counter = atomic64_read(&sub->count);
-
- perf_output_put(&handle, group_entry);
- }
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ perf_output_read(&handle, counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
if (callchain)
@@ -2817,8 +2968,6 @@ struct perf_read_event {
u32 pid;
u32 tid;
- u64 value;
- u64 format[3];
};
static void
@@ -2830,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
.header = {
.type = PERF_EVENT_READ,
.misc = 0,
- .size = sizeof(event) - sizeof(event.format),
+ .size = sizeof(event) + perf_counter_read_size(counter),
},
.pid = perf_counter_pid(counter, task),
.tid = perf_counter_tid(counter, task),
- .value = atomic64_read(&counter->count),
};
- int ret, i = 0;
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_enabled;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_running;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_ID) {
- event.header.size += sizeof(u64);
- event.format[i++] = primary_counter_id(counter);
- }
+ int ret;
ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
if (ret)
return;
- perf_output_copy(&handle, &event, event.header.size);
+ perf_output_put(&handle, event);
+ perf_output_read(&handle, counter);
+
perf_output_end(&handle);
}
@@ -2893,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
return;
task_event->event.pid = perf_counter_pid(counter, task);
- task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+ task_event->event.ppid = perf_counter_pid(counter, current);
task_event->event.tid = perf_counter_tid(counter, task);
- task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
+ task_event->event.ptid = perf_counter_tid(counter, current);
perf_output_put(&handle, task_event->event);
perf_output_end(&handle);
@@ -3443,40 +3578,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
static int perf_swcounter_is_counting(struct perf_counter *counter)
{
- struct perf_counter_context *ctx;
- unsigned long flags;
- int count;
-
+ /*
+ * The counter is active, we're good!
+ */
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
return 1;
+ /*
+ * The counter is off/error, not counting.
+ */
if (counter->state != PERF_COUNTER_STATE_INACTIVE)
return 0;
/*
- * If the counter is inactive, it could be just because
- * its task is scheduled out, or because it's in a group
- * which could not go on the PMU. We want to count in
- * the first case but not the second. If the context is
- * currently active then an inactive software counter must
- * be the second case. If it's not currently active then
- * we need to know whether the counter was active when the
- * context was last active, which we can determine by
- * comparing counter->tstamp_stopped with ctx->time.
- *
- * We are within an RCU read-side critical section,
- * which protects the existence of *ctx.
+ * The counter is inactive, if the context is active
+ * we're part of a group that didn't make it on the 'pmu',
+ * not counting.
*/
- ctx = counter->ctx;
- spin_lock_irqsave(&ctx->lock, flags);
- count = 1;
- /* Re-check state now we have the lock */
- if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
- counter->ctx->is_active ||
- counter->tstamp_stopped < ctx->time)
- count = 0;
- spin_unlock_irqrestore(&ctx->lock, flags);
- return count;
+ if (counter->ctx->is_active)
+ return 0;
+
+ /*
+ * We're inactive and the context is too, this means the
+ * task is scheduled out, we're counting events that happen
+ * to us, like migration events.
+ */
+ return 1;
}
static int perf_swcounter_match(struct perf_counter *counter,
@@ -3928,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
atomic64_set(&hwc->period_left, hwc->sample_period);
/*
- * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+ * we currently do not support PERF_FORMAT_GROUP on inherited counters
*/
- if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+ if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
goto done;
switch (attr->type) {
@@ -4592,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
perf_counter_init_cpu(cpu);
break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ hw_perf_counter_setup_online(cpu);
+ break;
+
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
perf_counter_exit_cpu(cpu);
@@ -4616,6 +4748,8 @@ void __init perf_counter_init(void)
{
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
+ perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
+ (void *)(long)smp_processor_id());
register_cpu_notifier(&perf_cpu_nb);
}
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 60411e9..c045b42 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -382,22 +382,29 @@ endif
ifdef NO_DEMANGLE
BASIC_CFLAGS += -DNO_DEMANGLE
else
-
has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
- has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
-
- has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
-
ifeq ($(has_bfd),y)
EXTLIBS += -lbfd
- else ifeq ($(has_bfd_iberty),y)
- EXTLIBS += -lbfd -liberty
- else ifeq ($(has_bfd_iberty_z),y)
- EXTLIBS += -lbfd -liberty -lz
else
- msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
- BASIC_CFLAGS += -DNO_DEMANGLE
+ has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+ ifeq ($(has_bfd_iberty),y)
+ EXTLIBS += -lbfd -liberty
+ else
+ has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+ ifeq ($(has_bfd_iberty_z),y)
+ EXTLIBS += -lbfd -liberty -lz
+ else
+ has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
+ ifeq ($(has_cplus_demangle),y)
+ EXTLIBS += -liberty
+ BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
+ else
+ msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+ BASIC_CFLAGS += -DNO_DEMANGLE
+ endif
+ endif
+ endif
endif
endif
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f990fa8..d88c696 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,11 +10,12 @@
#include "perf.h"
-#include "util/parse-options.h"
#include "util/parse-events.h"
+#include "util/cache.h"
int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
{
+ setup_pager();
print_events();
return 0;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0345aad..3d051b9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,7 +34,9 @@ static int output;
static const char *output_name = "perf.data";
static int group = 0;
static unsigned int realtime_prio = 0;
+static int raw_samples = 0;
static int system_wide = 0;
+static int profile_cpu = -1;
static pid_t target_pid = -1;
static int inherit = 1;
static int force = 0;
@@ -203,46 +205,48 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
-static void pid_synthesize_comm_event(pid_t pid, int full)
+static pid_t pid_synthesize_comm_event(pid_t pid, int full)
{
struct comm_event comm_ev;
char filename[PATH_MAX];
char bf[BUFSIZ];
- int fd;
- size_t size;
- char *field, *sep;
+ FILE *fp;
+ size_t size = 0;
DIR *tasks;
struct dirent dirent, *next;
+ pid_t tgid = 0;
- snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+ snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
- fd = open(filename, O_RDONLY);
- if (fd < 0) {
+ fp = fopen(filename, "r");
+ if (fd == NULL) {
/*
* We raced with a task exiting - just return:
*/
if (verbose)
fprintf(stderr, "couldn't open %s\n", filename);
- return;
- }
- if (read(fd, bf, sizeof(bf)) < 0) {
- fprintf(stderr, "couldn't read %s\n", filename);
- exit(EXIT_FAILURE);
+ return 0;
}
- close(fd);
- /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
memset(&comm_ev, 0, sizeof(comm_ev));
- field = strchr(bf, '(');
- if (field == NULL)
- goto out_failure;
- sep = strchr(++field, ')');
- if (sep == NULL)
- goto out_failure;
- size = sep - field;
- memcpy(comm_ev.comm, field, size++);
-
- comm_ev.pid = pid;
+ while (!comm_ev.comm[0] || !comm_ev.pid) {
+ if (fgets(bf, sizeof(bf), fp) == NULL)
+ goto out_failure;
+
+ if (memcmp(bf, "Name:", 5) == 0) {
+ char *name = bf + 5;
+ while (*name && isspace(*name))
+ ++name;
+ size = strlen(name) - 1;
+ memcpy(comm_ev.comm, name, size++);
+ } else if (memcmp(bf, "Tgid:", 5) == 0) {
+ char *tgids = bf + 5;
+ while (*tgids && isspace(*tgids))
+ ++tgids;
+ tgid = comm_ev.pid = atoi(tgids);
+ }
+ }
+
comm_ev.header.type = PERF_EVENT_COMM;
size = ALIGN(size, sizeof(u64));
comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
comm_ev.tid = pid;
write_output(&comm_ev, comm_ev.header.size);
- return;
+ goto out_fclose;
}
snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
write_output(&comm_ev, comm_ev.header.size);
}
closedir(tasks);
- return;
+
+out_fclose:
+ fclose(fp);
+ return tgid;
out_failure:
fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -276,7 +283,7 @@ out_failure:
exit(EXIT_FAILURE);
}
-static void pid_synthesize_mmap_samples(pid_t pid)
+static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
{
char filename[PATH_MAX];
FILE *fp;
@@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
mmap_ev.len -= mmap_ev.start;
mmap_ev.header.size = (sizeof(mmap_ev) -
(sizeof(mmap_ev.filename) - size));
- mmap_ev.pid = pid;
+ mmap_ev.pid = tgid;
mmap_ev.tid = pid;
write_output(&mmap_ev, mmap_ev.header.size);
@@ -347,14 +354,14 @@ static void synthesize_all(void)
while (!readdir_r(proc, &dirent, &next) && next) {
char *end;
- pid_t pid;
+ pid_t pid, tgid;
pid = strtol(dirent.d_name, &end, 10);
if (*end) /* only interested in proper numerical dirents */
continue;
- pid_synthesize_comm_event(pid, 1);
- pid_synthesize_mmap_samples(pid);
+ tgid = pid_synthesize_comm_event(pid, 1);
+ pid_synthesize_mmap_samples(pid, tgid);
}
closedir(proc);
@@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
PERF_FORMAT_TOTAL_TIME_RUNNING |
PERF_FORMAT_ID;
- attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
@@ -412,6 +419,8 @@ static void create_counter(int counter, int cpu, pid_t pid)
if (call_graph)
attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
+ if (raw_samples)
+ attr->sample_type |= PERF_SAMPLE_RAW;
attr->mmap = track;
attr->comm = track;
@@ -426,6 +435,8 @@ try_again:
if (err == EPERM)
die("Permission error - are you root?\n");
+ else if (err == ENODEV && profile_cpu != -1)
+ die("No such device - did you specify an out-of-range profile CPU?\n");
/*
* If it's cycles then fall back to hrtimer
@@ -559,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
if (pid == -1)
pid = getpid();
- open_counters(-1, pid);
- } else for (i = 0; i < nr_cpus; i++)
- open_counters(i, target_pid);
+ open_counters(profile_cpu, pid);
+ } else {
+ if (profile_cpu != -1) {
+ open_counters(profile_cpu, target_pid);
+ } else {
+ for (i = 0; i < nr_cpus; i++)
+ open_counters(i, target_pid);
+ }
+ }
if (file_new)
perf_header__write(header, output);
if (!system_wide) {
- pid_synthesize_comm_event(pid, 0);
- pid_synthesize_mmap_samples(pid);
+ pid_t tgid = pid_synthesize_comm_event(pid, 0);
+ pid_synthesize_mmap_samples(pid, tgid);
} else
synthesize_all();
@@ -636,10 +653,14 @@ static const struct option options[] = {
"record events on existing pid"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
+ OPT_BOOLEAN('R', "raw-samples", &raw_samples,
+ "collect raw sample records from all opened counters"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('A', "append", &append_file,
"append to the output file to do incremental profiling"),
+ OPT_INTEGER('C', "profile_cpu", &profile_cpu,
+ "CPU to profile on"),
OPT_BOOLEAN('f', "force", &force,
"overwrite existing data file"),
OPT_LONG('c', "count", &default_interval,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 99274ce..b53a60f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1526,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
more_data += sizeof(u64);
}
- dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
+ dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->header.misc,
- event->ip.pid,
+ event->ip.pid, event->ip.tid,
(void *)(long)ip,
(long long)period);
@@ -1590,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
if (show & show_mask) {
struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
- if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
+ if (dso_list && (!dso || !dso->name ||
+ !strlist__has_entry(dso_list, dso->name)))
return 0;
- if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
+ if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
return 0;
if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1612,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
struct thread *thread = threads__findnew(event->mmap.pid);
struct map *map = map__new(&event->mmap);
- dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+ dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->mmap.pid,
+ event->mmap.tid,
(void *)(long)event->mmap.start,
(void *)(long)event->mmap.len,
(void *)(long)event->mmap.pgoff,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4858d83..0441784 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -379,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
struct perf_counter_attr *attr)
{
const char *evt_name;
+ char *flags;
char sys_name[MAX_EVENT_LENGTH];
char id_buf[4];
int fd;
@@ -400,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
strncpy(sys_name, *strp, sys_length);
sys_name[sys_length] = '\0';
evt_name = evt_name + 1;
+
+ flags = strchr(evt_name, ':');
+ if (flags) {
+ *flags = '\0';
+ flags++;
+ if (!strncmp(flags, "record", strlen(flags)))
+ attr->sample_type |= PERF_SAMPLE_RAW;
+ }
+
evt_length = strlen(evt_name);
if (evt_length >= MAX_EVENT_LENGTH)
return 0;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f1dcede..5c0f42e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -7,23 +7,8 @@
#include <gelf.h>
#include <elf.h>
-#ifndef NO_DEMANGLE
-#include <bfd.h>
-#else
-static inline
-char *bfd_demangle(void __used *v, const char __used *c, int __used i)
-{
- return NULL;
-}
-#endif
-
const char *sym_hist_filter;
-#ifndef DMGL_PARAMS
-#define DMGL_PARAMS (1 << 0) /* Include function args */
-#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
-#endif
-
enum dso_origin {
DSO__ORIG_KERNEL = 0,
DSO__ORIG_JAVA_JIT,
@@ -816,6 +801,8 @@ more:
}
out:
free(name);
+ if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
+ return 0;
return ret;
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1e003ec..b53bf01 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -7,6 +7,30 @@
#include <linux/rbtree.h>
#include "module.h"
+#ifdef HAVE_CPLUS_DEMANGLE
+extern char *cplus_demangle(const char *, int);
+
+static inline char *bfd_demangle(void __used *v, const char *c, int i)
+{
+ return cplus_demangle(c, i);
+}
+#else
+#ifdef NO_DEMANGLE
+static inline char *bfd_demangle(void __used *v, const char __used *c,
+ int __used i)
+{
+ return NULL;
+}
+#else
+#include <bfd.h>
+#endif
+#endif
+
+#ifndef DMGL_PARAMS
+#define DMGL_PARAMS (1 << 0) /* Include function args */
+#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
+#endif
+
struct symbol {
struct rb_node rb_node;
u64 start;
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [GIT PULL] perfcounters fixes
@ 2009-08-06 19:53 Ingo Molnar
0 siblings, 0 replies; 8+ messages in thread
From: Ingo Molnar @ 2009-08-06 19:53 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, Peter Zijlstra, Paul Mackerras
Linus,
Please pull the latest perfcounters-fixes-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
Thanks,
Ingo
------------------>
Arnaldo Carvalho de Melo (2):
perf report: Add missing command line options to man page
perf symbol: Fix symbol parsing in certain cases: use the build-id as a symlink
Benjamin Herrenschmidt (1):
perf_counter/powerpc: Check oprofile_cpu_type for NULL before using it
Pekka Enberg (1):
perf report: Make --sort comm,dso,symbol the default
Peter Zijlstra (4):
perf: Auto-detect libbfd
ftrace: Fix perf-tracepoint OOPS
perf: Auto-detect libelf
perf_counter: Fix double list iteration in per task precise stats
arch/powerpc/kernel/mpc7450-pmu.c | 3 +-
arch/powerpc/kernel/power4-pmu.c | 3 +-
arch/powerpc/kernel/power5+-pmu.c | 5 +-
arch/powerpc/kernel/power5-pmu.c | 3 +-
arch/powerpc/kernel/power6-pmu.c | 3 +-
arch/powerpc/kernel/power7-pmu.c | 3 +-
arch/powerpc/kernel/ppc970-pmu.c | 5 +-
include/linux/ftrace_event.h | 8 +--
kernel/perf_counter.c | 2 +-
kernel/trace/trace_event_profile.c | 2 +-
kernel/trace/trace_events.c | 2 +-
tools/perf/Documentation/perf-record.txt | 60 ++++++++++++++++++++-
tools/perf/Makefile | 29 +++++++++-
tools/perf/builtin-report.c | 4 +-
tools/perf/util/symbol.c | 85 ++++++++++++++++++++++++++++-
15 files changed, 189 insertions(+), 28 deletions(-)
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index c244133..cc466d0 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -407,7 +407,8 @@ struct power_pmu mpc7450_pmu = {
static int init_mpc7450_pmu(void)
{
- if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
return -ENODEV;
return register_power_pmu(&mpc7450_pmu);
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index db90b0c..3c90a3d 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -606,7 +606,8 @@ static struct power_pmu power4_pmu = {
static int init_power4_pmu(void)
{
- if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
return -ENODEV;
return register_power_pmu(&power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index f4adca8..31918af 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -678,8 +678,9 @@ static struct power_pmu power5p_pmu = {
static int init_power5p_pmu(void)
{
- if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
- && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
+ && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")))
return -ENODEV;
return register_power_pmu(&power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 29b2c6c..867f6f6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -618,7 +618,8 @@ static struct power_pmu power5_pmu = {
static int init_power5_pmu(void)
{
- if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
return -ENODEV;
return register_power_pmu(&power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 09ae5bf..fa21890 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -537,7 +537,8 @@ static struct power_pmu power6_pmu = {
static int init_power6_pmu(void)
{
- if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
return -ENODEV;
return register_power_pmu(&power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 5a9f5cb..388cf57 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -366,7 +366,8 @@ static struct power_pmu power7_pmu = {
static int init_power7_pmu(void)
{
- if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
return -ENODEV;
return register_power_pmu(&power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 833097a..75dccb7 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -488,8 +488,9 @@ static struct power_pmu ppc970_pmu = {
static int init_ppc970_pmu(void)
{
- if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
- && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP"))
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
+ && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")))
return -ENODEV;
return register_power_pmu(&ppc970_pmu);
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 5c093ff..d7cd193 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -119,11 +119,9 @@ struct ftrace_event_call {
void *filter;
void *mod;
-#ifdef CONFIG_EVENT_PROFILE
- atomic_t profile_count;
- int (*profile_enable)(struct ftrace_event_call *);
- void (*profile_disable)(struct ftrace_event_call *);
-#endif
+ atomic_t profile_count;
+ int (*profile_enable)(struct ftrace_event_call *);
+ void (*profile_disable)(struct ftrace_event_call *);
};
#define MAX_FILTER_PRED 32
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 199ed47..673c1aa 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1104,7 +1104,7 @@ static void perf_counter_sync_stat(struct perf_counter_context *ctx,
__perf_counter_sync_stat(counter, next_counter);
counter = list_next_entry(counter, event_entry);
- next_counter = list_next_entry(counter, event_entry);
+ next_counter = list_next_entry(next_counter, event_entry);
}
}
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 5b5895a..11ba5bb 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id)
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id) {
+ if (event->id == event_id && event->profile_enable) {
ret = event->profile_enable(event);
break;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 23d2972..e75276a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -940,7 +940,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
entry = trace_create_file("enable", 0644, call->dir, call,
enable);
- if (call->id)
+ if (call->id && call->profile_enable)
entry = trace_create_file("id", 0444, call->dir, call,
id);
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 1dbc1ee..6be696b 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -29,13 +29,67 @@ OPTIONS
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ hexadecimal event descriptor.
-a::
- system-wide collection
+ System-wide collection.
-l::
- scale counter values
+ Scale counter values.
+
+-p::
+--pid=::
+ Record events on existing pid.
+
+-r::
+--realtime=::
+ Collect data with this RT SCHED_FIFO priority.
+-A::
+--append::
+ Append to the output file to do incremental profiling.
+
+-f::
+--force::
+ Overwrite existing data file.
+
+-c::
+--count=::
+ Event period to sample.
+
+-o::
+--output=::
+ Output file name.
+
+-i::
+--inherit::
+ Child tasks inherit counters.
+-F::
+--freq=::
+ Profile at this frequency.
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages.
+
+-g::
+--call-graph::
+ Do call-graph (stack chain/backtrace) recording.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-s::
+--stat::
+ Per thread counts.
+
+-d::
+--data::
+ Sample addresses.
+
+-n::
+--no-samples::
+ Don't sample.
SEE ALSO
--------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 4b20fa4..1916e44 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -158,8 +158,10 @@ uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
# If we're on a 64-bit kernel, use -m64
-ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
- M64 := -m64
+ifndef NO_64BIT
+ ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
+ M64 := -m64
+ endif
endif
# CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -345,7 +347,6 @@ BUILTIN_OBJS += builtin-stat.o
BUILTIN_OBJS += builtin-top.o
PERFLIBS = $(LIB_FILE)
-EXTLIBS = -lbfd -liberty
#
# Platform specific tweaks
@@ -374,6 +375,28 @@ ifeq ($(uname_S),Darwin)
PTHREAD_LIBS =
endif
+ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
+ msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel);
+endif
+
+ifdef NO_DEMANGLE
+ BASIC_CFLAGS += -DNO_DEMANGLE
+else
+
+ has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
+
+ has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+
+ ifeq ($(has_bfd),y)
+ EXTLIBS += -lbfd
+ else ifeq ($(has_bfd_iberty),y)
+ EXTLIBS += -lbfd -liberty
+ else
+ msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+ BASIC_CFLAGS += -DNO_DEMANGLE
+ endif
+endif
+
ifndef CC_LD_DYNPATH
ifdef NO_R_TO_GCC_LINKER
# Some gcc does not accept and pass -R to the linker to specify
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ce4f286..8cb58d6 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -31,7 +31,7 @@
static char const *input_name = "perf.data";
static char *vmlinux = NULL;
-static char default_sort_order[] = "comm,dso";
+static char default_sort_order[] = "comm,dso,symbol";
static char *sort_order = default_sort_order;
static char *dso_list_str, *comm_list_str, *sym_list_str,
*col_width_list_str;
@@ -1424,7 +1424,7 @@ print_entries:
if (sort_order == default_sort_order &&
parent_pattern == default_parent_pattern) {
fprintf(fp, "#\n");
- fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n");
+ fprintf(fp, "# (For a higher level overview, try: perf report --sort comm,dso)\n");
fprintf(fp, "#\n");
}
fprintf(fp, "\n");
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b4fe057..16ddca2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -6,7 +6,16 @@
#include <libelf.h>
#include <gelf.h>
#include <elf.h>
+
+#ifndef NO_DEMANGLE
#include <bfd.h>
+#else
+static inline
+char *bfd_demangle(void __used *v, const char __used *c, int __used i)
+{
+ return NULL;
+}
+#endif
const char *sym_hist_filter;
@@ -652,10 +661,69 @@ out_close:
return err;
}
+#define BUILD_ID_SIZE 128
+
+static char *dso__read_build_id(struct dso *self, int verbose)
+{
+ int i;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *build_id_data;
+ Elf_Scn *sec;
+ char *build_id = NULL, *bid;
+ unsigned char *raw;
+ Elf *elf;
+ int fd = open(self->name, O_RDONLY);
+
+ if (fd < 0)
+ goto out;
+
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ if (verbose)
+ fprintf(stderr, "%s: cannot read %s ELF file.\n",
+ __func__, self->name);
+ goto out_close;
+ }
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ if (verbose)
+ fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+ goto out_elf_end;
+ }
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL);
+ if (sec == NULL)
+ goto out_elf_end;
+
+ build_id_data = elf_getdata(sec, NULL);
+ if (build_id_data == NULL)
+ goto out_elf_end;
+ build_id = malloc(BUILD_ID_SIZE);
+ if (build_id == NULL)
+ goto out_elf_end;
+ raw = build_id_data->d_buf + 16;
+ bid = build_id;
+
+ for (i = 0; i < 20; ++i) {
+ sprintf(bid, "%02x", *raw);
+ ++raw;
+ bid += 2;
+ }
+ if (verbose)
+ printf("%s(%s): %s\n", __func__, self->name, build_id);
+out_elf_end:
+ elf_end(elf);
+out_close:
+ close(fd);
+out:
+ return build_id;
+}
+
int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
{
- int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
- char *name = malloc(size);
+ int size = PATH_MAX;
+ char *name = malloc(size), *build_id = NULL;
int variant = 0;
int ret = -1;
int fd;
@@ -677,7 +745,18 @@ more:
case 1: /* Ubuntu */
snprintf(name, size, "/usr/lib/debug%s", self->name);
break;
- case 2: /* Sane people */
+ case 2:
+ build_id = dso__read_build_id(self, verbose);
+ if (build_id != NULL) {
+ snprintf(name, size,
+ "/usr/lib/debug/.build-id/%.2s/%s.debug",
+ build_id, build_id + 2);
+ free(build_id);
+ break;
+ }
+ variant++;
+ /* Fall thru */
+ case 3: /* Sane people */
snprintf(name, size, "%s", self->name);
break;
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [GIT PULL] perfcounters fixes
@ 2009-06-12 18:47 Ingo Molnar
0 siblings, 0 replies; 8+ messages in thread
From: Ingo Molnar @ 2009-06-12 18:47 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-kernel, Peter Zijlstra, Paul Mackerras, Andrew Morton,
Thomas Gleixner
Linus,
Please pull the latest perfcounters-fixes-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perfcounters-fixes-for-linus
Fixes plus make the syscall ABI easier to extend. (this feature is
added in an ABI-compatible way)
Thanks,
Ingo
------------------>
Jaswinder Singh Rajput (1):
powerpc, perf_counter: Fix performance counter event types
Mike Frysinger (1):
perf_counter: Start documenting HAVE_PERF_COUNTERS requirements
Peter Zijlstra (4):
perf_counter: PERF_TYPE_HW_CACHE is a hardware counter too
perf_counter: Remove PERF_TYPE_RAW special casing
perf record: Explicity program a default counter
perf_counter: Add forward/backward attribute ABI compatibility
Yong Wang (2):
perf_counter tools: Remove one L1-data alias
perf_counter/x86: Add a quirk for Atom processors
arch/powerpc/kernel/power7-pmu.c | 12 ++--
arch/x86/kernel/cpu/perf_counter.c | 7 +++
include/linux/perf_counter.h | 22 ++++++--
include/linux/syscalls.h | 2 +-
init/Kconfig | 2 +
kernel/perf_counter.c | 95 +++++++++++++++++++++++++++++++++---
tools/perf/builtin-record.c | 7 ++-
tools/perf/design.txt | 15 ++++++
tools/perf/perf.h | 5 +-
tools/perf/util/parse-events.c | 2 +-
10 files changed, 144 insertions(+), 25 deletions(-)
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index b3f7d12..b72e7a1 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -294,12 +294,12 @@ static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
}
static int power7_generic_events[] = {
- [PERF_COUNT_CPU_CYCLES] = 0x1e,
- [PERF_COUNT_INSTRUCTIONS] = 2,
- [PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */
- [PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
- [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
- [PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 2,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
};
#define C(x) PERF_COUNT_HW_CACHE_##x
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 895c82e..275bc14 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -968,6 +968,13 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
if (!x86_pmu.num_counters_fixed)
return -1;
+ /*
+ * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86_model == 28)
+ return -1;
+
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6e13395..1b3118a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -120,6 +120,8 @@ enum perf_counter_sample_format {
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
PERF_SAMPLE_PERIOD = 1U << 8,
+
+ PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */
};
/*
@@ -131,17 +133,26 @@ enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
+
+ PERF_FORMAT_MAX = 1U << 3, /* non-ABI */
};
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+
/*
* Hardware event to monitor via a performance monitoring counter:
*/
struct perf_counter_attr {
+
/*
* Major type: hardware/software/tracepoint/etc.
*/
__u32 type;
- __u32 __reserved_1;
+
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
/*
* Type specific configuration information.
@@ -168,12 +179,12 @@ struct perf_counter_attr {
comm : 1, /* include comm data */
freq : 1, /* use freq, not period */
- __reserved_2 : 53;
+ __reserved_1 : 53;
__u32 wakeup_events; /* wakeup every n events */
- __u32 __reserved_3;
+ __u32 __reserved_2;
- __u64 __reserved_4;
+ __u64 __reserved_3;
};
/*
@@ -621,7 +632,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
static inline int is_software_counter(struct perf_counter *counter)
{
return (counter->attr.type != PERF_TYPE_RAW) &&
- (counter->attr.type != PERF_TYPE_HARDWARE);
+ (counter->attr.type != PERF_TYPE_HARDWARE) &&
+ (counter->attr.type != PERF_TYPE_HW_CACHE);
}
extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6c84ad..418d90f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
asmlinkage long sys_perf_counter_open(
- const struct perf_counter_attr __user *attr_uptr,
+ struct perf_counter_attr __user *attr_uptr,
pid_t pid, int cpu, int group_fd, unsigned long flags);
#endif
diff --git a/init/Kconfig b/init/Kconfig
index c649657..d3a5096 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -936,6 +936,8 @@ config AIO
config HAVE_PERF_COUNTERS
bool
+ help
+ See tools/perf/design.txt for details.
menu "Performance Counters"
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ef5d8a5..29b685f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3570,12 +3570,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
goto done;
- if (attr->type == PERF_TYPE_RAW) {
- pmu = hw_perf_counter_init(counter);
- goto done;
- }
-
switch (attr->type) {
+ case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
pmu = hw_perf_counter_init(counter);
@@ -3588,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
case PERF_TYPE_TRACEPOINT:
pmu = tp_perf_counter_init(counter);
break;
+
+ default:
+ break;
}
done:
err = 0;
@@ -3614,6 +3613,85 @@ done:
return counter;
}
+static int perf_copy_attr(struct perf_counter_attr __user *uattr,
+ struct perf_counter_attr *attr)
+{
+ int ret;
+ u32 size;
+
+ if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+ return -EFAULT;
+
+ /*
+ * zero the full structure, so that a short copy will be nice.
+ */
+ memset(attr, 0, sizeof(*attr));
+
+ ret = get_user(size, &uattr->size);
+ if (ret)
+ return ret;
+
+ if (size > PAGE_SIZE) /* silly large */
+ goto err_size;
+
+ if (!size) /* abi compat */
+ size = PERF_ATTR_SIZE_VER0;
+
+ if (size < PERF_ATTR_SIZE_VER0)
+ goto err_size;
+
+ /*
+ * If we're handed a bigger struct than we know of,
+ * ensure all the unknown bits are 0.
+ */
+ if (size > sizeof(*attr)) {
+ unsigned long val;
+ unsigned long __user *addr;
+ unsigned long __user *end;
+
+ addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
+ sizeof(unsigned long));
+ end = PTR_ALIGN((void __user *)uattr + size,
+ sizeof(unsigned long));
+
+ for (; addr < end; addr += sizeof(unsigned long)) {
+ ret = get_user(val, addr);
+ if (ret)
+ return ret;
+ if (val)
+ goto err_size;
+ }
+ }
+
+ ret = copy_from_user(attr, uattr, size);
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * If the type exists, the corresponding creation will verify
+ * the attr->config.
+ */
+ if (attr->type >= PERF_TYPE_MAX)
+ return -EINVAL;
+
+ if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+ return -EINVAL;
+
+ if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
+ return -EINVAL;
+
+ if (attr->read_format & ~(PERF_FORMAT_MAX-1))
+ return -EINVAL;
+
+out:
+ return ret;
+
+err_size:
+ put_user(sizeof(*attr), &uattr->size);
+ ret = -E2BIG;
+ goto out;
+}
+
/**
* sys_perf_counter_open - open a performance counter, associate it to a task/cpu
*
@@ -3623,7 +3701,7 @@ done:
* @group_fd: group leader counter fd
*/
SYSCALL_DEFINE5(perf_counter_open,
- const struct perf_counter_attr __user *, attr_uptr,
+ struct perf_counter_attr __user *, attr_uptr,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
struct perf_counter *counter, *group_leader;
@@ -3639,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open,
if (flags)
return -EINVAL;
- if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
- return -EFAULT;
+ ret = perf_copy_attr(attr_uptr, &attr);
+ if (ret)
+ return ret;
if (!attr.exclude_kernel) {
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 29259e7..0f5771f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -568,8 +568,11 @@ int cmd_record(int argc, const char **argv, const char *prefix)
if (!argc && target_pid == -1 && !system_wide)
usage_with_options(record_usage, options);
- if (!nr_counters)
- nr_counters = 1;
+ if (!nr_counters) {
+ nr_counters = 1;
+ attrs[0].type = PERF_TYPE_HARDWARE;
+ attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
+ }
for (counter = 0; counter < nr_counters; counter++) {
if (attrs[counter].sample_period)
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 860e116..f71e0d2 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -440,3 +440,18 @@ by this process or by another, and doesn't affect any counters that
this process has created on other processes. It only enables or
disables the group leaders, not any other members in the groups.
+
+Arch requirements
+-----------------
+
+If your architecture does not have hardware performance metrics, you can
+still use the generic software counters based on hrtimers for sampling.
+
+So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you
+will need at least this:
+ - asm/perf_counter.h - a basic stub will suffice at first
+ - support for atomic64 types (and associated helper functions)
+ - set_perf_counter_pending() implemented
+
+If your architecture does have hardware capabilities, you can override the
+weak stub hw_perf_counter_init() to register hardware counters.
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index af0a504..87a1aca 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void)
_min1 < _min2 ? _min1 : _min2; })
static inline int
-sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
+sys_perf_counter_open(struct perf_counter_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
- return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
+ attr->size = sizeof(*attr);
+ return syscall(__NR_perf_counter_open, attr, pid, cpu,
group_fd, flags);
}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9d5f1ca..5a72586 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -75,7 +75,7 @@ static char *sw_event_names[] = {
#define MAX_ALIASES 8
static char *hw_cache [][MAX_ALIASES] = {
- { "L1-data" , "l1-d", "l1d", "l1" },
+ { "L1-data" , "l1-d", "l1d" },
{ "L1-instruction" , "l1-i", "l1i" },
{ "L2" , "l2" },
{ "Data-TLB" , "dtlb", "d-tlb" },
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2009-09-04 8:49 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-25 18:05 [GIT PULL] perfcounters fixes Ingo Molnar
-- strict thread matches above, loose matches on Subject: below --
2009-09-04 8:49 Ingo Molnar
2009-08-17 21:39 Ingo Molnar
2009-08-13 19:04 Ingo Molnar
2009-08-13 19:27 ` Linus Torvalds
2009-08-13 20:10 ` Ingo Molnar
2009-08-06 19:53 Ingo Molnar
2009-06-12 18:47 Ingo Molnar
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.