All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stephane Eranian <eranian@google.com>
To: Ingo Molnar <mingo@kernel.org>
Cc: Michael Ellerman <michael@ellerman.id.au>,
	Paul Mackerras <paulus@samba.org>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>,
	Maynard Johnson <mpjohn@us.ibm.com>,
	Anton Blanchard <anton@samba.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	"mingo@elte.hu" <mingo@elte.hu>,
	"ak@linux.intel.com" <ak@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung.kim@lge.com>
Subject: Re: [PATCH v7 07/18] perf: add generic memory sampling interface
Date: Fri, 25 Jan 2013 16:30:37 +0100	[thread overview]
Message-ID: <CABPqkBRpTPcJOohtujR6kk4tAHDFNCv24Ha6E0v1p5MRkPpy6A@mail.gmail.com> (raw)
In-Reply-To: <20130125090158.GB31478@gmail.com>

On Fri, Jan 25, 2013 at 10:01 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Stephane Eranian <eranian@google.com> wrote:
>
>> This patch adds PERF_SAMPLE_DSRC.
>>
>> PERF_SAMPLE_DSRC collects the data source, i.e., where
>> did the data associated with the sampled instruction
>> come from. Information is stored in a perf_mem_dsrc
>> structure. It contains opcode, mem level, tlb, snoop,
>> lock information, subject to availability in hardware.
>>
>> Signed-off-by: Stephane Eranian <eranian@google.com>
>> ---
>>  include/linux/perf_event.h      |    2 ++
>>  include/uapi/linux/perf_event.h |   68 +++++++++++++++++++++++++++++++++++++--
>>  kernel/events/core.c            |    6 ++++
>>  3 files changed, 74 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index bb2429d..8fe4610 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -579,6 +579,7 @@ struct perf_sample_data {
>>               u32     reserved;
>>       }                               cpu_entry;
>>       u64                             period;
>> +     union  perf_mem_dsrc            dsrc;
>>       struct perf_callchain_entry     *callchain;
>>       struct perf_raw_record          *raw;
>>       struct perf_branch_stack        *br_stack;
>> @@ -599,6 +600,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>>       data->regs_user.regs = NULL;
>>       data->stack_user_size = 0;
>>       data->weight = 0;
>> +     data->dsrc.val = 0;
>>  }
>>
>>  extern void perf_output_sample(struct perf_output_handle *handle,
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index 3e6c394..3e4844c 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -133,9 +133,9 @@ enum perf_event_sample_format {
>>       PERF_SAMPLE_REGS_USER                   = 1U << 12,
>>       PERF_SAMPLE_STACK_USER                  = 1U << 13,
>>       PERF_SAMPLE_WEIGHT                      = 1U << 14,
>> +     PERF_SAMPLE_DSRC                        = 1U << 15,
>>
>> -     PERF_SAMPLE_MAX = 1U << 15,             /* non-ABI */
>> -
>> +     PERF_SAMPLE_MAX = 1U << 16,             /* non-ABI */
>>  };
>>
>>  /*
>> @@ -591,6 +591,7 @@ enum perf_event_type {
>>        *        u64                   dyn_size; } && PERF_SAMPLE_STACK_USER
>>        *
>>        *      { u64                   weight;   } && PERF_SAMPLE_WEIGHT
>> +      *      { u64                   dsrc;     } && PERF_SAMPLE_DSRC
>>        * };
>>        */
>>       PERF_RECORD_SAMPLE                      = 9,
>> @@ -616,4 +617,67 @@ enum perf_callchain_context {
>>  #define PERF_FLAG_FD_OUTPUT          (1U << 1)
>>  #define PERF_FLAG_PID_CGROUP         (1U << 2) /* pid=cgroup id, per-cpu mode only */
>>
>> +union perf_mem_dsrc {
>> +     __u64 val;
>> +     struct {
>> +             __u64   mem_op:5,       /* type of opcode */
>> +                     mem_lvl:14,     /* memory hierarchy level */
>> +                     mem_snoop:5,    /* snoop mode */
>> +                     mem_lock:2,     /* lock instr */
>> +                     mem_dtlb:7,     /* tlb access */
>> +                     mem_rsvd:31;
>> +     };
>> +};
>> +
>> +/* type of opcode (load/store/prefetch,code) */
>> +#define PERF_MEM_OP_NA               0x01 /* not available */
>> +#define PERF_MEM_OP_LOAD     0x02 /* load instruction */
>> +#define PERF_MEM_OP_STORE    0x04 /* store instruction */
>> +#define PERF_MEM_OP_PFETCH   0x08 /* prefetch */
>> +#define PERF_MEM_OP_EXEC     0x10 /* code (execution) */
>> +#define PERF_MEM_OP_SHIFT    0
>> +
>> +/* memory hierarchy (memory level, hit or miss) */
>> +#define PERF_MEM_LVL_NA              0x01  /* not available */
>> +#define PERF_MEM_LVL_HIT     0x02  /* hit level */
>> +#define PERF_MEM_LVL_MISS    0x04  /* miss level  */
>> +#define PERF_MEM_LVL_L1              0x08  /* L1 */
>> +#define PERF_MEM_LVL_LFB     0x10  /* Line Fill Buffer */
>> +#define PERF_MEM_LVL_L2              0x20  /* L2 hit */
>> +#define PERF_MEM_LVL_L3              0x40  /* L3 hit */
>> +#define PERF_MEM_LVL_LOC_RAM 0x80  /* Local DRAM */
>> +#define PERF_MEM_LVL_REM_RAM1        0x100 /* Remote DRAM (1 hop) */
>> +#define PERF_MEM_LVL_REM_RAM2        0x200 /* Remote DRAM (2 hops) */
>> +#define PERF_MEM_LVL_REM_CCE1        0x400 /* Remote Cache (1 hop) */
>> +#define PERF_MEM_LVL_REM_CCE2        0x800 /* Remote Cache (2 hops) */
>> +#define PERF_MEM_LVL_IO              0x1000 /* I/O memory */
>> +#define PERF_MEM_LVL_UNC     0x2000 /* Uncached memory */
>> +#define PERF_MEM_LVL_SHIFT   5
>> +
>> +/* snoop mode */
>> +#define PERF_MEM_SNOOP_NA    0x01 /* not available */
>> +#define PERF_MEM_SNOOP_NONE  0x02 /* no snoop */
>> +#define PERF_MEM_SNOOP_HIT   0x04 /* snoop hit */
>> +#define PERF_MEM_SNOOP_MISS  0x08 /* snoop miss */
>> +#define PERF_MEM_SNOOP_HITM  0x10 /* snoop hit modified */
>> +#define PERF_MEM_SNOOP_SHIFT 19
>> +
>> +/* locked instruction */
>> +#define PERF_MEM_LOCK_NA     0x01 /* not available */
>> +#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
>> +#define PERF_MEM_LOCK_SHIFT  24
>> +
>> +/* TLB access */
>> +#define PERF_MEM_TLB_NA              0x01 /* not available */
>> +#define PERF_MEM_TLB_HIT     0x02 /* hit level */
>> +#define PERF_MEM_TLB_MISS    0x04 /* miss level */
>> +#define PERF_MEM_TLB_L1              0x08 /* L1 */
>> +#define PERF_MEM_TLB_L2              0x10 /* L2 */
>> +#define PERF_MEM_TLB_WK              0x20 /* Hardware Walker*/
>> +#define PERF_MEM_TLB_OS              0x40 /* OS fault handler */
>> +#define PERF_MEM_TLB_SHIFT   26
>> +
>> +#define PERF_MEM_S(a, s) \
>> +     (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
>> +
>
> Would be nice to get feedback from PowerPC folks to see how well
> this matches their memory profiling hw capabilities?
>
I agree. I tried to remain as generic as possible here, but I probably
don't have all the possibilities covered. I remember IBM asking
me about the categories a long time ago, but I haven't heard anything since then.

> I suspect there's a lot of differences, but one can always hope
> ...
>
> If there's some hope for unification we could at least shape it
> in a way that they could pick up and extend.
>
Agreed.

  reply	other threads:[~2013-01-25 15:30 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-24 15:10 [PATCH v7 00/18] perf: add memory access sampling support Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 01/18] perf, x86: Support CPU specific sysfs events Stephane Eranian
2013-01-25 12:16   ` [tip:perf/x86] perf/x86: " tip-bot for Andi Kleen
2013-04-02  9:38   ` [tip:perf/core] " tip-bot for Andi Kleen
2013-01-24 15:10 ` [PATCH v7 02/18] perf/x86: improve sysfs event mapping with event string Stephane Eranian
2013-01-25 12:17   ` [tip:perf/x86] perf/x86: Improve " tip-bot for Stephane Eranian
2013-04-02  9:39   ` [tip:perf/core] " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 03/18] perf/x86: add flags to event constraints Stephane Eranian
2013-01-25 12:18   ` [tip:perf/x86] perf/x86: Add " tip-bot for Stephane Eranian
2013-04-02  9:40   ` [tip:perf/core] " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 04/18] perf, core: Add a concept of a weightened sample v2 Stephane Eranian
2013-01-25 12:20   ` [tip:perf/x86] perf/core: Add weighted samples tip-bot for Andi Kleen
2013-04-02  9:42   ` [tip:perf/core] " tip-bot for Andi Kleen
2013-01-24 15:10 ` [PATCH v7 05/18] perf, tools: Add support for weight v7 (modified) Stephane Eranian
2013-04-02  9:49   ` [tip:perf/core] perf " tip-bot for Andi Kleen
2013-01-24 15:10 ` [PATCH v7 06/18] perf: add support for PERF_SAMPLE_ADDR in dump_sampple() Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 07/18] perf: add generic memory sampling interface Stephane Eranian
2013-01-25  9:01   ` Ingo Molnar
2013-01-25 15:30     ` Stephane Eranian [this message]
2013-01-29 10:37       ` Michael Ellerman
2013-02-15 19:46     ` Sukadev Bhattiprolu
2013-02-16  2:45       ` Benjamin Herrenschmidt
2013-02-16  8:41         ` Ingo Molnar
2013-02-16 14:14         ` Stephane Eranian
2013-01-25 12:21   ` [tip:perf/x86] perf: Add " tip-bot for Stephane Eranian
2013-04-02  9:43   ` [tip:perf/core] " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 08/18] perf/x86: add memory profiling via PEBS Load Latency Stephane Eranian
2013-01-25 12:22   ` [tip:perf/x86] perf/x86: Add " tip-bot for Stephane Eranian
2013-04-02  9:44   ` [tip:perf/core] " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 09/18] perf/x86: export PEBS load latency threshold register to sysfs Stephane Eranian
2013-01-25 12:23   ` [tip:perf/x86] perf/x86: Export " tip-bot for Stephane Eranian
2013-04-02  9:45   ` [tip:perf/core] " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 10/18] perf/x86: add support for PEBS Precise Store Stephane Eranian
2013-01-25 12:24   ` [tip:perf/x86] perf/x86: Add " tip-bot for Stephane Eranian
2013-04-02  9:47   ` [tip:perf/core] " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 11/18] perf tools: add mem access sampling core support Stephane Eranian
2013-03-27 14:14   ` Jiri Olsa
2013-03-27 14:20     ` Peter Zijlstra
2013-03-27 14:34       ` Jiri Olsa
2013-03-27 14:48         ` Stephane Eranian
2013-03-27 16:56           ` Arnaldo Carvalho de Melo
2013-03-28 14:24             ` Stephane Eranian
2013-03-28 15:00               ` Arnaldo Carvalho de Melo
2013-03-28 15:06                 ` Stephane Eranian
2013-03-28 15:12                 ` Arnaldo Carvalho de Melo
2013-03-28 15:15                   ` Stephane Eranian
2013-03-27 14:23     ` Jiri Olsa
2013-04-02  9:50   ` [tip:perf/core] perf tools: Add " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 12/18] perf report: add support for mem access profiling Stephane Eranian
2013-04-02  9:53   ` [tip:perf/core] perf report: Add " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 13/18] perf record: add " Stephane Eranian
2013-04-02  9:51   ` [tip:perf/core] perf record: Add " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 14/18] perf tools: add new mem command for memory " Stephane Eranian
2013-04-02  9:55   ` [tip:perf/core] perf tools: Add " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 15/18] perf: add PERF_RECORD_MISC_MMAP_DATA to RECORD_MMAP Stephane Eranian
2013-01-25 12:25   ` [tip:perf/x86] perf: Add " tip-bot for Stephane Eranian
2013-04-02  9:48   ` [tip:perf/core] " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 16/18] perf tools: detect data vs. text mappings Stephane Eranian
2013-04-02  9:57   ` [tip:perf/core] perf machine: Detect " tip-bot for Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 17/18] perf tools: Ignore ABS symbols when loading data maps Stephane Eranian
2013-01-24 15:10 ` [PATCH v7 18/18] perf tools: Fix output of symbol_daddr offset Stephane Eranian
2013-04-02  9:58   ` [tip:perf/core] " tip-bot for Namhyung Kim
2013-01-25  8:55 ` [PATCH v7 00/18] perf: add memory access sampling support Ingo Molnar
2013-01-25 15:28   ` Stephane Eranian
2013-01-25 10:38 ` Ingo Molnar
2013-02-05 13:03   ` Stephane Eranian
2013-02-05 15:35     ` Arnaldo Carvalho de Melo
2013-02-06 13:24       ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CABPqkBRpTPcJOohtujR6kk4tAHDFNCv24Ha6E0v1p5MRkPpy6A@mail.gmail.com \
    --to=eranian@google.com \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=anton@samba.org \
    --cc=benh@kernel.crashing.org \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael@ellerman.id.au \
    --cc=mingo@elte.hu \
    --cc=mingo@kernel.org \
    --cc=mpjohn@us.ibm.com \
    --cc=namhyung.kim@lge.com \
    --cc=paulus@samba.org \
    --cc=peterz@infradead.org \
    --cc=sukadev@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.