All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V4 01/13] perf/core, x86: Add PERF_SAMPLE_DATA_PAGE_SIZE
@ 2019-01-31 20:27 kan.liang
  2019-01-31 20:27 ` [PATCH V4 02/13] perf tools: Support new sample type for data page size kan.liang
                   ` (12 more replies)
  0 siblings, 13 replies; 23+ messages in thread
From: kan.liang @ 2019-01-31 20:27 UTC (permalink / raw)
  To: peterz, acme, tglx, mingo, linux-kernel
  Cc: eranian, jolsa, namhyung, ak, luto, Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

Current perf can report both virtual address and physical address, but
it doesn't report page size. Users have no idea how large the utilized
page is. They cannot promote/demote large pages to optimize memory use.

Add a new sample type for data page size.

Current perf already has a facility to collect data virtual address.
A __weak function, which retrieves the page size for a given virtual
address, is introduced in the generic code. For now, it always returns 0.
The function must be IRQ-safe.
This patch only implements an x86-specific version, which does a full
page-table walk of a given virtual address to retrieve the page size.
For x86, disabling IRQs over the walk is sufficient to prevent any
tear down of the page tables.
Other architectures can implement their own functions later separately.

The new sample type requires collecting the virtual address. The
virtual address will not be output unless SAMPLE_ADDR is applied.

A u64 type is used for page_size, because struct perf_sample_data is
required to be cacheline aligned.

Large PEBS will be disabled with this sample type, because we need to
track munmap to flush the PEBS buffer for large PEBS. Perf doesn't
support munmap tracking yet. Large PEBS can be enabled later
separately, once munmap tracking is supported.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Changes since V3
- Use the real page size to replace enum.
- Modify the changelog to mention the generic support of
  __weak perf_get_page_size()

 arch/x86/events/core.c          | 31 +++++++++++++++++++++++++++++++
 arch/x86/events/intel/ds.c      |  3 ++-
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h |  4 +++-
 kernel/events/core.c            | 15 +++++++++++++++
 5 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 374a197..229a73b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2578,3 +2578,34 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 	cap->events_mask_len	= x86_pmu.events_mask_len;
 }
 EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
+
+u64 perf_get_page_size(u64 virt)
+{
+	unsigned long flags;
+	unsigned int level;
+	pte_t *pte;
+
+	if (!virt)
+		return 0;
+
+	/*
+	 * Interrupts are disabled, so it prevents any tear down
+	 * of the page tables.
+	 * See the comment near struct mmu_table_batch.
+	 */
+	local_irq_save(flags);
+	if (virt >= TASK_SIZE)
+		pte = lookup_address(virt, &level);
+	else {
+		if (current->mm) {
+			pte = lookup_address_in_pgd(pgd_offset(current->mm, virt),
+						    virt, &level);
+		} else
+			level = PG_LEVEL_NUM;
+	}
+	local_irq_restore(flags);
+	if (level >= PG_LEVEL_NUM)
+		return 0;
+
+	return (u64)page_level_size(level);
+}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e9acf1d..720dc9e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1274,7 +1274,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	}
 
 
-	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
+	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR
+			    | PERF_SAMPLE_DATA_PAGE_SIZE)) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
 		data->addr = pebs->dla;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a79e59f..0e048ab 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -937,6 +937,7 @@ struct perf_sample_data {
 	u64				stack_user_size;
 
 	u64				phys_addr;
+	u64				data_page_size;
 } ____cacheline_aligned;
 
 /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd..0e8d222 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -141,8 +141,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
+	PERF_SAMPLE_DATA_PAGE_SIZE		= 1U << 20,
 
-	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 21,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
@@ -863,6 +864,7 @@ enum perf_event_type {
 	 *	{ u64			abi; # enum perf_sample_regs_abi
 	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
 	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
+	 *	{ u64			data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 236bb8d..d233f45 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1753,6 +1753,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		size += sizeof(data->phys_addr);
 
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		size += sizeof(data->data_page_size);
+
 	event->header_size = size;
 }
 
@@ -6305,6 +6308,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		perf_output_put(handle, data->phys_addr);
 
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		perf_output_put(handle, data->data_page_size);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -6352,6 +6358,12 @@ static u64 perf_virt_to_phys(u64 virt)
 	return phys_addr;
 }
 
+/* Return page size of given virtual address. IRQ-safe required. */
+u64 __weak perf_get_page_size(u64 virt)
+{
+	return 0;
+}
+
 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
 
 struct perf_callchain_entry *
@@ -6493,6 +6505,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		data->phys_addr = perf_virt_to_phys(data->addr);
+
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		data->data_page_size = perf_get_page_size(data->addr);
 }
 
 static __always_inline int
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2019-02-06 20:23 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-31 20:27 [PATCH V4 01/13] perf/core, x86: Add PERF_SAMPLE_DATA_PAGE_SIZE kan.liang
2019-01-31 20:27 ` [PATCH V4 02/13] perf tools: Support new sample type for data page size kan.liang
2019-01-31 20:27 ` [PATCH V4 03/13] perf script: Support " kan.liang
2019-01-31 20:27 ` [PATCH V4 04/13] perf sort: Add sort option for " kan.liang
2019-01-31 20:27 ` [PATCH V4 05/13] perf mem: Factor out a function to generate sort order kan.liang
2019-01-31 20:27 ` [PATCH V4 06/13] perf mem: Clean up output format kan.liang
2019-01-31 20:28 ` [PATCH V4 07/13] perf mem: Support data page size kan.liang
2019-01-31 20:28 ` [PATCH V4 08/13] perf test: Add test case for PERF_SAMPLE_DATA_PAGE_SIZE kan.liang
2019-01-31 20:28 ` [PATCH V4 09/13] perf/core, x86: Add support for PERF_SAMPLE_CODE_PAGE_SIZE kan.liang
2019-01-31 20:28 ` [PATCH V4 10/13] perf tools: " kan.liang
2019-01-31 20:28 ` [PATCH V4 11/13] perf script: " kan.liang
2019-01-31 20:28 ` [PATCH V4 12/13] perf report: " kan.liang
2019-01-31 20:28 ` [PATCH V4 13/13] perf test: Add test case " kan.liang
2019-02-01  9:22 ` [PATCH V4 01/13] perf/core, x86: Add PERF_SAMPLE_DATA_PAGE_SIZE Peter Zijlstra
2019-02-01 10:03   ` Peter Zijlstra
2019-02-01 10:36     ` Kirill A. Shutemov
2019-02-01 12:43       ` Peter Zijlstra
2019-02-01 12:47         ` Peter Zijlstra
2019-02-01 16:16         ` Liang, Kan
2019-02-04 10:54           ` Peter Zijlstra
2019-02-06 20:23             ` Liang, Kan
2019-02-01 10:34   ` Kirill A. Shutemov
2019-02-01 14:45   ` Liang, Kan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.