All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR
@ 2017-08-29  0:52 kan.liang
  2017-08-29 11:35 ` Madhavan Srinivasan
  2017-08-29 14:22 ` [tip:perf/core] perf/core, x86: " tip-bot for Kan Liang
  0 siblings, 2 replies; 6+ messages in thread
From: kan.liang @ 2017-08-29  0:52 UTC (permalink / raw)
  To: peterz, mingo, linux-kernel
  Cc: acme, jolsa, tglx, eranian, ak, mpe, maddy, Kan Liang

From: Kan Liang <kan.liang@intel.com>

For understanding how the workload maps to memory channels and hardware
behavior, it's very important to collect address maps with physical
addresses. For example, 3D XPoint access can only be found by filtering
the physical address.

Add a new sample type for physical address.

perf already has a facility to collect data virtual address. This patch
introduces a function to convert the virtual address to physical address.
The function is quite generic and can be extended to any architecture as
long as a virtual address is provided.
 - For kernel direct mapping addresses, virt_to_phys is used to convert
   the virtual addresses to physical addresses.
 - For user virtual addresses, __get_user_pages_fast is used to walk the
   page tables to obtain the user physical address.
 - This does not work for vmalloc addresses right now. These are not
   resolved, but code to do that could be added.

The new sample type requires collecting the virtual address. The
virtual address will not be output unless SAMPLE_ADDR is applied.

For security, the physical address can only be exposed to root or
privileged user.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---

This is the kernel patch.
The user space patch can be found here:
https://www.spinics.net/lists/kernel/msg2587093.html

Changes since V7
 - Fix virt_addr_valid compile warning for MIPS architecture (LKP)

 arch/powerpc/perf/core-book3s.c |  3 ++-
 arch/x86/events/intel/ds.c      |  2 +-
 arch/x86/events/perf_event.h    |  2 +-
 include/linux/perf_event.h      |  2 ++
 include/uapi/linux/perf_event.h |  4 +++-
 kernel/events/core.c            | 47 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6c2d416..2e3eb74 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 		perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
 
-		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+		if (event->attr.sample_type &
+		    (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
 			perf_get_data_addr(regs, &data.addr);
 
 		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a322fed..0516f78 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1175,7 +1175,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	else
 		regs->flags &= ~PERF_EFLAGS_EXACT;
 
-	if ((sample_type & PERF_SAMPLE_ADDR) &&
+	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
 		data->addr = pebs->dla;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 476aec3..65bb91e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -91,7 +91,7 @@ struct amd_nb {
 	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
 	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
 	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
-	PERF_SAMPLE_TRANSACTION)
+	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
 
 /*
  * A debug store configuration.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b14095b..74fb87e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -944,6 +944,8 @@ struct perf_sample_data {
 
 	struct perf_regs		regs_intr;
 	u64				stack_user_size;
+
+	u64				phys_addr;
 } ____cacheline_aligned;
 
 /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 642db5f..cbea02f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
+	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
-	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 };
 
 /*
@@ -814,6 +815,7 @@ enum perf_event_type {
 	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 *	{ u64			abi; # enum perf_sample_regs_abi
 	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d704e23..e8d5c5d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1570,6 +1570,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 	if (sample_type & PERF_SAMPLE_TRANSACTION)
 		size += sizeof(data->txn);
 
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		size += sizeof(data->phys_addr);
+
 	event->header_size = size;
 }
 
@@ -6012,6 +6015,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		perf_output_put(handle, data->phys_addr);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -6027,6 +6033,38 @@ void perf_output_sample(struct perf_output_handle *handle,
 	}
 }
 
+static u64 perf_virt_to_phys(u64 virt)
+{
+	u64 phys_addr = 0;
+	struct page *p = NULL;
+
+	if (!virt)
+		return 0;
+
+	if (virt >= TASK_SIZE) {
+		/* If it's vmalloc()d memory, leave phys_addr as 0 */
+		if (virt_addr_valid((void *)(uintptr_t)virt) &&
+		    !(virt >= VMALLOC_START && virt < VMALLOC_END))
+			phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
+	} else {
+		/*
+		 * Walk the page tables for the user address.
+		 * Interrupts are disabled, so it prevents any tear down
+		 * of the page tables.
+		 * Try IRQ-safe __get_user_pages_fast first.
+		 * If failed, leave phys_addr as 0.
+		 */
+		if ((current->mm != NULL) &&
+		    (__get_user_pages_fast(virt, 1, 0, &p) == 1))
+			phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+
+		if (p)
+			put_page(p);
+	}
+
+	return phys_addr;
+}
+
 void perf_prepare_sample(struct perf_event_header *header,
 			 struct perf_sample_data *data,
 			 struct perf_event *event,
@@ -6145,6 +6183,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		data->phys_addr = perf_virt_to_phys(data->addr);
 }
 
 static void __always_inline
@@ -9892,6 +9933,12 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EINVAL;
 	}
 
+	/* Only privileged users can get physical addresses */
+	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
+	    perf_paranoid_kernel() &&
+	    !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	if (!attr.sample_max_stack)
 		attr.sample_max_stack = sysctl_perf_event_max_stack;
 
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR
  2017-08-29  0:52 [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR kan.liang
@ 2017-08-29 11:35 ` Madhavan Srinivasan
  2017-08-29 11:45   ` Peter Zijlstra
  2017-08-29 14:22 ` [tip:perf/core] perf/core, x86: " tip-bot for Kan Liang
  1 sibling, 1 reply; 6+ messages in thread
From: Madhavan Srinivasan @ 2017-08-29 11:35 UTC (permalink / raw)
  To: kan.liang, peterz, mingo, linux-kernel
  Cc: acme, jolsa, tglx, eranian, ak, mpe



On Tuesday 29 August 2017 06:22 AM, kan.liang@intel.com wrote:
> From: Kan Liang <kan.liang@intel.com>
>
> For understanding how the workload maps to memory channels and hardware
> behavior, it's very important to collect address maps with physical
> addresses. For example, 3D XPoint access can only be found by filtering
> the physical address.
>
> Add a new sample type for physical address.
>
> perf already has a facility to collect data virtual address. This patch
> introduces a function to convert the virtual address to physical address.
> The function is quite generic and can be extended to any architecture as
> long as a virtual address is provided.
>   - For kernel direct mapping addresses, virt_to_phys is used to convert
>     the virtual addresses to physical address.
>   - For user virtual addresses, __get_user_pages_fast is used to walk the
>     pages tables for user physical address.
>   - This does not work for vmalloc addresses right now. These are not
>     resolved, but code to do that could be added.
>
> The new sample type requires collecting the virtual address. The
> virtual address will not be output unless SAMPLE_ADDR is applied.
>
> For security, the physical address can only be exposed to root or
> privileged user.

Tested-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>

>
> Signed-off-by: Kan Liang <kan.liang@intel.com>
> ---
>
> This patch is kernel patch.
> The user space patch can be found here.
> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.spinics.net_lists_kernel_msg2587093.html&d=DwIBaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=3f2W2m24mqGnx1C8qDsVjM_Sd89MwbaDB37IJVL-h7w&m=bL7qsJKf8aRpvBVr07ODZxoQhPgxffpGFgN9A4iyACw&s=R1OMYJt1szqNhPZu8QQDDRCnQgHZiQzcCPzofF6FIF8&e=
>
> Changes since V7
>   - Fix virt_addr_valid compile warning for MIPS architecture (LKP)
>
>   arch/powerpc/perf/core-book3s.c |  3 ++-
>   arch/x86/events/intel/ds.c      |  2 +-
>   arch/x86/events/perf_event.h    |  2 +-
>   include/linux/perf_event.h      |  2 ++
>   include/uapi/linux/perf_event.h |  4 +++-
>   kernel/events/core.c            | 47 +++++++++++++++++++++++++++++++++++++++++
>   6 files changed, 56 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 6c2d416..2e3eb74 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
>   
>   		perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
>   
> -		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
> +		if (event->attr.sample_type &
> +		    (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
>   			perf_get_data_addr(regs, &data.addr);
>   
>   		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index a322fed..0516f78 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -1175,7 +1175,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
>   	else
>   		regs->flags &= ~PERF_EFLAGS_EXACT;
>   
> -	if ((sample_type & PERF_SAMPLE_ADDR) &&
> +	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
>   	    x86_pmu.intel_cap.pebs_format >= 1)
>   		data->addr = pebs->dla;
>   
> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> index 476aec3..65bb91e 100644
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -91,7 +91,7 @@ struct amd_nb {
>   	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
>   	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
>   	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
> -	PERF_SAMPLE_TRANSACTION)
> +	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
>   
>   /*
>    * A debug store configuration.
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index b14095b..74fb87e 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -944,6 +944,8 @@ struct perf_sample_data {
>   
>   	struct perf_regs		regs_intr;
>   	u64				stack_user_size;
> +
> +	u64				phys_addr;
>   } ____cacheline_aligned;
>   
>   /* default value for data source */
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 642db5f..cbea02f 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -139,8 +139,9 @@ enum perf_event_sample_format {
>   	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
>   	PERF_SAMPLE_TRANSACTION			= 1U << 17,
>   	PERF_SAMPLE_REGS_INTR			= 1U << 18,
> +	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
>   
> -	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
> +	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
>   };
>   
>   /*
> @@ -814,6 +815,7 @@ enum perf_event_type {
>   	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
>   	 *	{ u64			abi; # enum perf_sample_regs_abi
>   	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
> +	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
>   	 * };
>   	 */
>   	PERF_RECORD_SAMPLE			= 9,
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index d704e23..e8d5c5d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -1570,6 +1570,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
>   	if (sample_type & PERF_SAMPLE_TRANSACTION)
>   		size += sizeof(data->txn);
>   
> +	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
> +		size += sizeof(data->phys_addr);
> +
>   	event->header_size = size;
>   }
>   
> @@ -6012,6 +6015,9 @@ void perf_output_sample(struct perf_output_handle *handle,
>   		}
>   	}
>   
> +	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
> +		perf_output_put(handle, data->phys_addr);
> +
>   	if (!event->attr.watermark) {
>   		int wakeup_events = event->attr.wakeup_events;
>   
> @@ -6027,6 +6033,38 @@ void perf_output_sample(struct perf_output_handle *handle,
>   	}
>   }
>   
> +static u64 perf_virt_to_phys(u64 virt)
> +{
> +	u64 phys_addr = 0;
> +	struct page *p = NULL;
> +
> +	if (!virt)
> +		return 0;
> +
> +	if (virt >= TASK_SIZE) {
> +		/* If it's vmalloc()d memory, leave phys_addr as 0 */
> +		if (virt_addr_valid((void *)(uintptr_t)virt) &&
> +		    !(virt >= VMALLOC_START && virt < VMALLOC_END))
> +			phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
> +	} else {
> +		/*
> +		 * Walking the pages tables for user address.
> +		 * Interrupts are disabled, so it prevents any tear down
> +		 * of the page tables.
> +		 * Try IRQ-safe __get_user_pages_fast first.
> +		 * If failed, leave phys_addr as 0.
> +		 */
> +		if ((current->mm != NULL) &&
> +		    (__get_user_pages_fast(virt, 1, 0, &p) == 1))
> +			phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
> +
> +		if (p)
> +			put_page(p);
> +	}
> +
> +	return phys_addr;
> +}
> +
>   void perf_prepare_sample(struct perf_event_header *header,
>   			 struct perf_sample_data *data,
>   			 struct perf_event *event,
> @@ -6145,6 +6183,9 @@ void perf_prepare_sample(struct perf_event_header *header,
>   
>   		header->size += size;
>   	}
> +
> +	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
> +		data->phys_addr = perf_virt_to_phys(data->addr);
>   }
>   
>   static void __always_inline
> @@ -9892,6 +9933,12 @@ SYSCALL_DEFINE5(perf_event_open,
>   			return -EINVAL;
>   	}
>   
> +	/* Only privileged users can get kernel addresses */
> +	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
> +	    perf_paranoid_kernel() &&
> +	    !capable(CAP_SYS_ADMIN))
> +		return -EACCES;
> +
>   	if (!attr.sample_max_stack)
>   		attr.sample_max_stack = sysctl_perf_event_max_stack;
>   

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR
  2017-08-29 11:35 ` Madhavan Srinivasan
@ 2017-08-29 11:45   ` Peter Zijlstra
  2017-08-31 16:43     ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2017-08-29 11:45 UTC (permalink / raw)
  To: Madhavan Srinivasan
  Cc: kan.liang, mingo, linux-kernel, acme, jolsa, tglx, eranian, ak, mpe

On Tue, Aug 29, 2017 at 05:05:15PM +0530, Madhavan Srinivasan wrote:
> 
> 
> On Tuesday 29 August 2017 06:22 AM, kan.liang@intel.com wrote:
> > From: Kan Liang <kan.liang@intel.com>
> > 
> > For understanding how the workload maps to memory channels and hardware
> > behavior, it's very important to collect address maps with physical
> > addresses. For example, 3D XPoint access can only be found by filtering
> > the physical address.
> > 
> > Add a new sample type for physical address.
> > 
> > perf already has a facility to collect data virtual address. This patch
> > introduces a function to convert the virtual address to physical address.
> > The function is quite generic and can be extended to any architecture as
> > long as a virtual address is provided.
> >   - For kernel direct mapping addresses, virt_to_phys is used to convert
> >     the virtual addresses to physical address.
> >   - For user virtual addresses, __get_user_pages_fast is used to walk the
> >     pages tables for user physical address.
> >   - This does not work for vmalloc addresses right now. These are not
> >     resolved, but code to do that could be added.
> > 
> > The new sample type requires collecting the virtual address. The
> > virtual address will not be output unless SAMPLE_ADDR is applied.
> > 
> > For security, the physical address can only be exposed to root or
> > privileged user.
> 
> Tested-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>

Thanks maddy!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip:perf/core] perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR
  2017-08-29  0:52 [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR kan.liang
  2017-08-29 11:35 ` Madhavan Srinivasan
@ 2017-08-29 14:22 ` tip-bot for Kan Liang
  1 sibling, 0 replies; 6+ messages in thread
From: tip-bot for Kan Liang @ 2017-08-29 14:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, kan.liang, maddy, jolsa, mingo, linux-kernel,
	vincent.weaver, alexander.shishkin, acme, torvalds, tglx,
	eranian, peterz

Commit-ID:  fc7ce9c74c3ad232b084d80148654f926d01ece7
Gitweb:     http://git.kernel.org/tip/fc7ce9c74c3ad232b084d80148654f926d01ece7
Author:     Kan Liang <kan.liang@intel.com>
AuthorDate: Mon, 28 Aug 2017 20:52:49 -0400
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 29 Aug 2017 15:09:25 +0200

perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR

For understanding how the workload maps to memory channels and hardware
behavior, it's very important to collect address maps with physical
addresses. For example, 3D XPoint access can only be found by filtering
the physical address.

Add a new sample type for physical address.

perf already has a facility to collect data virtual address. This patch
introduces a function to convert the virtual address to physical address.
The function is quite generic and can be extended to any architecture as
long as a virtual address is provided.

 - For kernel direct mapping addresses, virt_to_phys is used to convert
   the virtual addresses to physical address.

 - For user virtual addresses, __get_user_pages_fast is used to walk the
   page tables to obtain the user physical address.

 - This does not work for vmalloc addresses right now. These are not
   resolved, but code to do that could be added.

The new sample type requires collecting the virtual address. The
virtual address will not be output unless SAMPLE_ADDR is applied.

For security, the physical address can only be exposed to root or
privileged user.

Tested-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: mpe@ellerman.id.au
Link: http://lkml.kernel.org/r/1503967969-48278-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/perf/core-book3s.c |  3 ++-
 arch/x86/events/intel/ds.c      |  2 +-
 arch/x86/events/perf_event.h    |  2 +-
 include/linux/perf_event.h      |  2 ++
 include/uapi/linux/perf_event.h |  4 +++-
 kernel/events/core.c            | 46 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6c2d416..2e3eb74 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 		perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
 
-		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+		if (event->attr.sample_type &
+		    (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
 			perf_get_data_addr(regs, &data.addr);
 
 		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 98e36e0..e1965e5 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1185,7 +1185,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	else
 		regs->flags &= ~PERF_EFLAGS_EXACT;
 
-	if ((sample_type & PERF_SAMPLE_ADDR) &&
+	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
 		data->addr = pebs->dla;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9337589..4196f81 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -91,7 +91,7 @@ struct amd_nb {
 	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
 	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
 	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
-	PERF_SAMPLE_TRANSACTION)
+	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
 
 /*
  * A debug store configuration.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index adda0aa..718ba16 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -943,6 +943,8 @@ struct perf_sample_data {
 
 	struct perf_regs		regs_intr;
 	u64				stack_user_size;
+
+	u64				phys_addr;
 } ____cacheline_aligned;
 
 /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2a37ae9..140ae63 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
+	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
-	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 };
 
 /*
@@ -814,6 +815,7 @@ enum perf_event_type {
 	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 *	{ u64			abi; # enum perf_sample_regs_abi
 	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 77fd6b1..ce64f3f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1575,6 +1575,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 	if (sample_type & PERF_SAMPLE_TRANSACTION)
 		size += sizeof(data->txn);
 
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		size += sizeof(data->phys_addr);
+
 	event->header_size = size;
 }
 
@@ -6017,6 +6020,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		perf_output_put(handle, data->phys_addr);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -6032,6 +6038,38 @@ void perf_output_sample(struct perf_output_handle *handle,
 	}
 }
 
+static u64 perf_virt_to_phys(u64 virt)
+{
+	u64 phys_addr = 0;
+	struct page *p = NULL;
+
+	if (!virt)
+		return 0;
+
+	if (virt >= TASK_SIZE) {
+		/* If it's vmalloc()d memory, leave phys_addr as 0 */
+		if (virt_addr_valid((void *)(uintptr_t)virt) &&
+		    !(virt >= VMALLOC_START && virt < VMALLOC_END))
+			phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
+	} else {
+		/*
+		 * Walk the page tables for the user address.
+		 * Interrupts are disabled, so it prevents any tear down
+		 * of the page tables.
+		 * Try IRQ-safe __get_user_pages_fast first.
+		 * If failed, leave phys_addr as 0.
+		 */
+		if ((current->mm != NULL) &&
+		    (__get_user_pages_fast(virt, 1, 0, &p) == 1))
+			phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+
+		if (p)
+			put_page(p);
+	}
+
+	return phys_addr;
+}
+
 void perf_prepare_sample(struct perf_event_header *header,
 			 struct perf_sample_data *data,
 			 struct perf_event *event,
@@ -6150,6 +6188,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		data->phys_addr = perf_virt_to_phys(data->addr);
 }
 
 static void __always_inline
@@ -9909,6 +9950,11 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EINVAL;
 	}
 
+	/* Only privileged users can get physical addresses */
+	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
+	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	if (!attr.sample_max_stack)
 		attr.sample_max_stack = sysctl_perf_event_max_stack;
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR
  2017-08-29 11:45   ` Peter Zijlstra
@ 2017-08-31 16:43     ` Arnaldo Carvalho de Melo
       [not found]       ` <f006309a-9c95-6d31-1b0b-107179042c9b@linux.vnet.ibm.com>
  0 siblings, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-08-31 16:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Madhavan Srinivasan, kan.liang, mingo, linux-kernel, jolsa, tglx,
	eranian, ak, mpe

Em Tue, Aug 29, 2017 at 01:45:53PM +0200, Peter Zijlstra escreveu:
> On Tue, Aug 29, 2017 at 05:05:15PM +0530, Madhavan Srinivasan wrote:
> > 
> > 
> > On Tuesday 29 August 2017 06:22 AM, kan.liang@intel.com wrote:
> > > From: Kan Liang <kan.liang@intel.com>
> > > 
> > > For understanding how the workload maps to memory channels and hardware
> > > behavior, it's very important to collect address maps with physical
> > > addresses. For example, 3D XPoint access can only be found by filtering
> > > the physical address.
> > > 
> > > Add a new sample type for physical address.
> > > 
> > > perf already has a facility to collect data virtual address. This patch
> > > introduces a function to convert the virtual address to physical address.
> > > The function is quite generic and can be extended to any architecture as
> > > long as a virtual address is provided.
> > >   - For kernel direct mapping addresses, virt_to_phys is used to convert
> > >     the virtual addresses to physical address.
> > >   - For user virtual addresses, __get_user_pages_fast is used to walk the
> > >     pages tables for user physical address.
> > >   - This does not work for vmalloc addresses right now. These are not
> > >     resolved, but code to do that could be added.
> > > 
> > > The new sample type requires collecting the virtual address. The
> > > virtual address will not be output unless SAMPLE_ADDR is applied.
> > > 
> > > For security, the physical address can only be exposed to root or
> > > privileged user.
> > 
> > Tested-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>

You mean you tested this together with Kan's v2 perf tools patches? If
that is so I'll add this tested-by to those patches, ok?
 
> Thanks maddy!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR
       [not found]         ` <37D7C6CF3E00A74B8858931C1DB2F077537A7BB6@SHSMSX103.ccr.corp.intel.com>
@ 2017-09-01 13:02           ` Liang, Kan
  0 siblings, 0 replies; 6+ messages in thread
From: Liang, Kan @ 2017-09-01 13:02 UTC (permalink / raw)
  To: Madhavan Srinivasan, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: mingo, linux-kernel, jolsa, tglx, eranian, ak, mpe

> On Thursday 31 August 2017 10:13 PM, Arnaldo Carvalho de Melo wrote:
> Em Tue, Aug 29, 2017 at 01:45:53PM +0200, Peter Zijlstra escreveu:
> On Tue, Aug 29, 2017 at 05:05:15PM +0530, Madhavan Srinivasan wrote:
> 
> 
> On Tuesday 29 August 2017 06:22 AM, kan.liang@intel.com wrote:
> From: Kan Liang <kan.liang@intel.com>
> 
> For understanding how the workload maps to memory channels and
> hardware
> behavior, it's very important to collect address maps with physical
> addresses. For example, 3D XPoint access can only be found by filtering
> the physical address.
> 
> Add a new sample type for physical address.
> 
> perf already has a facility to collect data virtual address. This patch
> introduces a function to convert the virtual address to physical address.
> The function is quite generic and can be extended to any architecture as
> long as a virtual address is provided.
>   - For kernel direct mapping addresses, virt_to_phys is used to convert
>     the virtual addresses to physical address.
>   - For user virtual addresses, __get_user_pages_fast is used to walk the
>     pages tables for user physical address.
>   - This does not work for vmalloc addresses right now. These are not
>     resolved, but code to do that could be added.
> 
> The new sample type requires collecting the virtual address. The
> virtual address will not be output unless SAMPLE_ADDR is applied.
> 
> For security, the physical address can only be exposed to root or
> privileged user.
> 
> Tested-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> 
> You mean you tested this together with Kan's v2 perf tools patches? If
> that is so I'll add this tested-by to those patches, ok?
> Arnaldo,
> 
> No. Kernel side patch showed the link for the v1 of the perf tool side
> patchset (5 patch series) and I used that to test.
>

Thanks Maddy for the test.
Yes, the kernel patch went with user tool v1 version.
The user tool v2 version made a small modification to match the final
kernel patch. The only difference is that SAMPLE_ADDR is no longer
mandated when SAMPLE_PHYS_ADDR is set.

Thanks,
Kan
 
> 
> >This patch is kernel patch.
> >The user space patch can be found here.
> >https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__www.spinics.net_lists_kernel_msg2587093.html&d=DwIBaQ&c=jf_iaSHv
> JObTbx-siA1ZOg&r=3f2W2m24mqGnx1C8qDsVjM_Sd89MwbaDB37IJVL-
> h7w&m=bL7qsJKf8aRpvBVr07ODZxoQhPgxffpGFgN9A4iyACw&s=R1OMYJt1sz
> qNhPZu8QQDDRCnQgHZiQzcCPzofF6FIF8&e=
> 
> Maddy
> 
> 
> 
> Thanks maddy!
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-09-01 13:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-29  0:52 [PATCH V8] perf: Add PERF_SAMPLE_PHYS_ADDR kan.liang
2017-08-29 11:35 ` Madhavan Srinivasan
2017-08-29 11:45   ` Peter Zijlstra
2017-08-31 16:43     ` Arnaldo Carvalho de Melo
     [not found]       ` <f006309a-9c95-6d31-1b0b-107179042c9b@linux.vnet.ibm.com>
     [not found]         ` <37D7C6CF3E00A74B8858931C1DB2F077537A7BB6@SHSMSX103.ccr.corp.intel.com>
2017-09-01 13:02           ` Liang, Kan
2017-08-29 14:22 ` [tip:perf/core] perf/core, x86: " tip-bot for Kan Liang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.