All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/1] Perf: fix overwritten of free running PEBSv3 timestamp
@ 2016-02-10 22:38 Tong Zhang
  2016-02-10 22:38 ` [PATCH 1/1] " Tong Zhang
  0 siblings, 1 reply; 5+ messages in thread
From: Tong Zhang @ 2016-02-10 22:38 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo; +Cc: LKML, Tong Zhang

Hi,
  commit a7b58d211ba18c9175b139e18b68c86a6bcc3c3f introduced
  feature of timestamp on free running PEBSv3, however,
  the timestamp is later overwritten, which makes the effort in vain.

  This patch prevent overwritten by detecting whether
  a timestamp is provided.

Tong Zhang (1):
 x86: Perf, fix overwritten of free running PEBSv3 timestamp
 include/linux/perf_event.h  |  1 +
 kernel/events/core.c        | 45 +++++++++++++++++++++++++++++++++------------
 kernel/events/ring_buffer.c |  4 +++-
 3 files changed, 37 insertions(+), 13 deletions(-)

-- 
2.4.10

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/1] Perf: fix overwritten of free running PEBSv3 timestamp
  2016-02-10 22:38 [PATCH 0/1] Perf: fix overwritten of free running PEBSv3 timestamp Tong Zhang
@ 2016-02-10 22:38 ` Tong Zhang
  2016-02-11 14:57   ` Peter Zijlstra
  0 siblings, 1 reply; 5+ messages in thread
From: Tong Zhang @ 2016-02-10 22:38 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo; +Cc: LKML, Tong Zhang

 commit a7b58d211ba18c9175b139e18b68c86a6bcc3c3f introduced feature of
 timestamp on free running PEBSv3, however, the timestamp is later
 overwritten, which makes the effort in vain.

This patch fixed this problem by detecting whether timestamp is provided.

Signed-off-by: Tong Zhang <ztong@vt.edu>
---
 include/linux/perf_event.h  |  1 +
 kernel/events/core.c        | 45 +++++++++++++++++++++++++++++++++------------
 kernel/events/ring_buffer.c |  4 +++-
 3 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f9828a4..12d7b95 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -815,6 +815,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->weight = 0;
 	data->data_src.val = PERF_MEM_NA;
 	data->txn = 0;
+	data->time = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 06ae52e..0e5a9fa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5113,9 +5113,10 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
 		data->tid_entry.tid = perf_event_tid(event, current);
 	}
 
-	if (sample_type & PERF_SAMPLE_TIME)
-		data->time = perf_event_clock(event);
-
+	if (sample_type & PERF_SAMPLE_TIME) {
+		if (data->time == 0)
+			data->time = perf_event_clock(event);
+	}
 	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
 		data->id = primary_event_id(event);
 
@@ -5567,7 +5568,9 @@ perf_event_read_event(struct perf_event *event,
 			struct task_struct *task)
 {
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	struct perf_read_event read_event = {
 		.header = {
 			.type = PERF_RECORD_READ,
@@ -5691,7 +5694,9 @@ static void perf_event_task_output(struct perf_event *event,
 {
 	struct perf_task_event *task_event = data;
 	struct perf_output_handle handle;
-	struct perf_sample_data	sample;
+	struct perf_sample_data	sample = {
+		.time = 0,
+	};
 	struct task_struct *task = task_event->task;
 	int ret, size = task_event->event_id.header.size;
 
@@ -5787,7 +5792,9 @@ static void perf_event_comm_output(struct perf_event *event,
 {
 	struct perf_comm_event *comm_event = data;
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	int size = comm_event->event_id.header.size;
 	int ret;
 
@@ -5900,7 +5907,9 @@ static void perf_event_mmap_output(struct perf_event *event,
 {
 	struct perf_mmap_event *mmap_event = data;
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	int size = mmap_event->event_id.header.size;
 	int ret;
 
@@ -6105,7 +6114,9 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
 			  unsigned long size, u64 flags)
 {
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	struct perf_aux_event {
 		struct perf_event_header	header;
 		u64				offset;
@@ -6141,7 +6152,9 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
 void perf_log_lost_samples(struct perf_event *event, u64 lost)
 {
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	int ret;
 
 	struct {
@@ -6192,7 +6205,9 @@ static void perf_event_switch_output(struct perf_event *event, void *data)
 {
 	struct perf_switch_event *se = data;
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	int ret;
 
 	if (!perf_event_switch_match(event))
@@ -6260,7 +6275,9 @@ static void perf_event_switch(struct task_struct *task,
 static void perf_log_throttle(struct perf_event *event, int enable)
 {
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	int ret;
 
 	struct {
@@ -6297,7 +6314,9 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 static void perf_log_itrace_start(struct perf_event *event)
 {
 	struct perf_output_handle handle;
-	struct perf_sample_data sample;
+	struct perf_sample_data sample = {
+		.time = 0,
+	};
 	struct perf_aux_event {
 		struct perf_event_header        header;
 		u32				pid;
@@ -6468,6 +6487,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
 		return;
 
 	for (; overflow; overflow--) {
+		data->time = 0;
 		if (__perf_event_overflow(event, throttle,
 					    data, regs)) {
 			/*
@@ -6608,6 +6628,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 		goto end;
 
 	hlist_for_each_entry_rcu(event, head, hlist_entry) {
+		data->time = 0;
 		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_event(event, nr, data, regs);
 	}
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index adfdc05..049379b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -178,7 +178,9 @@ int perf_output_begin(struct perf_output_handle *handle,
 	handle->size = (1UL << page_shift) - offset;
 
 	if (unlikely(have_lost)) {
-		struct perf_sample_data sample_data;
+		struct perf_sample_data sample_data = {
+			.time = 0,
+		};
 
 		lost_event.header.size = sizeof(lost_event);
 		lost_event.header.type = PERF_RECORD_LOST;
-- 
2.4.10

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] Perf: fix overwritten of free running PEBSv3 timestamp
  2016-02-10 22:38 ` [PATCH 1/1] " Tong Zhang
@ 2016-02-11 14:57   ` Peter Zijlstra
  2016-02-11 16:39     ` Tong Zhang
  0 siblings, 1 reply; 5+ messages in thread
From: Peter Zijlstra @ 2016-02-11 14:57 UTC (permalink / raw)
  To: Tong Zhang; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Wed, Feb 10, 2016 at 05:38:02PM -0500, Tong Zhang wrote:
>  commit a7b58d211ba18c9175b139e18b68c86a6bcc3c3f introduced feature of
>  timestamp on free running PEBSv3, however, the timestamp is later
>  overwritten, which makes the effort in vain.
> 
> This patch fixed this problem by detecting whether timestamp is provided.
> 
> Signed-off-by: Tong Zhang <ztong@vt.edu>
> ---
>  include/linux/perf_event.h  |  1 +
>  kernel/events/core.c        | 45 +++++++++++++++++++++++++++++++++------------
>  kernel/events/ring_buffer.c |  4 +++-
>  3 files changed, 37 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index f9828a4..12d7b95 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -815,6 +815,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>  	data->weight = 0;
>  	data->data_src.val = PERF_MEM_NA;
>  	data->txn = 0;
> +	data->time = 0;
>  }

Argh, you just touched a new cacheline and made _every_ single event
slower.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] Perf: fix overwritten of free running PEBSv3 timestamp
  2016-02-11 14:57   ` Peter Zijlstra
@ 2016-02-11 16:39     ` Tong Zhang
  2016-02-11 17:26       ` Peter Zijlstra
  0 siblings, 1 reply; 5+ messages in thread
From: Tong Zhang @ 2016-02-11 16:39 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

How about moving `time' to the `group', Will this help?

Regards,

Tong

Signed-off-by: Tong Zhang <ztong@vt.edu>
------
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3f590de..3163598 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -797,6 +797,7 @@ struct perf_sample_data {
        u64                             period;
        u64                             weight;
        u64                             txn;
+       u64                             time;
        union  perf_mem_data_src        data_src;

        /*
@@ -809,7 +810,6 @@ struct perf_sample_data {
                u32     pid;
                u32     tid;
        }                               tid_entry;
-       u64                             time;
        u64                             id;
        u64                             stream_id;
        struct {

On Thu, Feb 11, 2016 at 9:57 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Feb 10, 2016 at 05:38:02PM -0500, Tong Zhang wrote:
>>  commit a7b58d211ba18c9175b139e18b68c86a6bcc3c3f introduced feature of
>>  timestamp on free running PEBSv3, however, the timestamp is later
>>  overwritten, which makes the effort in vain.
>>
>> This patch fixed this problem by detecting whether timestamp is provided.
>>
>> Signed-off-by: Tong Zhang <ztong@vt.edu>
>> ---
>>  include/linux/perf_event.h  |  1 +
>>  kernel/events/core.c        | 45 +++++++++++++++++++++++++++++++++------------
>>  kernel/events/ring_buffer.c |  4 +++-
>>  3 files changed, 37 insertions(+), 13 deletions(-)
>>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index f9828a4..12d7b95 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -815,6 +815,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>>       data->weight = 0;
>>       data->data_src.val = PERF_MEM_NA;
>>       data->txn = 0;
>> +     data->time = 0;
>>  }
>
> Argh, you just touched a new cacheline and made _every_ single event
> slower.

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] Perf: fix overwritten of free running PEBSv3 timestamp
  2016-02-11 16:39     ` Tong Zhang
@ 2016-02-11 17:26       ` Peter Zijlstra
  0 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2016-02-11 17:26 UTC (permalink / raw)
  To: Tong Zhang; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, Feb 11, 2016 at 11:39:18AM -0500, Tong Zhang wrote:
> How about moving `time' to the `group', Will this help?

Somewhat, but the patch is still really horrid. It makes t==0 a special
case and seems to scatter this all over the place.

It also consumes the last word in that cacheline :/

There's a few other options, but so far I'm not really liking any of
them very much either :/

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-02-11 17:26 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-10 22:38 [PATCH 0/1] Perf: fix overwritten of free running PEBSv3 timestamp Tong Zhang
2016-02-10 22:38 ` [PATCH 1/1] " Tong Zhang
2016-02-11 14:57   ` Peter Zijlstra
2016-02-11 16:39     ` Tong Zhang
2016-02-11 17:26       ` Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.