All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] perf core: Support reading from overwritable ring buffer
@ 2016-03-28  6:41 Wang Nan
  2016-03-28  6:41 ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume " Wang Nan
                   ` (3 more replies)
  0 siblings, 4 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28  6:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Wang Nan, Zefan Li

Following Peter and Alexei's suggestion, this patchset:

 1. Append performance test result in commit message of patch 3/4 and 4/4;
 2. Describe history and design choice in commit message of patch 4/4;
 3. Change 'onward' to 'forward';
 4. Squash patches to make a better git log.

Wang Nan (4):
  perf core: Introduce new ioctl options to pause and resume ring buffer
  perf core: Set event's default overflow_handler
  perf core: Prepare writing into ring buffer from end
  perf core: Add backward attribute to perf event

 arch/arm/kernel/hw_breakpoint.c   |  4 +--
 arch/arm64/kernel/hw_breakpoint.c |  4 +--
 include/linux/perf_event.h        | 32 +++++++++++++++--
 include/uapi/linux/perf_event.h   |  4 ++-
 kernel/events/core.c              | 73 +++++++++++++++++++++++++++++++++------
 kernel/events/internal.h          | 11 ++++++
 kernel/events/ring_buffer.c       | 63 +++++++++++++++++++++++++++++----
 7 files changed, 167 insertions(+), 24 deletions(-)

-- 
1.8.3.4

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-28  6:41 [PATCH 0/4] perf core: Support reading from overwritable ring buffer Wang Nan
@ 2016-03-28  6:41 ` Wang Nan
  2016-03-28 10:15     ` Wang Nan
                     ` (3 more replies)
  2016-03-28  6:41 ` [PATCH 2/4] perf core: Set event's default overflow_handler Wang Nan
                   ` (2 subsequent siblings)
  3 siblings, 4 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28  6:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Wang Nan, Zefan Li

Add new ioctl() to pause/resume ring-buffer output.

In some situations we want to read from ring buffer only when we
ensure nothing can write to the ring buffer during reading. Without
this patch we have to turn off all events attached to this ring buffer
to achieve this.

This patch is for supporting overwrite ring buffer. Following
commits will introduce new methods to support reading from overwrite ring
buffer. Before reading, caller must ensure the ring buffer is frozen, or
the reading is unreliable.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c            | 13 +++++++++++++
 kernel/events/internal.h        | 11 +++++++++++
 kernel/events/ring_buffer.c     |  7 ++++++-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1afe962..a3c1903 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -401,6 +401,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index de24fbc..cb47da3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4341,6 +4341,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 	case PERF_EVENT_IOC_SET_BPF:
 		return perf_event_set_bpf_prog(event, arg);
 
+	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
+		struct ring_buffer *rb;
+
+		rcu_read_lock();
+		rb = rcu_dereference(event->rb);
+		if (!event->rb) {
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+		rb_toggle_paused(rb, !!arg);
+		rcu_read_unlock();
+		return 0;
+	}
 	default:
 		return -ENOTTY;
 	}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2bbad9c..6a93d1b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -18,6 +18,7 @@ struct ring_buffer {
 #endif
 	int				nr_pages;	/* nr of data pages  */
 	int				overwrite;	/* can overwrite itself */
+	int				paused;		/* can write into ring buffer */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 
@@ -65,6 +66,16 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	rb_free(rb);
 }
 
+static inline void
+rb_toggle_paused(struct ring_buffer *rb,
+		 bool pause)
+{
+	if (!pause && rb->nr_pages)
+		rb->paused = 0;
+	else
+		rb->paused = 1;
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index c61f0cb..17de83b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -125,8 +125,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (unlikely(!rb))
 		goto out;
 
-	if (unlikely(!rb->nr_pages))
+	if (unlikely(rb->paused)) {
+		if (rb->nr_pages)
+			local_inc(&rb->lost);
 		goto out;
+	}
 
 	handle->rb    = rb;
 	handle->event = event;
@@ -244,6 +247,8 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
 	init_irq_work(&rb->irq_work, rb_irq_work);
+
+	rb->paused = rb->nr_pages ? 0 : 1;
 }
 
 static void ring_buffer_put_async(struct ring_buffer *rb)
-- 
1.8.3.4

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 2/4] perf core: Set event's default overflow_handler
  2016-03-28  6:41 [PATCH 0/4] perf core: Support reading from overwritable ring buffer Wang Nan
  2016-03-28  6:41 ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume " Wang Nan
@ 2016-03-28  6:41 ` Wang Nan
  2016-03-31  9:26   ` [tip:perf/core] perf/core: Set event's default ::overflow_handler() tip-bot for Wang Nan
  2016-03-28  6:41 ` [PATCH 3/4] perf core: Prepare writing into ring buffer from end Wang Nan
  2016-03-28  6:41 ` [PATCH 4/4] perf core: Add backward attribute to perf event Wang Nan
  3 siblings, 1 reply; 37+ messages in thread
From: Wang Nan @ 2016-03-28  6:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Wang Nan, Zefan Li

Set a default event->overflow_handler in perf_event_alloc() so we don't
need to check event->overflow_handler in __perf_event_overflow().
Following commits can give a different default overflow_handler.

Initial idea comes from Peter at [1]

Since the default value of event->overflow_handler is no longer NULL, existing
'if (!overflow_handler)' checks need to be changed.
is_default_overflow_handler() is introduced for this.

No extra performance overhead is introduced into the hot path because the
original code also had to read this handler from memory. A conditional branch
is avoided, so we actually remove some instructions.

[1] http://lkml.kernel.org/r/20130708121557.GA17211@twins.programming.kicks-ass.net

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 arch/arm/kernel/hw_breakpoint.c   |  4 ++--
 arch/arm64/kernel/hw_breakpoint.c |  4 ++--
 include/linux/perf_event.h        |  6 ++++++
 kernel/events/core.c              | 14 ++++++++------
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 6284779..b8df458 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	info->address &= ~alignment_mask;
 	info->ctrl.len <<= offset;
 
-	if (!bp->overflow_handler) {
+	if (is_default_overflow_handler(bp)) {
 		/*
 		 * Mismatch breakpoints are required for single-stepping
 		 * breakpoints.
@@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 * mismatch breakpoint so we can single-step over the
 		 * watchpoint trigger.
 		 */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			enable_single_step(wp, instruction_pointer(regs));
 
 unlock:
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..4ef5373 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,
 		perf_bp_event(bp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!bp->overflow_handler)
+		if (is_default_overflow_handler(bp))
 			step = 1;
 unlock:
 		rcu_read_unlock();
@@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
 		perf_bp_event(wp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			step = 1;
 
 unlock:
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 15588d4..4065ca2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -838,6 +838,12 @@ extern void perf_event_output(struct perf_event *event,
 				struct perf_sample_data *data,
 				struct pt_regs *regs);
 
+static inline bool
+is_default_overflow_handler(struct perf_event *event)
+{
+	return (event->overflow_handler == perf_event_output);
+}
+
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
 			   struct perf_sample_data *data,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cb47da3..3bd4b2b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6502,10 +6502,7 @@ static int __perf_event_overflow(struct perf_event *event,
 		irq_work_queue(&event->pending);
 	}
 
-	if (event->overflow_handler)
-		event->overflow_handler(event, data, regs);
-	else
-		perf_event_output(event, data, regs);
+	event->overflow_handler(event, data, regs);
 
 	if (*perf_event_fasync(event) && event->pending_kill) {
 		event->pending_wakeup = 1;
@@ -8017,8 +8014,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		context = parent_event->overflow_handler_context;
 	}
 
-	event->overflow_handler	= overflow_handler;
-	event->overflow_handler_context = context;
+	if (overflow_handler) {
+		event->overflow_handler	= overflow_handler;
+		event->overflow_handler_context = context;
+	} else {
+		event->overflow_handler = perf_event_output;
+		event->overflow_handler_context = NULL;
+	}
 
 	perf_event__state_init(event);
 
-- 
1.8.3.4

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 3/4] perf core: Prepare writing into ring buffer from end
  2016-03-28  6:41 [PATCH 0/4] perf core: Support reading from overwritable ring buffer Wang Nan
  2016-03-28  6:41 ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume " Wang Nan
  2016-03-28  6:41 ` [PATCH 2/4] perf core: Set event's default overflow_handler Wang Nan
@ 2016-03-28  6:41 ` Wang Nan
  2016-03-29  0:25   ` Alexei Starovoitov
  2016-03-31  9:26   ` [tip:perf/core] perf/ring_buffer: Prepare writing into the ring-buffer from the end tip-bot for Wang Nan
  2016-03-28  6:41 ` [PATCH 4/4] perf core: Add backward attribute to perf event Wang Nan
  3 siblings, 2 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28  6:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Wang Nan, Zefan Li

Convert perf_output_begin to __perf_output_begin and make the latter
function able to write records from the end of the ring buffer.
Following commits will utilize the 'backward' flag.

This is the core patch to support writing ring buffer backward, which
would be introduced by following patch to support reading from
overwritable ring buffer.

In theory, this patch should not introduce any extra performance
overhead since we use always_inline.

When CONFIG_OPTIMIZE_INLINING is disabled, the output object is nearly
identical to original one. See [1].

When CONFIG_OPTIMIZE_INLINING is enabled, the resulting object file becomes
smaller:

 $ size kernel/events/ring_buffer.o*
   text       data        bss        dec        hex    filename
   4545          4          8       4557       11cd kernel/events/ring_buffer.o.new
   4641          4          8       4653       122d kernel/events/ring_buffer.o.old

Performance result:

Calling 3000000 times of 'close(-1)', use gettimeofday() to check
duration.  Use 'perf record -o /dev/null -e raw_syscalls:*' to capture
system calls. In ns.

Testing environment:

 CPU    : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
 Kernel : v4.5.0

                     MEAN         STDVAR
  BASE            800214.950    2853.083
  PRE            2253846.700    9997.014
  POST           2257495.540    8516.293

Where 'BASE' is pure performance without capturing. 'PRE' is test
result of pure 'v4.5.0' kernel. 'POST' is test result after this
patch. See [2] for the detailed experimental setup.

Considering the stdvar, this patch doesn't hurt performance.

For the detail of testing method, please refer to [2].

[1] http://lkml.kernel.org/g/56F52E83.70409@huawei.com
[2] http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 kernel/events/ring_buffer.c | 42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 17de83b..b2c7c15 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -102,8 +102,21 @@ out:
 	preempt_enable();
 }
 
-int perf_output_begin(struct perf_output_handle *handle,
-		      struct perf_event *event, unsigned int size)
+static bool __always_inline
+ring_buffer_has_space(unsigned long head, unsigned long tail,
+		      unsigned long data_size, unsigned int size,
+		      bool backward)
+{
+	if (!backward)
+		return CIRC_SPACE(head, tail, data_size) >= size;
+	else
+		return CIRC_SPACE(tail, head, data_size) >= size;
+}
+
+static int __always_inline
+__perf_output_begin(struct perf_output_handle *handle,
+		    struct perf_event *event, unsigned int size,
+		    bool backward)
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
@@ -146,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 	do {
 		tail = READ_ONCE(rb->user_page->data_tail);
 		offset = head = local_read(&rb->head);
-		if (!rb->overwrite &&
-		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
-			goto fail;
+		if (!rb->overwrite) {
+			if (unlikely(!ring_buffer_has_space(head, tail,
+							    perf_data_size(rb),
+							    size, backward)))
+				goto fail;
+		}
 
 		/*
 		 * The above forms a control dependency barrier separating the
@@ -162,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle,
 		 * See perf_output_put_handle().
 		 */
 
-		head += size;
+		if (!backward)
+			head += size;
+		else
+			head -= size;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
 
+	if (backward) {
+		offset = head;
+		head = (u64)(-head);
+	}
+
 	/*
 	 * We rely on the implied barrier() by local_cmpxchg() to ensure
 	 * none of the data stores below can be lifted up by the compiler.
@@ -206,6 +230,12 @@ out:
 	return -ENOSPC;
 }
 
+int perf_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, false);
+}
+
 unsigned int perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
-- 
1.8.3.4

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-28  6:41 [PATCH 0/4] perf core: Support reading from overwritable ring buffer Wang Nan
                   ` (2 preceding siblings ...)
  2016-03-28  6:41 ` [PATCH 3/4] perf core: Prepare writing into ring buffer from end Wang Nan
@ 2016-03-28  6:41 ` Wang Nan
  2016-03-28 10:16     ` Wang Nan
                     ` (3 more replies)
  3 siblings, 4 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28  6:41 UTC (permalink / raw)
  To: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Wang Nan, Zefan Li

This patch introduces 'write_backward' bit to perf_event_attr, which
controls the direction of a ring buffer. After set, the corresponding
ring buffer is written from end to beginning. This feature is designed to
support reading from overwritable ring buffer.

Ring buffer can be created by mapping a perf event fd. Kernel puts event
records into ring buffer, user programs like perf fetch them from
address returned by mmap(). To prevent racing between kernel and perf,
they communicate to each other through 'head' and 'tail' pointers.
Kernel maintains 'head' pointer, points it to the next free area (tail
of the last record). Perf maintains 'tail' pointer, points it to the
tail of last consumed record (record has already been fetched). Kernel
determines the available space in a ring buffer using these two
pointers, prevents to overwrite unfetched records.

By mapping without 'PROT_WRITE', an overwritable ring buffer is created.
Different from normal ring buffer, perf is unable to maintain 'tail'
pointer because writing is forbidden. Therefore, for this type of ring
buffers, kernel overwrite old records unconditionally, works like flight
recorder. This feature would be useful if reading from overwritable ring
buffer were as easy as reading from normal ring buffer. However,
there's an obscure problem.

The following figure demonstrates the state of an overwritable ring
buffer which is nearly full. In this figure, the 'head' pointer points
to the end of last record, and a long record 'E' is pending. For a
normal ring buffer, a 'tail' pointer would have pointed to position (X),
so kernel knows there's no more space in the ring buffer. However, for
an overwritable ring buffer, kernel doesn't care the 'tail' pointer.

   (X)                              head
    .                                |
    .                                V
    +------+-------+----------+------+---+
    |A....A|B.....B|C........C|D....D|   |
    +------+-------+----------+------+---+

After writing record 'E', record 'A' is overwritten.

      head
       |
       V
    +--+---+-------+----------+------+---+
    |.E|..A|B.....B|C........C|D....D|E..|
    +--+---+-------+----------+------+---+

Now perf decides to read from this ring buffer. However, neither of the
two natural positions, 'head' and the start of this ring buffer,
points to the head of a record. Even if perf can read the full ring
buffer, it is unable to find the position to start decoding.

The first attempt to solve this problem, AFAIK, can be found in
[1]. It makes the kernel maintain the 'tail' pointer: it updates it when the ring
buffer is half full. However, this approach introduces overhead to the
fast path. Test results show a 1% overhead [2]. In addition, this method
utilizes no more than 50% of the records.

Another attempt can be found from [3], which allow putting the size of
an event at the end of each record. This approach allows perf to find
records in a backward manner from 'head' pointer by reading size of a
record from its tail. However, because of alignment requirement, it
needs 8 bytes to record the size of a record, which is a huge waste. Its
performance is also not good, because more data need to be written.
This approach also introduces some extra branch instructions to fast
path.

'write_backward' is a better solution to this problem.

Following figure demonstrates the state of the overwritable ring buffer
when 'write_backward' is set before overwriting:

       head
        |
        V
    +---+------+----------+-------+------+
    |   |D....D|C........C|B.....B|A....A|
    +---+------+----------+-------+------+

and after overwriting:
                                     head
                                      |
                                      V
    +---+------+----------+-------+---+--+
    |..E|D....D|C........C|B.....B|A..|E.|
    +---+------+----------+-------+---+--+

In each situation, 'head' points to the beginning of the newest record.
From this record, perf can iterate over the full ring buffer, fetching
as many records as possible one by one.

The only limitation that needs to be considered is back-to-back reading. Due to
the non-deterministic nature of user programs, it is impossible to ensure the
ring buffer stays stable during reading. Consider an extreme situation:
perf is scheduled out after reading record 'D', then a burst of events
comes and eats up the whole ring buffer (one or multiple rounds), but the 'head'
pointer happens to be at the same position when perf comes back.
Continuing to read after 'D' is incorrect now.

To prevent this problem, we need to find a way to ensure the ring buffer
is stable during reading. ioctl(PERF_EVENT_IOC_PAUSE_OUTPUT) is
suggested because its overhead is lower than
ioctl(PERF_EVENT_IOC_ENABLE).

This patch utilizes event's default overflow_handler introduced
previously. perf_event_output_backward() is created as the default
overflow handler for backward ring buffers. To avoid extra overhead to
fast path, original perf_event_output() becomes __perf_event_output()
and marked '__always_inline'. In theory, there's no extra overhead
introduced to fast path.

Performance result:

Calling 3000000 times of 'close(-1)', use gettimeofday() to check
duration.  Use 'perf record -o /dev/null -e raw_syscalls:*' to capture
system calls. In ns.

Testing environment:

 CPU    : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
 Kernel : v4.5.0
                   MEAN         STDVAR
BASE            800214.950    2853.083
PRE1           2253846.700    9997.014
PRE2           2257495.540    8516.293
POST           2250896.100    8933.921

Where 'BASE' is pure performance without capturing. 'PRE1' is test
result of pure 'v4.5.0' kernel. 'PRE2' is test result before this
patch. 'POST' is test result after this patch. See [4] for detail
experimental setup.

Considering the stdvar, this patch doesn't introduce performance
overhead to fast path.

[1] http://lkml.iu.edu/hypermail/linux/kernel/1304.1/04584.html
[2] http://lkml.iu.edu/hypermail/linux/kernel/1307.1/00535.html
[3] http://lkml.iu.edu/hypermail/linux/kernel/1512.0/01265.html
[4] http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 include/linux/perf_event.h      | 28 +++++++++++++++++++++---
 include/uapi/linux/perf_event.h |  3 ++-
 kernel/events/core.c            | 48 ++++++++++++++++++++++++++++++++++++-----
 kernel/events/ring_buffer.c     | 14 ++++++++++++
 4 files changed, 84 insertions(+), 9 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4065ca2..0cc36ad 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -834,14 +834,24 @@ extern int perf_event_overflow(struct perf_event *event,
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+extern void perf_event_output_forward(struct perf_event *event,
+				     struct perf_sample_data *data,
+				     struct pt_regs *regs);
+extern void perf_event_output_backward(struct perf_event *event,
+				       struct perf_sample_data *data,
+				       struct pt_regs *regs);
 extern void perf_event_output(struct perf_event *event,
-				struct perf_sample_data *data,
-				struct pt_regs *regs);
+			      struct perf_sample_data *data,
+			      struct pt_regs *regs);
 
 static inline bool
 is_default_overflow_handler(struct perf_event *event)
 {
-	return (event->overflow_handler == perf_event_output);
+	if (likely(event->overflow_handler == perf_event_output_forward))
+		return true;
+	if (unlikely(event->overflow_handler == perf_event_output_backward))
+		return true;
+	return false;
 }
 
 extern void
@@ -1042,8 +1052,20 @@ static inline bool has_aux(struct perf_event *event)
 	return event->pmu->setup_aux;
 }
 
+static inline bool is_write_backward(struct perf_event *event)
+{
+	return !!event->attr.write_backward;
+}
+
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
+extern int perf_output_begin_forward(struct perf_output_handle *handle,
+				    struct perf_event *event,
+				    unsigned int size);
+extern int perf_output_begin_backward(struct perf_output_handle *handle,
+				      struct perf_event *event,
+				      unsigned int size);
+
 extern void perf_output_end(struct perf_output_handle *handle);
 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index a3c1903..43fc8d2 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -340,7 +340,8 @@ struct perf_event_attr {
 				comm_exec      :  1, /* flag comm events that are due to an exec */
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
-				__reserved_1   : 37;
+				write_backward :  1, /* Write ring buffer from end to beginning */
+				__reserved_1   : 36;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3bd4b2b..41a2614 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5641,9 +5641,13 @@ void perf_prepare_sample(struct perf_event_header *header,
 	}
 }
 
-void perf_event_output(struct perf_event *event,
-			struct perf_sample_data *data,
-			struct pt_regs *regs)
+static void __always_inline
+__perf_event_output(struct perf_event *event,
+		    struct perf_sample_data *data,
+		    struct pt_regs *regs,
+		    int (*output_begin)(struct perf_output_handle *,
+					struct perf_event *,
+					unsigned int))
 {
 	struct perf_output_handle handle;
 	struct perf_event_header header;
@@ -5653,7 +5657,7 @@ void perf_event_output(struct perf_event *event,
 
 	perf_prepare_sample(&header, data, event, regs);
 
-	if (perf_output_begin(&handle, event, header.size))
+	if (output_begin(&handle, event, header.size))
 		goto exit;
 
 	perf_output_sample(&handle, &header, data, event);
@@ -5664,6 +5668,30 @@ exit:
 	rcu_read_unlock();
 }
 
+void
+perf_event_output_forward(struct perf_event *event,
+			 struct perf_sample_data *data,
+			 struct pt_regs *regs)
+{
+	__perf_event_output(event, data, regs, perf_output_begin_forward);
+}
+
+void
+perf_event_output_backward(struct perf_event *event,
+			   struct perf_sample_data *data,
+			   struct pt_regs *regs)
+{
+	__perf_event_output(event, data, regs, perf_output_begin_backward);
+}
+
+void
+perf_event_output(struct perf_event *event,
+		  struct perf_sample_data *data,
+		  struct pt_regs *regs)
+{
+	__perf_event_output(event, data, regs, perf_output_begin);
+}
+
 /*
  * read event_id
  */
@@ -8017,8 +8045,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	if (overflow_handler) {
 		event->overflow_handler	= overflow_handler;
 		event->overflow_handler_context = context;
+	} else if (is_write_backward(event)){
+		event->overflow_handler = perf_event_output_backward;
+		event->overflow_handler_context = NULL;
 	} else {
-		event->overflow_handler = perf_event_output;
+		event->overflow_handler = perf_event_output_forward;
 		event->overflow_handler_context = NULL;
 	}
 
@@ -8253,6 +8284,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 		goto out;
 
 	/*
+	 * Either writing ring buffer from beginning or from end.
+	 * Mixing is not allowed.
+	 */
+	if (is_write_backward(output_event) != is_write_backward(event))
+		goto out;
+
+	/*
 	 * If both events generate aux data, they must be on the same PMU
 	 */
 	if (has_aux(event) && has_aux(output_event) &&
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index b2c7c15..8e6c4b5 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -230,9 +230,23 @@ out:
 	return -ENOSPC;
 }
 
+int perf_output_begin_forward(struct perf_output_handle *handle,
+			     struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, false);
+}
+
+int perf_output_begin_backward(struct perf_output_handle *handle,
+			       struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, true);
+}
+
 int perf_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event, unsigned int size)
 {
+	if (unlikely(is_write_backward(event)))
+		return __perf_output_begin(handle, event, size, true);
 	return __perf_output_begin(handle, event, size, false);
 }
 
-- 
1.8.3.4

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
@ 2016-03-28 10:15     ` Wang Nan
  0 siblings, 0 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28 10:15 UTC (permalink / raw)
  To: peterz, vince, mtk.manpages
  Cc: linux-kernel, linux-man, pi3orama, lizefan, Wang Nan

Signed-off-by: Wang Nan <wangnan0@huawei.com>
---
 man2/perf_event_open.2 | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
index c90fc51..b232cba 100644
--- a/man2/perf_event_open.2
+++ b/man2/perf_event_open.2
@@ -2719,6 +2719,17 @@ The argument is a BPF program file descriptor that was created by
 a previous
 .BR bpf (2)
 system call.
+.TP
+.BR PERF_EVENT_IOC_PAUSE_OUTPUT " (since Linux 4.6)"
+.\" commit ? (http://lkml.kernel.org/g/1459147292-239310-2-git-send-email-wangnan0@huawei.com)
+This allows pausing and resuming the event's ring-buffer. A
+paused ring-buffer does not prevent sample generation, but simply
+discards them. The discarded samples are considered lost, and cause
+.BR PERF_RECORD_LOST
+to be generated when possible.
+
+The argument is an integer. Nonzero value pauses the ring-buffer,
+zero value resumes the ring-buffer.
 .SS Using prctl
 A process can enable or disable all the event groups that are
 attached to it using the
-- 
1.8.3.4

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
@ 2016-03-28 10:15     ` Wang Nan
  0 siblings, 0 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28 10:15 UTC (permalink / raw)
  To: peterz-wEGCiKHe2LqWVfeAwA7xHQ, vince-yfjdyHUqu3OsTnJN9+BGXg,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-man-u79uwXL29TY76Z2rM5mHXA, pi3orama-9Onoh4P/yGk,
	lizefan-hv44wF8Li93QT0dZR+AlfA, Wang Nan

Signed-off-by: Wang Nan <wangnan0-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 man2/perf_event_open.2 | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
index c90fc51..b232cba 100644
--- a/man2/perf_event_open.2
+++ b/man2/perf_event_open.2
@@ -2719,6 +2719,17 @@ The argument is a BPF program file descriptor that was created by
 a previous
 .BR bpf (2)
 system call.
+.TP
+.BR PERF_EVENT_IOC_PAUSE_OUTPUT " (since Linux 4.6)"
+.\" commit ? (http://lkml.kernel.org/g/1459147292-239310-2-git-send-email-wangnan0-hv44wF8Li93QT0dZR+AlfA@public.gmane.org)
+This allows pausing and resuming the event's ring-buffer. A
+paused ring-buffer does not prevent sample generation, but simply
+discards them. The discarded samples are considered lost, and cause
+.BR PERF_RECORD_LOST
+to be generated when possible.
+
+The argument is an integer. Nonzero value pauses the ring-buffer,
+zero value resumes the ring-buffer.
 .SS Using prctl
 A process can enable or disable all the event groups that are
 attached to it using the
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH][manpages 2/2] perf_event_open.2: Document write_backward
@ 2016-03-28 10:16     ` Wang Nan
  0 siblings, 0 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28 10:16 UTC (permalink / raw)
  To: peterz, vince, mtk.manpages
  Cc: linux-kernel, linux-man, pi3orama, lizefan, Wang Nan

Signed-off-by: Wang Nan <wangnan0@huawei.com>
---
 man2/perf_event_open.2 | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
index b232cba..942a410 100644
--- a/man2/perf_event_open.2
+++ b/man2/perf_event_open.2
@@ -234,8 +234,10 @@ struct perf_event_attr {
           mmap2          :  1,  /* include mmap with inode data */
           comm_exec      :  1,  /* flag comm events that are due to exec */
           use_clockid    :  1,  /* use clockid for time fields */
+          context_switch :  1,  /* context switch data */
+          write_backward :  1,  /* Write ring buffer from end to beginning */
 
-          __reserved_1   : 38;
+          __reserved_1   : 36;
 
     union {
         __u32 wakeup_events;    /* wakeup every n events */
@@ -1105,6 +1107,30 @@ field.
 This can make it easier to correlate perf sample times with
 timestamps generated by other tools.
 .TP
+.IR "write_backward" " (since Linux 4.6)"
+.\" commit ? (http://lkml.kernel.org/g/1459147292-239310-5-git-send-email-wangnan0@huawei.com)
+This makes the resulting event use a backward ring-buffer, which
+writes samples from the end of the ring-buffer.
+
+It is not allowed to connect events with backward and forward
+ring-buffer settings together using
+.BR PERF_EVENT_IOC_SET_OUTPUT .
+
+Backward ring-buffer is useful when the ring-buffer is overwritable
+(created by readonly
+.BR mmap (2)
+). In this case,
+.IR data_tail
+is useless,
+.IR data_head
+points to the head of the most recent sample in a backward
+ring-buffer. It is easy to iterate over the whole ring-buffer by reading
+samples one by one because size of a sample can be found from decoding
+its header. In contrast, in a forward overwritable ring-buffer, the only
+information is the end of the most recent sample, which is pointed to by
+.IR data_head ,
+but the size of a sample can't be determined from the end of it.
+.TP
 .IR "wakeup_events" ", " "wakeup_watermark"
 This union sets how many samples
 .RI ( wakeup_events )
@@ -1634,7 +1660,9 @@ And vice versa:
 .TP
 .I data_head
 This points to the head of the data section.
-The value continuously increases, it does not wrap.
+The value continuously increases (or decreases if
+.IR write_backward
+is set), it does not wrap.
 The value needs to be manually wrapped by the size of the mmap buffer
 before accessing the samples.
 
@@ -2581,6 +2609,24 @@ Starting with Linux 3.18,
 .B POLL_HUP
 is indicated if the event being monitored is attached to a different
 process and that process exits.
+.SS Reading from overwritable ring-buffer
+Reader is unable to update
+.IR data_tail
+if the mapping is not
+.BR PROT_WRITE .
+In this case, kernel will overwrite data without considering whether
+they are read or not, so ring-buffer is overwritable and
+behaves like a flight recorder. To read from an overwritable
+ring-buffer, setting
+.IR write_backward
+is suggested, or it would be hard to find a proper position to start
+decoding. In addition, ring-buffer should be paused before reading
+through
+.BR ioctl (2)
+with
+.B PERF_EVENT_IOC_PAUSE_OUTPUT
+to avoid racing between kernel and reader. Ring-buffer should be resumed
+after reading has finished.
 .SS rdpmc instruction
 Starting with Linux 3.4 on x86, you can use the
 .\" commit c7206205d00ab375839bd6c7ddb247d600693c09
@@ -2693,6 +2739,13 @@ The file descriptors must all be on the same CPU.
 
 The argument specifies the desired file descriptor, or \-1 if
 output should be ignored.
+
+Two events with different
+.IR write_backward
+settings are not allowed to be connected together using
+.BR PERF_EVENT_IOC_SET_OUTPUT .
+.B EINVAL
+is returned in this case.
 .TP
 .BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)"
 .\" commit 6fb2915df7f0747d9044da9dbff5b46dc2e20830
-- 
1.8.3.4

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH][manpages 2/2] perf_event_open.2: Document write_backward
@ 2016-03-28 10:16     ` Wang Nan
  0 siblings, 0 replies; 37+ messages in thread
From: Wang Nan @ 2016-03-28 10:16 UTC (permalink / raw)
  To: peterz-wEGCiKHe2LqWVfeAwA7xHQ, vince-yfjdyHUqu3OsTnJN9+BGXg,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-man-u79uwXL29TY76Z2rM5mHXA, pi3orama-9Onoh4P/yGk,
	lizefan-hv44wF8Li93QT0dZR+AlfA, Wang Nan

Signed-off-by: Wang Nan <wangnan0-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 man2/perf_event_open.2 | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
index b232cba..942a410 100644
--- a/man2/perf_event_open.2
+++ b/man2/perf_event_open.2
@@ -234,8 +234,10 @@ struct perf_event_attr {
           mmap2          :  1,  /* include mmap with inode data */
           comm_exec      :  1,  /* flag comm events that are due to exec */
           use_clockid    :  1,  /* use clockid for time fields */
+          context_switch :  1,  /* context switch data */
+          write_backward :  1,  /* Write ring buffer from end to beginning */
 
-          __reserved_1   : 38;
+          __reserved_1   : 36;
 
     union {
         __u32 wakeup_events;    /* wakeup every n events */
@@ -1105,6 +1107,30 @@ field.
 This can make it easier to correlate perf sample times with
 timestamps generated by other tools.
 .TP
+.IR "write_backward" " (since Linux 4.6)"
+.\" commit ? (http://lkml.kernel.org/g/1459147292-239310-5-git-send-email-wangnan0-hv44wF8Li93QT0dZR+AlfA@public.gmane.org)
+This makes the resulting event use a backward ring-buffer, which
+writes samples from the end of the ring-buffer.
+
+It is not allowed to connect events with backward and forward
+ring-buffer settings together using
+.BR PERF_EVENT_IOC_SET_OUTPUT .
+
+Backward ring-buffer is useful when the ring-buffer is overwritable
+(created by readonly
+.BR mmap (2)
+). In this case,
+.IR data_tail
+is useless,
+.IR data_head
+points to the head of the most recent sample in a backward
+ring-buffer. It is easy to iterate over the whole ring-buffer by reading
+samples one by one because size of a sample can be found from decoding
+its header. In contrast, in a forward overwritable ring-buffer, the only
+information is the end of the most recent sample, which is pointed to by
+.IR data_head ,
+but the size of a sample can't be determined from the end of it.
+.TP
 .IR "wakeup_events" ", " "wakeup_watermark"
 This union sets how many samples
 .RI ( wakeup_events )
@@ -1634,7 +1660,9 @@ And vice versa:
 .TP
 .I data_head
 This points to the head of the data section.
-The value continuously increases, it does not wrap.
+The value continuously increases (or decreases if
+.IR write_backward
+is set), it does not wrap.
 The value needs to be manually wrapped by the size of the mmap buffer
 before accessing the samples.
 
@@ -2581,6 +2609,24 @@ Starting with Linux 3.18,
 .B POLL_HUP
 is indicated if the event being monitored is attached to a different
 process and that process exits.
+.SS Reading from overwritable ring-buffer
+Reader is unable to update
+.IR data_tail
+if the mapping is not
+.BR PROT_WRITE .
+In this case, kernel will overwrite data without considering whether
+they are read or not, so ring-buffer is overwritable and
+behaves like a flight recorder. To read from an overwritable
+ring-buffer, setting
+.IR write_backward
+is suggested, or it would be hard to find a proper position to start
+decoding. In addition, ring-buffer should be paused before reading
+through
+.BR ioctl (2)
+with
+.B PERF_EVENT_IOC_PAUSE_OUTPUT
+to avoid racing between kernel and reader. Ring-buffer should be resumed
+after reading has finished.
 .SS rdpmc instruction
 Starting with Linux 3.4 on x86, you can use the
 .\" commit c7206205d00ab375839bd6c7ddb247d600693c09
@@ -2693,6 +2739,13 @@ The file descriptors must all be on the same CPU.
 
 The argument specifies the desired file descriptor, or \-1 if
 output should be ignored.
+
+Two events with different
+.IR write_backward
+settings are not allowed to be connected together using
+.BR PERF_EVENT_IOC_SET_OUTPUT .
+.B EINVAL
+is returned in this case.
 .TP
 .BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)"
 .\" commit 6fb2915df7f0747d9044da9dbff5b46dc2e20830
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH 3/4] perf core: Prepare writing into ring buffer from end
  2016-03-28  6:41 ` [PATCH 3/4] perf core: Prepare writing into ring buffer from end Wang Nan
@ 2016-03-29  0:25   ` Alexei Starovoitov
  2016-03-31  9:26   ` [tip:perf/core] perf/ring_buffer: Prepare writing into the ring-buffer from the end tip-bot for Wang Nan
  1 sibling, 0 replies; 37+ messages in thread
From: Alexei Starovoitov @ 2016-03-29  0:25 UTC (permalink / raw)
  To: Wang Nan
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra,
	linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li

On Mon, Mar 28, 2016 at 06:41:31AM +0000, Wang Nan wrote:
> Convert perf_output_begin to __perf_output_begin and make the latter
> function able to write records from the end of the ring buffer.
> Following commits will utilize the 'backward' flag.
> 
> This is the core patch to support writing ring buffer backward, which
> would be introduced by following patch to support reading from
> overwritable ring buffer.
> 
> In theory, this patch should not introduce any extra performance
> overhead since we use always_inline.
> 
> When CONFIG_OPTIMIZE_INLINING is disabled, the output object is nearly
> identical to original one. See [1].
> 
> When CONFIG_OPTIMIZE_INLINING is enabled, the resulting object file becomes
> smaller:
> 
>  $ size kernel/events/ring_buffer.o*
>    text       data        bss        dec        hex    filename
>    4545          4          8       4557       11cd kernel/events/ring_buffer.o.new
>    4641          4          8       4653       122d kernel/events/ring_buffer.o.old
> 
> Performance result:
> 
> Calling 3000000 times of 'close(-1)', use gettimeofday() to check
> duration.  Use 'perf record -o /dev/null -e raw_syscalls:*' to capture
> system calls. In ns.
> 
> Testing environment:
> 
>  CPU    : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
>  Kernel : v4.5.0
> 
>                      MEAN         STDVAR
>   BASE            800214.950    2853.083
>   PRE            2253846.700    9997.014
>   POST           2257495.540    8516.293
> 
> Where 'BASE' is pure performance without capturing. 'PRE' is test
> result of pure 'v4.5.0' kernel. 'POST' is test result after this
> patch. See [2] for the detailed experimental setup.
> 
> Considering the stdvar, this patch doesn't hurt performance.
> 
> For the detail of testing method, please refer to [2].
> 
> [1] http://lkml.kernel.org/g/56F52E83.70409@huawei.com
> [2] http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com
> 
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> Cc: He Kuang <hekuang@huawei.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Zefan Li <lizefan@huawei.com>
> Cc: pi3orama@163.com
> ---
>  kernel/events/ring_buffer.c | 42 ++++++++++++++++++++++++++++++++++++------
>  1 file changed, 36 insertions(+), 6 deletions(-)

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-28  6:41 ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume " Wang Nan
  2016-03-28 10:15     ` Wang Nan
@ 2016-03-29  0:27   ` Alexei Starovoitov
  2016-03-29  1:10     ` Wangnan (F)
  2016-03-29  2:05     ` [PATCH 1/4 fix] " Wang Nan
  2016-03-29 12:54   ` [PATCH 1/4] " Peter Zijlstra
  2016-03-31  9:26   ` [tip:perf/core] perf/ring_buffer: Introduce new ioctl options to pause and resume the ring-buffer tip-bot for Wang Nan
  3 siblings, 2 replies; 37+ messages in thread
From: Alexei Starovoitov @ 2016-03-29  0:27 UTC (permalink / raw)
  To: Wang Nan
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra,
	linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li

On Mon, Mar 28, 2016 at 06:41:29AM +0000, Wang Nan wrote:
> Add new ioctl() to pause/resume ring-buffer output.
> 
> In some situations we want to read from ring buffer only when we
> ensure nothing can write to the ring buffer during reading. Without
> this patch we have to turn off all events attached to this ring buffer
> to achieve this.
> 
> This patch is for supporting overwrite ring buffer. Following
> commits will introduce new methods support reading from overwrite ring
> buffer. Before reading, caller must ensure the ring buffer is frozen, or
> the reading is unreliable.
> 
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> Cc: He Kuang <hekuang@huawei.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Zefan Li <lizefan@huawei.com>
> Cc: pi3orama@163.com
> ---
>  include/uapi/linux/perf_event.h |  1 +
>  kernel/events/core.c            | 13 +++++++++++++
>  kernel/events/internal.h        | 11 +++++++++++
>  kernel/events/ring_buffer.c     |  7 ++++++-
>  4 files changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 1afe962..a3c1903 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -401,6 +401,7 @@ struct perf_event_attr {
>  #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
>  #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
>  #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
> +#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
>  
>  enum perf_event_ioc_flags {
>  	PERF_IOC_FLAG_GROUP		= 1U << 0,
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index de24fbc..cb47da3 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4341,6 +4341,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
>  	case PERF_EVENT_IOC_SET_BPF:
>  		return perf_event_set_bpf_prog(event, arg);
>  
> +	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
> +		struct ring_buffer *rb;
> +
> +		rcu_read_lock();
> +		rb = rcu_dereference(event->rb);
> +		if (!event->rb) {

should have been 'if (!rb)', right?

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-28  6:41 ` [PATCH 4/4] perf core: Add backward attribute to perf event Wang Nan
  2016-03-28 10:16     ` Wang Nan
@ 2016-03-29  0:28   ` Alexei Starovoitov
  2016-03-29  2:01   ` Wangnan (F)
  2016-03-29 14:04   ` Peter Zijlstra
  3 siblings, 0 replies; 37+ messages in thread
From: Alexei Starovoitov @ 2016-03-29  0:28 UTC (permalink / raw)
  To: Wang Nan
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra,
	linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li

On Mon, Mar 28, 2016 at 06:41:32AM +0000, Wang Nan wrote:
> This patch introduces 'write_backward' bit to perf_event_attr, which
> controls the direction of a ring buffer. After set, the corresponding
> ring buffer is written from end to beginning. This feature is designed to
> support reading from overwritable ring buffer.
> 
> Ring buffer can be created by mapping a perf event fd. Kernel puts event
> records into ring buffer, user programs like perf fetch them from
> address returned by mmap(). To prevent racing between kernel and perf,
> they communicate to each other through 'head' and 'tail' pointers.
> Kernel maintains 'head' pointer, points it to the next free area (tail
> of the last record). Perf maintains 'tail' pointer, points it to the
> tail of last consumed record (record has already been fetched). Kernel
> determines the available space in a ring buffer using these two
> pointers, prevents to overwrite unfetched records.
> 
> By mapping without 'PROT_WRITE', an overwritable ring buffer is created.
> Different from normal ring buffer, perf is unable to maintain 'tail'
> pointer because writing is forbidden. Therefore, for this type of ring
> buffers, kernel overwrite old records unconditionally, works like flight
> recorder. This feature would be useful if reading from overwritable ring
> buffer were as easy as reading from normal ring buffer. However,
> there's an obscure problem.
> 
> The following figure demonstrates the state of an overwritable ring
> buffer which is nearly full. In this figure, the 'head' pointer points
> to the end of last record, and a long record 'E' is pending. For a
> normal ring buffer, a 'tail' pointer would have pointed to position (X),
> so kernel knows there's no more space in the ring buffer. However, for
> an overwritable ring buffer, kernel doesn't care the 'tail' pointer.
> 
>    (X)                              head
>     .                                |
>     .                                V
>     +------+-------+----------+------+---+
>     |A....A|B.....B|C........C|D....D|   |
>     +------+-------+----------+------+---+
> 
> After writing record 'E', record 'A' is overwritten.
> 
>       head
>        |
>        V
>     +--+---+-------+----------+------+---+
>     |.E|..A|B.....B|C........C|D....D|E..|
>     +--+---+-------+----------+------+---+
> 
> Now perf decides to read from this ring buffer. However, neither of the
> two natural positions, 'head' and the start of this ring buffer,
> points to the head of a record. Even if perf can read the full ring
> buffer, it is unable to find the position to start decoding.
> 
> The first attempt tries to solve this problem AFAIK can be found from
> [1]. It makes kernel to maintain 'tail' pointer: updates it when ring
> buffer is half full. However, this approach introduces overhead to
> fast path. Test result shows a 1% overhead [2]. In addition, this method
> utilizes no more than 50% of the records.
> 
> Another attempt can be found from [3], which allow putting the size of
> an event at the end of each record. This approach allows perf to find
> records in a backward manner from 'head' pointer by reading size of a
> record from its tail. However, because of alignment requirement, it
> needs 8 bytes to record the size of a record, which is a huge waste. Its
> performance is also not good, because more data need to be written.
> This approach also introduces some extra branch instructions to fast
> path.
> 
> 'write_backward' is a better solution to this problem.
> 
> Following figure demonstrates the state of the overwritable ring buffer
> when 'write_backward' is set before overwriting:
> 
>        head
>         |
>         V
>     +---+------+----------+-------+------+
>     |   |D....D|C........C|B.....B|A....A|
>     +---+------+----------+-------+------+
> 
> and after overwriting:
>                                      head
>                                       |
>                                       V
>     +---+------+----------+-------+---+--+
>     |..E|D....D|C........C|B.....B|A..|E.|
>     +---+------+----------+-------+---+--+
> 
> In each situation, 'head' points to the beginning of the newest record.
> From this record, perf can iterate over the full ring buffer, fetching
> as many records as possible one by one.
> 
> The only limitation needs to consider is back-to-back reading. Due to
> the non-determinism of user programs, it is impossible to ensure the
> ring buffer keeps stable during reading. Consider an extreme situation:
> perf is scheduled out after reading record 'D', then a burst of events
> come, eat up the whole ring buffer (one or multiple rounds), but 'head'
> pointer happens to be at the same position when perf comes back.
> Continue reading after 'D' is incorrect now.
> 
> To prevent this problem, we need to find a way to ensure the ring buffer
> is stable during reading. ioctl(PERF_EVENT_IOC_PAUSE_OUTPUT) is
> suggested because its overhead is lower than
> ioctl(PERF_EVENT_IOC_ENABLE).
> 
> This patch utilizes event's default overflow_handler introduced
> previously. perf_event_output_backward() is created as the default
> overflow handler for backward ring buffers. To avoid extra overhead to
> fast path, original perf_event_output() becomes __perf_event_output()
> and marked '__always_inline'. In theory, there's no extra overhead
> introduced to fast path.
> 
> Performance result:
> 
> Calling 3000000 times of 'close(-1)', use gettimeofday() to check
> duration.  Use 'perf record -o /dev/null -e raw_syscalls:*' to capture
> system calls. In ns.
> 
> Testing environment:
> 
>  CPU    : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
>  Kernel : v4.5.0
>                    MEAN         STDVAR
> BASE            800214.950    2853.083
> PRE1           2253846.700    9997.014
> PRE2           2257495.540    8516.293
> POST           2250896.100    8933.921
> 
> Where 'BASE' is pure performance without capturing. 'PRE1' is test
> result of pure 'v4.5.0' kernel. 'PRE2' is test result before this
> patch. 'POST' is test result after this patch. See [4] for detail
> experimental setup.
> 
> Considering the stdvar, this patch doesn't introduce performance
> overhead to fast path.
> 
> [1] http://lkml.iu.edu/hypermail/linux/kernel/1304.1/04584.html
> [2] http://lkml.iu.edu/hypermail/linux/kernel/1307.1/00535.html
> [3] http://lkml.iu.edu/hypermail/linux/kernel/1512.0/01265.html
> [4] http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com
> 
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> Cc: He Kuang <hekuang@huawei.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Zefan Li <lizefan@huawei.com>
> Cc: pi3orama@163.com
> ---
>  include/linux/perf_event.h      | 28 +++++++++++++++++++++---
>  include/uapi/linux/perf_event.h |  3 ++-
>  kernel/events/core.c            | 48 ++++++++++++++++++++++++++++++++++++-----
>  kernel/events/ring_buffer.c     | 14 ++++++++++++
>  4 files changed, 84 insertions(+), 9 deletions(-)

Very useful feature. Looks good.
Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-29  0:27   ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer Alexei Starovoitov
@ 2016-03-29  1:10     ` Wangnan (F)
  2016-03-29  2:05     ` [PATCH 1/4 fix] " Wang Nan
  1 sibling, 0 replies; 37+ messages in thread
From: Wangnan (F) @ 2016-03-29  1:10 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra,
	linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li



On 2016/3/29 8:27, Alexei Starovoitov wrote:
> On Mon, Mar 28, 2016 at 06:41:29AM +0000, Wang Nan wrote:
>> Add new ioctl() to pause/resume ring-buffer output.
>>
>> In some situations we want to read from ring buffer only when we
>> ensure nothing can write to the ring buffer during reading. Without
>> this patch we have to turn off all events attached to this ring buffer
>> to achieve this.
>>
>> This patch is for supporting overwrite ring buffer. Following
>> commits will introduce new methods support reading from overwrite ring
>> buffer. Before reading, caller must ensure the ring buffer is frozen, or
>> the reading is unreliable.
>>
>> Signed-off-by: Wang Nan <wangnan0@huawei.com>
>> Cc: He Kuang <hekuang@huawei.com>
>> Cc: Alexei Starovoitov <ast@kernel.org>
>> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
>> Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
>> Cc: Jiri Olsa <jolsa@kernel.org>
>> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
>> Cc: Namhyung Kim <namhyung@kernel.org>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Zefan Li <lizefan@huawei.com>
>> Cc: pi3orama@163.com
>> ---
>>   include/uapi/linux/perf_event.h |  1 +
>>   kernel/events/core.c            | 13 +++++++++++++
>>   kernel/events/internal.h        | 11 +++++++++++
>>   kernel/events/ring_buffer.c     |  7 ++++++-
>>   4 files changed, 31 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index 1afe962..a3c1903 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -401,6 +401,7 @@ struct perf_event_attr {
>>   #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
>>   #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
>>   #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
>> +#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
>>   
>>   enum perf_event_ioc_flags {
>>   	PERF_IOC_FLAG_GROUP		= 1U << 0,
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index de24fbc..cb47da3 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -4341,6 +4341,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
>>   	case PERF_EVENT_IOC_SET_BPF:
>>   		return perf_event_set_bpf_prog(event, arg);
>>   
>> +	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
>> +		struct ring_buffer *rb;
>> +
>> +		rcu_read_lock();
>> +		rb = rcu_dereference(event->rb);
>> +		if (!event->rb) {
> should have been 'if (!rb)', right?

Good catch. Thank you!

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-28  6:41 ` [PATCH 4/4] perf core: Add backward attribute to perf event Wang Nan
  2016-03-28 10:16     ` Wang Nan
  2016-03-29  0:28   ` [PATCH 4/4] perf core: Add backward attribute to perf event Alexei Starovoitov
@ 2016-03-29  2:01   ` Wangnan (F)
  2016-03-29  4:59     ` Alexei Starovoitov
  2016-03-29 14:04   ` Peter Zijlstra
  3 siblings, 1 reply; 37+ messages in thread
From: Wangnan (F) @ 2016-03-29  2:01 UTC (permalink / raw)
  To: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li



On 2016/3/28 14:41, Wang Nan wrote:

[SNIP]

>
> To prevent this problem, we need to find a way to ensure the ring buffer
> is stable during reading. ioctl(PERF_EVENT_IOC_PAUSE_OUTPUT) is
> suggested because its overhead is lower than
> ioctl(PERF_EVENT_IOC_ENABLE).
>

Add comment:

By carefully verifying the 'head' pointer, the reader can avoid pausing the
ring-buffer. For example:

     /* A union of all possible events */
     union perf_event event;

     p = head = perf_mmap__read_head();
     while (true) {
         /* copy header of next event */
         fetch(&event.header, p, sizeof(event.header));

         /* read 'head' pointer */
         head = perf_mmap__read_head();

         /* check overwritten: is the header good? */
         if (!verify(sizeof(event.header), p, head))
             break;

         /* copy the whole event */
         fetch(&event, p, event.header.size);

         /* read 'head' pointer again */
         head = perf_mmap__read_head();

         /* is the whole event good? */
         if (!verify(event.header.size, p, head))
             break;
         p += event.header.size;
     }

However, the overhead is high because:

  a) In-place decoding is unsafe. Copy-verifying-decode is required.
  b) Fetching 'head' pointer requires additional synchronization.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 1/4 fix] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-29  0:27   ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer Alexei Starovoitov
  2016-03-29  1:10     ` Wangnan (F)
@ 2016-03-29  2:05     ` Wang Nan
  2016-03-29  4:39       ` Alexei Starovoitov
  1 sibling, 1 reply; 37+ messages in thread
From: Wang Nan @ 2016-03-29  2:05 UTC (permalink / raw)
  To: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra
  Cc: linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Wang Nan, Zefan Li

Add new ioctl() to pause/resume ring-buffer output.

In some situations we want to read from ring buffer only when we
ensure nothing can write to the ring buffer during reading. Without
this patch we have to turn off all events attached to this ring buffer
to achieve this.

This patch is for supporting the overwrite ring buffer. Following
commits will introduce new methods to support reading from the overwrite
ring buffer. Before reading, the caller must ensure the ring buffer is
frozen, or the reading is unreliable.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c            | 13 +++++++++++++
 kernel/events/internal.h        | 11 +++++++++++
 kernel/events/ring_buffer.c     |  7 ++++++-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1afe962..a3c1903 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -401,6 +401,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index de24fbc..a1636d3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4341,6 +4341,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 	case PERF_EVENT_IOC_SET_BPF:
 		return perf_event_set_bpf_prog(event, arg);
 
+	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
+		struct ring_buffer *rb;
+
+		rcu_read_lock();
+		rb = rcu_dereference(event->rb);
+		if (!rb) {
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+		rb_toggle_paused(rb, !!arg);
+		rcu_read_unlock();
+		return 0;
+	}
 	default:
 		return -ENOTTY;
 	}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2bbad9c..6a93d1b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -18,6 +18,7 @@ struct ring_buffer {
 #endif
 	int				nr_pages;	/* nr of data pages  */
 	int				overwrite;	/* can overwrite itself */
+	int				paused;		/* can write into ring buffer */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 
@@ -65,6 +66,16 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	rb_free(rb);
 }
 
+static inline void
+rb_toggle_paused(struct ring_buffer *rb,
+		 bool pause)
+{
+	if (!pause && rb->nr_pages)
+		rb->paused = 0;
+	else
+		rb->paused = 1;
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index c61f0cb..17de83b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -125,8 +125,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (unlikely(!rb))
 		goto out;
 
-	if (unlikely(!rb->nr_pages))
+	if (unlikely(rb->paused)) {
+		if (rb->nr_pages)
+			local_inc(&rb->lost);
 		goto out;
+	}
 
 	handle->rb    = rb;
 	handle->event = event;
@@ -244,6 +247,8 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
 	init_irq_work(&rb->irq_work, rb_irq_work);
+
+	rb->paused = rb->nr_pages ? 0 : 1;
 }
 
 static void ring_buffer_put_async(struct ring_buffer *rb)
-- 
1.8.3.4

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH 1/4 fix] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-29  2:05     ` [PATCH 1/4 fix] " Wang Nan
@ 2016-03-29  4:39       ` Alexei Starovoitov
  0 siblings, 0 replies; 37+ messages in thread
From: Alexei Starovoitov @ 2016-03-29  4:39 UTC (permalink / raw)
  To: Wang Nan
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra,
	linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li

On Tue, Mar 29, 2016 at 02:05:07AM +0000, Wang Nan wrote:
> Add new ioctl() to pause/resume ring-buffer output.
> 
> In some situations we want to read from ring buffer only when we
> ensure nothing can write to the ring buffer during reading. Without
> this patch we have to turn off all events attached to this ring buffer
> to achieve this.
> 
> This patch is for supporting overwrite ring buffer. Following
> commits will introduce new methods support reading from overwrite ring
> buffer. Before reading, caller must ensure the ring buffer is frozen, or
> the reading is unreliable.
> 
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> Cc: He Kuang <hekuang@huawei.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Zefan Li <lizefan@huawei.com>
> Cc: pi3orama@163.com
> ---
>  include/uapi/linux/perf_event.h |  1 +
>  kernel/events/core.c            | 13 +++++++++++++
>  kernel/events/internal.h        | 11 +++++++++++
>  kernel/events/ring_buffer.c     |  7 ++++++-
>  4 files changed, 31 insertions(+), 1 deletion(-)

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-29  2:01   ` Wangnan (F)
@ 2016-03-29  4:59     ` Alexei Starovoitov
  2016-03-29  5:59       ` Wangnan (F)
  0 siblings, 1 reply; 37+ messages in thread
From: Alexei Starovoitov @ 2016-03-29  4:59 UTC (permalink / raw)
  To: Wangnan (F)
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra,
	linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li

On Tue, Mar 29, 2016 at 10:01:24AM +0800, Wangnan (F) wrote:
> 
> 
> On 2016/3/28 14:41, Wang Nan wrote:
> 
> [SNIP]
> 
> >
> >To prevent this problem, we need to find a way to ensure the ring buffer
> >is stable during reading. ioctl(PERF_EVENT_IOC_PAUSE_OUTPUT) is
> >suggested because its overhead is lower than
> >ioctl(PERF_EVENT_IOC_ENABLE).
> >
> 
> Add comment:
> 
> By carefully verifying 'header' pointer, reader can avoid pausing the
> ring-buffer. For example:
> 
>     /* A union of all possible events */
>     union perf_event event;
> 
>     p = head = perf_mmap__read_head();
>     while (true) {
>         /* copy header of next event */
>         fetch(&event.header, p, sizeof(event.header));
> 
>         /* read 'head' pointer */
>         head = perf_mmap__read_head();
> 
>         /* check overwritten: is the header good? */
>         if (!verify(sizeof(event.header), p, head))
>             break;
> 
>         /* copy the whole event */
>         fetch(&event, p, event.header.size);
> 
>         /* read 'head' pointer again */
>         head = perf_mmap__read_head();
> 
>         /* is the whole event good? */
>         if (!verify(event.header.size, p, head))
>             break;
>         p += event.header.size;
>     }
> 
> However, the overhead is high because:
> 
>  a) In-place decoding is unsafe. Copy-verifying-decode is required.
>  b) Fetching 'head' pointer requires additional synchronization.

Such trick may work, but pause is needed for more than stability
of reading. When we collect the events into overwrite buffer
we're waiting for some other trigger (like all cpu utilization
spike or just one cpu running and all others are idle) and when
it happens the buffer has valuable info from the past. At this
point new events are no longer interesting and buffer should
be paused, events read and unpaused until next trigger comes.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-29  4:59     ` Alexei Starovoitov
@ 2016-03-29  5:59       ` Wangnan (F)
  0 siblings, 0 replies; 37+ messages in thread
From: Wangnan (F) @ 2016-03-29  5:59 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, Peter Zijlstra,
	linux-kernel, Brendan Gregg, He Kuang, Jiri Olsa,
	Masami Hiramatsu, Namhyung Kim, pi3orama, Zefan Li



On 2016/3/29 12:59, Alexei Starovoitov wrote:
> On Tue, Mar 29, 2016 at 10:01:24AM +0800, Wangnan (F) wrote:
>>
>> On 2016/3/28 14:41, Wang Nan wrote:
>>
>> [SNIP]
>>
>>> To prevent this problem, we need to find a way to ensure the ring buffer
>>> is stable during reading. ioctl(PERF_EVENT_IOC_PAUSE_OUTPUT) is
>>> suggested because its overhead is lower than
>>> ioctl(PERF_EVENT_IOC_ENABLE).
>>>
>> Add comment:
>>
>> By carefully verifying 'header' pointer, reader can avoid pausing the
>> ring-buffer. For example:
>>
>>      /* A union of all possible events */
>>      union perf_event event;
>>
>>      p = head = perf_mmap__read_head();
>>      while (true) {
>>          /* copy header of next event */
>>          fetch(&event.header, p, sizeof(event.header));
>>
>>          /* read 'head' pointer */
>>          head = perf_mmap__read_head();
>>
>>          /* check overwritten: is the header good? */
>>          if (!verify(sizeof(event.header), p, head))
>>              break;
>>
>>          /* copy the whole event */
>>          fetch(&event, p, event.header.size);
>>
>>          /* read 'head' pointer again */
>>          head = perf_mmap__read_head();
>>
>>          /* is the whole event good? */
>>          if (!verify(event.header.size, p, head))
>>              break;
>>          p += event.header.size;
>>      }
>>
>> However, the overhead is high because:
>>
>>   a) In-place decoding is unsafe. Copy-verifying-decode is required.
>>   b) Fetching 'head' pointer requires additional synchronization.
> Such trick may work, but pause is needed for more than stability
> of reading. When we collect the events into overwrite buffer
> we're waiting for some other trigger (like all cpu utilization
> spike or just one cpu running and all others are idle) and when
> it happens the buffer has valuable info from the past. At this
> point new events are no longer interesting and buffer should
> be paused, events read and unpaused until next trigger comes.

Agree. I just want to provide an alternative method.
I'm trying to point out that pausing is not mandatory
but highly recommended in the man page and commit
messages.

Thank you.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-28  6:41 ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume " Wang Nan
  2016-03-28 10:15     ` Wang Nan
  2016-03-29  0:27   ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer Alexei Starovoitov
@ 2016-03-29 12:54   ` Peter Zijlstra
  2016-03-29 12:55     ` Peter Zijlstra
  2016-03-30  1:57     ` Wangnan (F)
  2016-03-31  9:26   ` [tip:perf/core] perf/ring_buffer: Introduce new ioctl options to pause and resume the ring-buffer tip-bot for Wang Nan
  3 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2016-03-29 12:54 UTC (permalink / raw)
  To: Wang Nan
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li

On Mon, Mar 28, 2016 at 06:41:29AM +0000, Wang Nan wrote:
> Add new ioctl() to pause/resume ring-buffer output.
> 
> In some situations we want to read from ring buffer only when we
> ensure nothing can write to the ring buffer during reading. Without
> this patch we have to turn off all events attached to this ring buffer
> to achieve this.
> 
> This patch is for supporting overwrite ring buffer. Following
> commits will introduce new methods support reading from overwrite ring
> buffer. Before reading, caller must ensure the ring buffer is frozen, or
> the reading is unreliable.
> 
> Signed-off-by: Wang Nan <wangnan0@huawei.com>

I made the below changes.

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4346,7 +4346,7 @@ static long _perf_ioctl(struct perf_even
 
 		rcu_read_lock();
 		rb = rcu_dereference(event->rb);
-		if (!event->rb) {
+		if (!event->rb || !event->nr_pages) {
 			rcu_read_unlock();
 			return -EINVAL;
 		}
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -66,9 +66,7 @@ static inline void rb_free_rcu(struct rc
 	rb_free(rb);
 }
 
-static inline void
-rb_toggle_paused(struct ring_buffer *rb,
-		 bool pause)
+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
 {
 	if (!pause && rb->nr_pages)
 		rb->paused = 0;
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -248,7 +248,12 @@ ring_buffer_init(struct ring_buffer *rb,
 	spin_lock_init(&rb->event_lock);
 	init_irq_work(&rb->irq_work, rb_irq_work);
 
-	rb->paused = rb->nr_pages ? 0 : 1;
+	/*
+	 * perf_output_begin() only checks rb->paused, therefore
+	 * rb->paused must be true if we have no pages for output.
+	 */
+	if (!rb->nr_pages)
+		rb->paused = 1;
 }
 
 static void ring_buffer_put_async(struct ring_buffer *rb)

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-29 12:54   ` [PATCH 1/4] " Peter Zijlstra
@ 2016-03-29 12:55     ` Peter Zijlstra
  2016-03-30  1:57     ` Wangnan (F)
  1 sibling, 0 replies; 37+ messages in thread
From: Peter Zijlstra @ 2016-03-29 12:55 UTC (permalink / raw)
  To: Wang Nan
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li

On Tue, Mar 29, 2016 at 02:54:23PM +0200, Peter Zijlstra wrote:
> +++ b/kernel/events/core.c
> @@ -4346,7 +4346,7 @@ static long _perf_ioctl(struct perf_even
>  
>  		rcu_read_lock();
>  		rb = rcu_dereference(event->rb);
> -		if (!event->rb) {
> +		if (!event->rb || !event->nr_pages) {
>  			rcu_read_unlock();
>  			return -EINVAL;
>  		}

Clearly that should've been:

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4346,7 +4346,7 @@ static long _perf_ioctl(struct perf_even
 
 		rcu_read_lock();
 		rb = rcu_dereference(event->rb);
-		if (!event->rb || !event->nr_pages) {
+		if (!rb || !rb->nr_pages) {
 			rcu_read_unlock();
 			return -EINVAL;
 		}

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-28  6:41 ` [PATCH 4/4] perf core: Add backward attribute to perf event Wang Nan
                     ` (2 preceding siblings ...)
  2016-03-29  2:01   ` Wangnan (F)
@ 2016-03-29 14:04   ` Peter Zijlstra
  2016-03-30  2:28     ` Wangnan (F)
  2016-04-07  9:45     ` Wangnan (F)
  3 siblings, 2 replies; 37+ messages in thread
From: Peter Zijlstra @ 2016-03-29 14:04 UTC (permalink / raw)
  To: Wang Nan
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li

On Mon, Mar 28, 2016 at 06:41:32AM +0000, Wang Nan wrote:

Could you maybe write a perf/tests thingy for this so that _some_
userspace exists that exercises this new code?


>  int perf_output_begin(struct perf_output_handle *handle,
>  		      struct perf_event *event, unsigned int size)
>  {
> +	if (unlikely(is_write_backward(event)))
> +		return __perf_output_begin(handle, event, size, true);
>  	return __perf_output_begin(handle, event, size, false);
>  }

Would something like:

int perf_output_begin(...)
{
	if (unlikely(is_write_backward(event)))
		return perf_output_begin_backward(...);
	return perf_output_begin_forward(...);
}

make sense; I'm not sure how much is still using this, but it seems
somewhat excessive to inline two copies of that thing into a single
function.

Alternatively; something like:

int perf_output_begin(...)
{
	return __perf_output_begin(..., unlikely(event->attr.backwards));
}

might make sense too.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-29 12:54   ` [PATCH 1/4] " Peter Zijlstra
  2016-03-29 12:55     ` Peter Zijlstra
@ 2016-03-30  1:57     ` Wangnan (F)
  2016-03-30  6:46       ` Peter Zijlstra
  1 sibling, 1 reply; 37+ messages in thread
From: Wangnan (F) @ 2016-03-30  1:57 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li



On 2016/3/29 20:54, Peter Zijlstra wrote:
> On Mon, Mar 28, 2016 at 06:41:29AM +0000, Wang Nan wrote:
>> Add new ioctl() to pause/resume ring-buffer output.
>>
>> In some situations we want to read from ring buffer only when we
>> ensure nothing can write to the ring buffer during reading. Without
>> this patch we have to turn off all events attached to this ring buffer
>> to achieve this.
>>
>> This patch is for supporting overwrite ring buffer. Following
>> commits will introduce new methods support reading from overwrite ring
>> buffer. Before reading, caller must ensure the ring buffer is frozen, or
>> the reading is unreliable.
>>
>> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> I made the below changes.

Can I add your SOB when I resend it?

> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4346,7 +4346,7 @@ static long _perf_ioctl(struct perf_even
>   
>   		rcu_read_lock();
>   		rb = rcu_dereference(event->rb);
> -		if (!event->rb) {
> +		if (!event->rb || !event->nr_pages) {
>   			rcu_read_unlock();
>   			return -EINVAL;
>   		}
> --- a/kernel/events/internal.h
> +++ b/kernel/events/internal.h
> @@ -66,9 +66,7 @@ static inline void rb_free_rcu(struct rc
>   	rb_free(rb);
>   }
>   
> -static inline void
> -rb_toggle_paused(struct ring_buffer *rb,
> -		 bool pause)
> +static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
>   {
>   	if (!pause && rb->nr_pages)
>   		rb->paused = 0;
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -248,7 +248,12 @@ ring_buffer_init(struct ring_buffer *rb,
>   	spin_lock_init(&rb->event_lock);
>   	init_irq_work(&rb->irq_work, rb_irq_work);
>   
> -	rb->paused = rb->nr_pages ? 0 : 1;
> +	/*
> +	 * perf_output_begin() only checks rb->paused, therefore
> +	 * rb->paused must be true if we have no pages for output.
> +	 */
> +	if (!rb->nr_pages)
> +		rb->paused = 1;
>   }

I still think we need to explicitly set rb->paused to 0
when rb->nr_pages is non-zero, so that a future change which
re-initializes an old 'struct ring_buffer' does not inherit a
stale value:

         rb->paused = 0;
         if (unlikely(!rb->nr_pages))
                 rb->paused = 1;

Thank you.

>   
>   static void ring_buffer_put_async(struct ring_buffer *rb)

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-29 14:04   ` Peter Zijlstra
@ 2016-03-30  2:28     ` Wangnan (F)
  2016-03-30  2:38       ` Wangnan (F)
  2016-04-07  9:45     ` Wangnan (F)
  1 sibling, 1 reply; 37+ messages in thread
From: Wangnan (F) @ 2016-03-30  2:28 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li



On 2016/3/29 22:04, Peter Zijlstra wrote:
> On Mon, Mar 28, 2016 at 06:41:32AM +0000, Wang Nan wrote:
>
> Could you maybe write a perf/tests thingy for this so that _some_
> userspace exists that exercises this new code?
>
>
>>   int perf_output_begin(struct perf_output_handle *handle,
>>   		      struct perf_event *event, unsigned int size)
>>   {
>> +	if (unlikely(is_write_backward(event)))
>> +		return __perf_output_begin(handle, event, size, true);
>>   	return __perf_output_begin(handle, event, size, false);
>>   }
> Would something like:
>
> int perf_output_begin(...)
> {
> 	if (unlikely(is_write_backward(event))
> 		return perf_output_begin_backward(...);
> 	return perf_output_begin_forward(...);
> }
>
> make sense; I'm not sure how much is still using this, but it seems
> somewhat excessive to inline two copies of that thing into a single
> function.

perf_output_begin is still widely used:

$ grep perf_output_begin ./kernel -r
./kernel/events/ring_buffer.c:     * See perf_output_begin().
./kernel/events/ring_buffer.c:int perf_output_begin(struct 
perf_output_handle *handle,
./kernel/events/ring_buffer.c:     * perf_output_begin() only checks 
rb->paused, therefore
./kernel/events/core.c:    if (perf_output_begin(&handle, event, 
header.size))
./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
read_event.header.size);
./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
rec.header.size);
./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
se->event_id.header.size);
./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
rec.header.size);

Events like PERF_RECORD_MMAP2 use this function, so we still need to 
consider its overhead.

So I will use your first suggestion.

Thank you.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-30  2:28     ` Wangnan (F)
@ 2016-03-30  2:38       ` Wangnan (F)
  2016-04-05 14:05         ` Wangnan (F)
  0 siblings, 1 reply; 37+ messages in thread
From: Wangnan (F) @ 2016-03-30  2:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li



On 2016/3/30 10:28, Wangnan (F) wrote:
>
>
> On 2016/3/29 22:04, Peter Zijlstra wrote:
>> On Mon, Mar 28, 2016 at 06:41:32AM +0000, Wang Nan wrote:
>>
>> Could you maybe write a perf/tests thingy for this so that _some_
>> userspace exists that exercises this new code?
>>
>>
>>>   int perf_output_begin(struct perf_output_handle *handle,
>>>                 struct perf_event *event, unsigned int size)
>>>   {
>>> +    if (unlikely(is_write_backward(event)))
>>> +        return __perf_output_begin(handle, event, size, true);
>>>       return __perf_output_begin(handle, event, size, false);
>>>   }
>> Would something like:
>>
>> int perf_output_begin(...)
>> {
>>     if (unlikely(is_write_backward(event))
>>         return perf_output_begin_backward(...);
>>     return perf_output_begin_forward(...);
>> }
>>
>> make sense; I'm not sure how much is still using this, but it seems
>> somewhat excessive to inline two copies of that thing into a single
>> function.
>
> perf_output_begin is useful:
>
> $ grep perf_output_begin ./kernel -r
> ./kernel/events/ring_buffer.c:     * See perf_output_begin().
> ./kernel/events/ring_buffer.c:int perf_output_begin(struct 
> perf_output_handle *handle,
> ./kernel/events/ring_buffer.c:     * perf_output_begin() only checks 
> rb->paused, therefore
> ./kernel/events/core.c:    if (perf_output_begin(&handle, event, 
> header.size))
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
> read_event.header.size);
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
> rec.header.size);
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
> se->event_id.header.size);
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event,
> ./kernel/events/core.c:    ret = perf_output_begin(&handle, event, 
> rec.header.size);
>
> Events like PERF_RECORD_MMAP2 uses this function, so we still need to 
> consider its overhead.
>
> So I will use your first suggestion.
>

Sorry. Your second suggestion seems also good:

My implementation makes a big perf_output_begin(), but introduces only 
one load and one branch.

Your first suggestion introduces one load, one branch and one function call.

Your second suggestion introduces one load, and at least one (and at 
most three) branches.

I need some benchmarking result.

Thank you.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer
  2016-03-30  1:57     ` Wangnan (F)
@ 2016-03-30  6:46       ` Peter Zijlstra
  0 siblings, 0 replies; 37+ messages in thread
From: Peter Zijlstra @ 2016-03-30  6:46 UTC (permalink / raw)
  To: Wangnan (F)
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li

On Wed, Mar 30, 2016 at 09:57:02AM +0800, Wangnan (F) wrote:
> 
> 
> On 2016/3/29 20:54, Peter Zijlstra wrote:
> >On Mon, Mar 28, 2016 at 06:41:29AM +0000, Wang Nan wrote:
> >>Add new ioctl() to pause/resume ring-buffer output.
> >>
> >>In some situations we want to read from ring buffer only when we
> >>ensure nothing can write to the ring buffer during reading. Without
> >>this patch we have to turn off all events attached to this ring buffer
> >>to achieve this.
> >>
> >>This patch is for supporting overwrite ring buffer. Following
> >>commits will introduce new methods support reading from overwrite ring
> >>buffer. Before reading, caller must ensure the ring buffer is frozen, or
> >>the reading is unreliable.
> >>
> >>Signed-off-by: Wang Nan <wangnan0@huawei.com>
> >I made the below changes.
> 
> Can I add your SOB when I resend it?

No need, I've got it.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [tip:perf/core] perf/ring_buffer: Introduce new ioctl options to pause and resume the ring-buffer
  2016-03-28  6:41 ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume " Wang Nan
                     ` (2 preceding siblings ...)
  2016-03-29 12:54   ` [PATCH 1/4] " Peter Zijlstra
@ 2016-03-31  9:26   ` tip-bot for Wang Nan
  3 siblings, 0 replies; 37+ messages in thread
From: tip-bot for Wang Nan @ 2016-03-31  9:26 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: jolsa, acme, namhyung, masami.hiramatsu.pt, jolsa, torvalds,
	eranian, pi3orama, ast, tglx, mingo, alexander.shishkin,
	wangnan0, hpa, brendan.d.gregg, lizefan, vincent.weaver, peterz,
	hekuang, linux-kernel

Commit-ID:  86e7972f690c1017fd086cdfe53d8524e68c661c
Gitweb:     http://git.kernel.org/tip/86e7972f690c1017fd086cdfe53d8524e68c661c
Author:     Wang Nan <wangnan0@huawei.com>
AuthorDate: Mon, 28 Mar 2016 06:41:29 +0000
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 31 Mar 2016 10:30:45 +0200

perf/ring_buffer: Introduce new ioctl options to pause and resume the ring-buffer

Add new ioctl() to pause/resume ring-buffer output.

In some situations we want to read from the ring-buffer only when we
ensure nothing can write to the ring-buffer during reading. Without
this patch we have to turn off all events attached to this ring-buffer
to achieve this.

This patch is a prerequisite to enable overwrite support for the
perf ring-buffer. Following commits will introduce new methods to
support reading from the overwrite ring buffer. Before reading, the caller
must ensure the ring buffer is frozen, or the reading is unreliable.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <pi3orama@163.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1459147292-239310-2-git-send-email-wangnan0@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c            | 13 +++++++++++++
 kernel/events/internal.h        |  9 +++++++++
 kernel/events/ring_buffer.c     | 12 +++++++++++-
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1afe962..a3c1903 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -401,6 +401,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 243df4b..51386e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4379,6 +4379,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 	case PERF_EVENT_IOC_SET_BPF:
 		return perf_event_set_bpf_prog(event, arg);
 
+	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
+		struct ring_buffer *rb;
+
+		rcu_read_lock();
+		rb = rcu_dereference(event->rb);
+		if (!rb || !rb->nr_pages) {
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+		rb_toggle_paused(rb, !!arg);
+		rcu_read_unlock();
+		return 0;
+	}
 	default:
 		return -ENOTTY;
 	}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2b229fd..2d67327 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -17,6 +17,7 @@ struct ring_buffer {
 #endif
 	int				nr_pages;	/* nr of data pages  */
 	int				overwrite;	/* can overwrite itself */
+	int				paused;		/* can write into ring buffer */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 
@@ -64,6 +65,14 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	rb_free(rb);
 }
 
+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+{
+	if (!pause && rb->nr_pages)
+		rb->paused = 0;
+	else
+		rb->paused = 1;
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 0ed4555..72d8127 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -125,8 +125,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (unlikely(!rb))
 		goto out;
 
-	if (unlikely(!rb->nr_pages))
+	if (unlikely(rb->paused)) {
+		if (rb->nr_pages)
+			local_inc(&rb->lost);
 		goto out;
+	}
 
 	handle->rb    = rb;
 	handle->event = event;
@@ -241,6 +244,13 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
+
+	/*
+	 * perf_output_begin() only checks rb->paused, therefore
+	 * rb->paused must be true if we have no pages for output.
+	 */
+	if (!rb->nr_pages)
+		rb->paused = 1;
 }
 
 /*

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip:perf/core] perf/core: Set event's default ::overflow_handler()
  2016-03-28  6:41 ` [PATCH 2/4] perf core: Set event's default overflow_handler Wang Nan
@ 2016-03-31  9:26   ` tip-bot for Wang Nan
  0 siblings, 0 replies; 37+ messages in thread
From: tip-bot for Wang Nan @ 2016-03-31  9:26 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, wangnan0, mingo, hekuang, jolsa, alexander.shishkin,
	tglx, masami.hiramatsu.pt, hpa, namhyung, brendan.d.gregg, jolsa,
	ast, pi3orama, acme, linux-kernel, lizefan, torvalds,
	vincent.weaver, eranian

Commit-ID:  1879445dfa7bbd6fe21b09c5cc72f4934798afed
Gitweb:     http://git.kernel.org/tip/1879445dfa7bbd6fe21b09c5cc72f4934798afed
Author:     Wang Nan <wangnan0@huawei.com>
AuthorDate: Mon, 28 Mar 2016 06:41:30 +0000
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 31 Mar 2016 10:30:47 +0200

perf/core: Set event's default ::overflow_handler()

Set a default event->overflow_handler in perf_event_alloc() so we don't
need to check event->overflow_handler in __perf_event_overflow().
Following commits can give a different default overflow_handler.

Initial idea comes from Peter:

  http://lkml.kernel.org/r/20130708121557.GA17211@twins.programming.kicks-ass.net

Since the default value of event->overflow_handler is not NULL, existing
'if (!overflow_handler)' checks need to be changed.

is_default_overflow_handler() is introduced for this.

No extra performance overhead is introduced into the hot path because in the
original code we still need to read this handler from memory. A conditional
branch is avoided so actually we remove some instructions.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <pi3orama@163.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1459147292-239310-3-git-send-email-wangnan0@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/kernel/hw_breakpoint.c   |  4 ++--
 arch/arm64/kernel/hw_breakpoint.c |  4 ++--
 include/linux/perf_event.h        |  6 ++++++
 kernel/events/core.c              | 14 ++++++++------
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 6284779..b8df458 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	info->address &= ~alignment_mask;
 	info->ctrl.len <<= offset;
 
-	if (!bp->overflow_handler) {
+	if (is_default_overflow_handler(bp)) {
 		/*
 		 * Mismatch breakpoints are required for single-stepping
 		 * breakpoints.
@@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 * mismatch breakpoint so we can single-step over the
 		 * watchpoint trigger.
 		 */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			enable_single_step(wp, instruction_pointer(regs));
 
 unlock:
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..4ef5373 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,
 		perf_bp_event(bp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!bp->overflow_handler)
+		if (is_default_overflow_handler(bp))
 			step = 1;
 unlock:
 		rcu_read_unlock();
@@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
 		perf_bp_event(wp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			step = 1;
 
 unlock:
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 15588d4..4065ca2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -838,6 +838,12 @@ extern void perf_event_output(struct perf_event *event,
 				struct perf_sample_data *data,
 				struct pt_regs *regs);
 
+static inline bool
+is_default_overflow_handler(struct perf_event *event)
+{
+	return (event->overflow_handler == perf_event_output);
+}
+
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
 			   struct perf_sample_data *data,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 51386e8..8c3b35f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6628,10 +6628,7 @@ static int __perf_event_overflow(struct perf_event *event,
 		irq_work_queue(&event->pending);
 	}
 
-	if (event->overflow_handler)
-		event->overflow_handler(event, data, regs);
-	else
-		perf_event_output(event, data, regs);
+	event->overflow_handler(event, data, regs);
 
 	if (*perf_event_fasync(event) && event->pending_kill) {
 		event->pending_wakeup = 1;
@@ -8152,8 +8149,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		context = parent_event->overflow_handler_context;
 	}
 
-	event->overflow_handler	= overflow_handler;
-	event->overflow_handler_context = context;
+	if (overflow_handler) {
+		event->overflow_handler	= overflow_handler;
+		event->overflow_handler_context = context;
+	} else {
+		event->overflow_handler = perf_event_output;
+		event->overflow_handler_context = NULL;
+	}
 
 	perf_event__state_init(event);
 

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [tip:perf/core] perf/ring_buffer: Prepare writing into the ring-buffer from the end
  2016-03-28  6:41 ` [PATCH 3/4] perf core: Prepare writing into ring buffer from end Wang Nan
  2016-03-29  0:25   ` Alexei Starovoitov
@ 2016-03-31  9:26   ` tip-bot for Wang Nan
  1 sibling, 0 replies; 37+ messages in thread
From: tip-bot for Wang Nan @ 2016-03-31  9:26 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: namhyung, pi3orama, vincent.weaver, peterz, hpa, torvalds,
	linux-kernel, hekuang, jolsa, brendan.d.gregg,
	masami.hiramatsu.pt, mingo, alexander.shishkin, jolsa, ast, acme,
	tglx, eranian, wangnan0, lizefan

Commit-ID:  d1b26c70246bc72922ae61d9f972d5c2588409e7
Gitweb:     http://git.kernel.org/tip/d1b26c70246bc72922ae61d9f972d5c2588409e7
Author:     Wang Nan <wangnan0@huawei.com>
AuthorDate: Mon, 28 Mar 2016 06:41:31 +0000
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 31 Mar 2016 10:30:49 +0200

perf/ring_buffer: Prepare writing into the ring-buffer from the end

Convert perf_output_begin() to __perf_output_begin() and make the latter
function able to write records from the end of the ring-buffer.

Following commits will utilize the 'backward' flag.

This is the core patch to support writing to the ring-buffer backwards,
which will be introduced by upcoming patches to support reading from
overwritable ring-buffers.

In theory, this patch should not introduce any extra performance
overhead since we use always_inline, but it does not hurt to double
check that assumption:

When CONFIG_OPTIMIZE_INLINING is disabled, the output object is nearly
identical to original one. See:

   http://lkml.kernel.org/g/56F52E83.70409@huawei.com

When CONFIG_OPTIMIZE_INLINING is enabled, the resulting object file becomes
smaller:

 $ size kernel/events/ring_buffer.o*
   text       data        bss        dec        hex    filename
   4641          4          8       4653       122d kernel/events/ring_buffer.o.old
   4545          4          8       4557       11cd kernel/events/ring_buffer.o.new

Performance testing results:

Calling 3000000 times of 'close(-1)', use gettimeofday() to check
duration.  Use 'perf record -o /dev/null -e raw_syscalls:*' to capture
system calls. In ns.

Testing environment:

 CPU    : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
 Kernel : v4.5.0

                     MEAN         STDVAR
  BASE            800214.950    2853.083
  PRE            2253846.700    9997.014
  POST           2257495.540    8516.293

Where 'BASE' is pure performance without capturing. 'PRE' is test
result of pure 'v4.5.0' kernel. 'POST' is test result after this
patch.

Considering the stdvar, this patch doesn't hurt performance, within
noise margin.

For testing details, see:

  http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <pi3orama@163.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1459147292-239310-4-git-send-email-wangnan0@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/ring_buffer.c | 42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 72d8127..60be55a 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -102,8 +102,21 @@ out:
 	preempt_enable();
 }
 
-int perf_output_begin(struct perf_output_handle *handle,
-		      struct perf_event *event, unsigned int size)
+static bool __always_inline
+ring_buffer_has_space(unsigned long head, unsigned long tail,
+		      unsigned long data_size, unsigned int size,
+		      bool backward)
+{
+	if (!backward)
+		return CIRC_SPACE(head, tail, data_size) >= size;
+	else
+		return CIRC_SPACE(tail, head, data_size) >= size;
+}
+
+static int __always_inline
+__perf_output_begin(struct perf_output_handle *handle,
+		    struct perf_event *event, unsigned int size,
+		    bool backward)
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
@@ -146,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 	do {
 		tail = READ_ONCE(rb->user_page->data_tail);
 		offset = head = local_read(&rb->head);
-		if (!rb->overwrite &&
-		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
-			goto fail;
+		if (!rb->overwrite) {
+			if (unlikely(!ring_buffer_has_space(head, tail,
+							    perf_data_size(rb),
+							    size, backward)))
+				goto fail;
+		}
 
 		/*
 		 * The above forms a control dependency barrier separating the
@@ -162,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle,
 		 * See perf_output_put_handle().
 		 */
 
-		head += size;
+		if (!backward)
+			head += size;
+		else
+			head -= size;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
 
+	if (backward) {
+		offset = head;
+		head = (u64)(-head);
+	}
+
 	/*
 	 * We rely on the implied barrier() by local_cmpxchg() to ensure
 	 * none of the data stores below can be lifted up by the compiler.
@@ -206,6 +230,12 @@ out:
 	return -ENOSPC;
 }
 
+int perf_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, false);
+}
+
 unsigned int perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-30  2:38       ` Wangnan (F)
@ 2016-04-05 14:05         ` Wangnan (F)
  0 siblings, 0 replies; 37+ messages in thread
From: Wangnan (F) @ 2016-04-05 14:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li



On 2016/3/30 10:38, Wangnan (F) wrote:
>
>
> On 2016/3/30 10:28, Wangnan (F) wrote:
>>
>>
>> On 2016/3/29 22:04, Peter Zijlstra wrote:
>>> On Mon, Mar 28, 2016 at 06:41:32AM +0000, Wang Nan wrote:
>>>
>>> Could you maybe write a perf/tests thingy for this so that _some_
>>> userspace exists that exercises this new code?
>>>
>>>
>>>>   int perf_output_begin(struct perf_output_handle *handle,
>>>>                 struct perf_event *event, unsigned int size)
>>>>   {
>>>> +    if (unlikely(is_write_backward(event)))
>>>> +        return __perf_output_begin(handle, event, size, true);
>>>>       return __perf_output_begin(handle, event, size, false);
>>>>   }
>>> Would something like:
>>>
>>> int perf_output_begin(...)
>>> {
>>>     if (unlikely(is_write_backward(event))
>>>         return perf_output_begin_backward(...);
>>>     return perf_output_begin_forward(...);
>>> }
>>>
>>> make sense; I'm not sure how much is still using this, but it seems
>>> somewhat excessive to inline two copies of that thing into a single
>>> function.
>>
>>

[SNIP]

>
> Sorry. Your second suggestion seems also good:
>
> My implementation makes a big perf_output_begin(), but introduces only 
> one load and one branch.
>
> Your first suggestion introduces one load, one branch and one function 
> call.
>
> Your second suggestion introduces one load, and at least one (and at 
> most three) branches.
>
> I need some benchmarking result.
>
> Thank you.

No obvious performance divergence among all 3 implementations.

Here are some numbers:

I tested the cost of generating PERF_RECORD_COMM event using prctl with
following code:

         ...
         gettimeofday(&tv1, NULL);
         for (i = 0; i < 1000 * 1000 * 3; i++) {
                 char proc_name[10];

                 snprintf(proc_name, sizeof(proc_name), "p:%d\n", i);
                 prctl(PR_SET_NAME, proc_name);
         }
         gettimeofday(&tv2, NULL);
         us1 = tv1.tv_sec * 1000000 + tv1.tv_usec;
         us2 = tv2.tv_sec * 1000000 + tv2.tv_usec;
         printf("%ld\n", us2 - us1);
         ...

Run this benchmark 100 times in each experiment. Bind benchmark to core 2
and perf to core 1 to ensure they are on the same CPU.

Result:

BASE    : execute without perf
4.5     : pure v4.5
TIP     : with only patch 1-3/4 in this patch set applied
BIGFUNC : the implementation in my original patch
FUNCCALL: the implementation in Peter's first suggestion:
    int perf_output_begin(...)
    {
        if (unlikely(is_write_backward(event))
            return perf_output_begin_backward(...);
        return perf_output_begin_forward(...);
    }
BRANCH : the implementation in Peter's second suggestion:
     int perf_output_begin(...)
     {
         return __perf_output_begin(..., unlikely(event->attr.backwards));
     }


'perf' is executed using:
  # perf record -o /dev/null --no-buildid-cache -e 
syscalls:sys_enter_read ...


Results:

              MEAN       STDVAR
BASE    : 1122968.85   33492.52
4.5     : 2714200.70   26231.69
TIP     : 2646260.46   32610.56
BIGFUNC : 2661308.46   52707.47
FUNCCALL: 2636061.10   52607.80
BRANCH  : 2651335.74   34910.04


Considering the stdvar, the performance result is nearly identical.

I'd like to choose 'BRANCH' because its code looks better.

Thank you.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH 4/4] perf core: Add backward attribute to perf event
  2016-03-29 14:04   ` Peter Zijlstra
  2016-03-30  2:28     ` Wangnan (F)
@ 2016-04-07  9:45     ` Wangnan (F)
  1 sibling, 0 replies; 37+ messages in thread
From: Wangnan (F) @ 2016-04-07  9:45 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Alexei Starovoitov, Arnaldo Carvalho de Melo, linux-kernel,
	Brendan Gregg, He Kuang, Jiri Olsa, Masami Hiramatsu,
	Namhyung Kim, pi3orama, Zefan Li



On 2016/3/29 22:04, Peter Zijlstra wrote:
> On Mon, Mar 28, 2016 at 06:41:32AM +0000, Wang Nan wrote:
>
> Could you maybe write a perf/tests thingy for this so that _some_
> userspace exists that exercises this new code?
>
>

Yes. Please see:

http://lkml.kernel.org/r/1460022180-61262-1-git-send-email-wangnan0@huawei.com

Thank you.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
@ 2016-10-21  8:56       ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 37+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-10-21  8:56 UTC (permalink / raw)
  To: Wang Nan, peterz, vince
  Cc: mtk.manpages, linux-kernel, linux-man, pi3orama, lizefan, Vince Weaver

Thanks for this patch, Wangnan.

Vince, do you have any comments?

Cheers,

Michael


On 03/28/2016 12:15 PM, Wang Nan wrote:
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> ---
>  man2/perf_event_open.2 | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
> index c90fc51..b232cba 100644
> --- a/man2/perf_event_open.2
> +++ b/man2/perf_event_open.2
> @@ -2719,6 +2719,17 @@ The argument is a BPF program file descriptor that was created by
>  a previous
>  .BR bpf (2)
>  system call.
> +.TP
> +.BR PERF_EVENT_IOC_PAUSE_OUTPUT " (since Linux 4.6)"
> +.\" commit ? (http://lkml.kernel.org/g/1459147292-239310-2-git-send-email-wangnan0@huawei.com)
> +This allows pausing and resuming the event's ring-buffer. A
> +paused ring-buffer does not prevent samples generation, but simply
> +discards them. The discarded samples are considered lost, and cause
> +.BR PERF_RECORD_LOST
> +to be generated when possible.
> +
> +The argument is an integer. Nonzero value pauses the ring-buffer,
> +zero value resumes the ring-buffer.
>  .SS Using prctl
>  A process can enable or disable all the event groups that are
>  attached to it using the
> 


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
@ 2016-10-21  8:56       ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 37+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-10-21  8:56 UTC (permalink / raw)
  To: Wang Nan, peterz-wEGCiKHe2LqWVfeAwA7xHQ, vince-yfjdyHUqu3OsTnJN9+BGXg
  Cc: mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-man-u79uwXL29TY76Z2rM5mHXA, pi3orama-9Onoh4P/yGk,
	lizefan-hv44wF8Li93QT0dZR+AlfA, Vince Weaver

Thanks for this patch, Wangnan.

Vince, do you have any comments?

Cheers,

Michael


On 03/28/2016 12:15 PM, Wang Nan wrote:
> Signed-off-by: Wang Nan <wangnan0-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
> ---
>  man2/perf_event_open.2 | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
> index c90fc51..b232cba 100644
> --- a/man2/perf_event_open.2
> +++ b/man2/perf_event_open.2
> @@ -2719,6 +2719,17 @@ The argument is a BPF program file descriptor that was created by
>  a previous
>  .BR bpf (2)
>  system call.
> +.TP
> +.BR PERF_EVENT_IOC_PAUSE_OUTPUT " (since Linux 4.6)"
> +.\" commit ? (http://lkml.kernel.org/g/1459147292-239310-2-git-send-email-wangnan0-hv44wF8Li93QT0dZR+AlfA@public.gmane.org)
> +This allows pausing and resuming the event's ring-buffer. A
> +paused ring-buffer does not prevent samples generation, but simply
> +discards them. The discarded samples are considered lost, and cause
> +.BR PERF_RECORD_LOST
> +to be generated when possible.
> +
> +The argument is an integer. Nonzero value pauses the ring-buffer,
> +zero value resumes the ring-buffer.
>  .SS Using prctl
>  A process can enable or disable all the event groups that are
>  attached to it using the
> 


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH][manpages 2/2] perf_event_open.2: Document write_backward
  2016-03-28 10:16     ` Wang Nan
  (?)
@ 2016-10-21  8:57     ` Michael Kerrisk (man-pages)
  -1 siblings, 0 replies; 37+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-10-21  8:57 UTC (permalink / raw)
  To: Wang Nan, peterz, vince
  Cc: mtk.manpages, linux-kernel, linux-man, pi3orama, lizefan, Vince Weaver

Thanks for this patch, Wangnan.

Vince, do you have any comments?

Cheers,

Michael

On 03/28/2016 12:16 PM, Wang Nan wrote:
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> ---
>  man2/perf_event_open.2 | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
> index b232cba..942a410 100644
> --- a/man2/perf_event_open.2
> +++ b/man2/perf_event_open.2
> @@ -234,8 +234,10 @@ struct perf_event_attr {
>            mmap2          :  1,  /* include mmap with inode data */
>            comm_exec      :  1,  /* flag comm events that are due to exec */
>            use_clockid    :  1,  /* use clockid for time fields */
> +          context_switch :  1,  /* context switch data */
> +          write_backward :  1,  /* Write ring buffer from end to beginning */
>  
> -          __reserved_1   : 38;
> +          __reserved_1   : 36;
>  
>      union {
>          __u32 wakeup_events;    /* wakeup every n events */
> @@ -1105,6 +1107,30 @@ field.
>  This can make it easier to correlate perf sample times with
>  timestamps generated by other tools.
>  .TP
> +.IR "write_backward" " (since Linux 4.6)"
> +.\" commit ? (http://lkml.kernel.org/g/1459147292-239310-5-git-send-email-wangnan0@huawei.com)
> +This makes the resulting event use a backward ring-buffer, which
> +writes samples from the end of the ring-buffer.
> +
> +It is not allowed to connect events with backward and forward
> +ring-buffer settings together using
> +.B PERF_EVENT_IOC_SET_OUTPUT.
> +
> +Backward ring-buffer is useful when the ring-buffer is overwritable
> +(created by readonly
> +.BR mmap (2)
> +). In this case,
> +.IR data_tail
> +is useless,
> +.IR data_head
> +points to the head of the most recent sample in a backward
> +ring-buffer. It is easy to iterate over the whole ring-buffer by reading
> +samples one by one because size of a sample can be found from decoding
> +its header. In contrast, in a forward overwritable ring-buffer, the only
> +information is the end of the most recent sample which is pointed by
> +.IR data_head,
> +but the size of a sample can't be determined from the end of it.
> +.TP
>  .IR "wakeup_events" ", " "wakeup_watermark"
>  This union sets how many samples
>  .RI ( wakeup_events )
> @@ -1634,7 +1660,9 @@ And vice versa:
>  .TP
>  .I data_head
>  This points to the head of the data section.
> -The value continuously increases, it does not wrap.
> +The value continuously increases (or decreases if
> +.IR write_backward
> +is set), it does not wrap.
>  The value needs to be manually wrapped by the size of the mmap buffer
>  before accessing the samples.
>  
> @@ -2581,6 +2609,24 @@ Starting with Linux 3.18,
>  .B POLL_HUP
>  is indicated if the event being monitored is attached to a different
>  process and that process exits.
> +.SS Reading from overwritable ring-buffer
> +Reader is unable to update
> +.IR data_tail
> +if the mapping is not
> +.BR PROT_WRITE .
> +In this case, kernel will overwrite data without considering whether
> +they are read or not, so ring-buffer is overwritable and
> +behaves like a flight recorder. To read from an overwritable
> +ring-buffer, setting
> +.IR write_backward
> +is suggested, or it would be hard to find a proper position to start
> +decoding. In addition, ring-buffer should be paused before reading
> +through
> +.BR ioctl (2)
> +with
> +.B PERF_EVENT_IOC_PAUSE_OUTPUT
> +to avoid racing between kernel and reader. Ring-buffer should be resumed
> +after reading has finished.
>  .SS rdpmc instruction
>  Starting with Linux 3.4 on x86, you can use the
>  .\" commit c7206205d00ab375839bd6c7ddb247d600693c09
> @@ -2693,6 +2739,13 @@ The file descriptors must all be on the same CPU.
>  
>  The argument specifies the desired file descriptor, or \-1 if
>  output should be ignored.
> +
> +Two events with different
> +.IR write_backward
> +settings are not allowed to be connected together using
> +.B PERF_EVENT_IOC_SET_OUTPUT.
> +.B EINVAL
> +is returned in this case.
>  .TP
>  .BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)"
>  .\" commit 6fb2915df7f0747d9044da9dbff5b46dc2e20830
> 


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
@ 2016-10-21 14:37         ` Vince Weaver
  0 siblings, 0 replies; 37+ messages in thread
From: Vince Weaver @ 2016-10-21 14:37 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Wang Nan, peterz, vince, linux-kernel, linux-man, pi3orama,
	lizefan, Vince Weaver

On Fri, 21 Oct 2016, Michael Kerrisk (man-pages) wrote:

> Thanks for this patch, Wangnan.
> 
> Vince, do you have any comments?
> 

I was catching up chronologically and was still at 4.4, and this was 
still in my queue as a 4.6 change.  I think I thought the patches looked 
good at the time, but I'll revisit them (and write some sample code to 
test out the interface) and get back to you soon.

Vince

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
@ 2016-10-21 14:37         ` Vince Weaver
  0 siblings, 0 replies; 37+ messages in thread
From: Vince Weaver @ 2016-10-21 14:37 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Wang Nan, peterz-wEGCiKHe2LqWVfeAwA7xHQ,
	vince-yfjdyHUqu3OsTnJN9+BGXg,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-man-u79uwXL29TY76Z2rM5mHXA, pi3orama-9Onoh4P/yGk,
	lizefan-hv44wF8Li93QT0dZR+AlfA, Vince Weaver

On Fri, 21 Oct 2016, Michael Kerrisk (man-pages) wrote:

> Thanks for this patch, Wangnan.
> 
> Vince, do you have any comments?
> 

I was catching up chronologically and was still at 4.4, and this was 
still in my queue as a 4.6 change.  I think I thought the patches looked 
good at the time, but I'll revisit them (and write some sample code to 
test out the interface) and get back to you soon.

Vince
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
  2016-10-21 14:37         ` Vince Weaver
@ 2016-10-21 14:49           ` Michael Kerrisk (man-pages)
  -1 siblings, 0 replies; 37+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-10-21 14:49 UTC (permalink / raw)
  To: Vince Weaver
  Cc: Wang Nan, Peter Zijlstra, Vince Weaver, lkml, linux-man,
	pi3orama, Li Zefan

Hi Vince,

On 21 October 2016 at 16:37, Vince Weaver <vincent.weaver@maine.edu> wrote:
> On Fri, 21 Oct 2016, Michael Kerrisk (man-pages) wrote:
>
>> Thanks for this patch, Wangnan.
>>
>> Vince, do you have any comments?
>>
>
> I was catching up chronologically and was still at 4.4, and this was
> still in my queue as a 4.6 change.  I think I thought the patches looked
> good at the time, but I'll revisit them (and write some sample code to
> test out the interface) and get back to you soon.

Note that Wang Nan sent out new patches today -- not sure if there
were any changes though.

Cheers,

Michael


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT
@ 2016-10-21 14:49           ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 37+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-10-21 14:49 UTC (permalink / raw)
  To: Vince Weaver
  Cc: Wang Nan, Peter Zijlstra, Vince Weaver, lkml, linux-man,
	pi3orama, Li Zefan

Hi Vince,

On 21 October 2016 at 16:37, Vince Weaver <vincent.weaver@maine.edu> wrote:
> On Fri, 21 Oct 2016, Michael Kerrisk (man-pages) wrote:
>
>> Thanks for this patch, Wangnan.
>>
>> Vince, do you have any comments?
>>
>
> I was catching up chronologically and was still at 4.4, and this was
> still in my queue as a 4.6 change.  I think I thought the patches looked
> good at the time, but I'll revisit them (and write some sample code to
> test out the interface) and get back to you soon.

Note that Wang Nan sent out new patches today -- not sure if there
were any changes though.

Cheers,

Michael


-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2016-10-21 14:50 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-28  6:41 [PATCH 0/4] perf core: Support reading from overwritable ring buffer Wang Nan
2016-03-28  6:41 ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume " Wang Nan
2016-03-28 10:15   ` [PATCH][manpages 1/2] perf_event_open.2: Document PERF_EVENT_IOC_PAUSE_OUTPUT Wang Nan
2016-03-28 10:15     ` Wang Nan
2016-10-21  8:56     ` Michael Kerrisk (man-pages)
2016-10-21  8:56       ` Michael Kerrisk (man-pages)
2016-10-21 14:37       ` Vince Weaver
2016-10-21 14:37         ` Vince Weaver
2016-10-21 14:49         ` Michael Kerrisk (man-pages)
2016-10-21 14:49           ` Michael Kerrisk (man-pages)
2016-03-29  0:27   ` [PATCH 1/4] perf core: Introduce new ioctl options to pause and resume ring buffer Alexei Starovoitov
2016-03-29  1:10     ` Wangnan (F)
2016-03-29  2:05     ` [PATCH 1/4 fix] " Wang Nan
2016-03-29  4:39       ` Alexei Starovoitov
2016-03-29 12:54   ` [PATCH 1/4] " Peter Zijlstra
2016-03-29 12:55     ` Peter Zijlstra
2016-03-30  1:57     ` Wangnan (F)
2016-03-30  6:46       ` Peter Zijlstra
2016-03-31  9:26   ` [tip:perf/core] perf/ring_buffer: Introduce new ioctl options to pause and resume the ring-buffer tip-bot for Wang Nan
2016-03-28  6:41 ` [PATCH 2/4] perf core: Set event's default overflow_handler Wang Nan
2016-03-31  9:26   ` [tip:perf/core] perf/core: Set event's default ::overflow_handler() tip-bot for Wang Nan
2016-03-28  6:41 ` [PATCH 3/4] perf core: Prepare writing into ring buffer from end Wang Nan
2016-03-29  0:25   ` Alexei Starovoitov
2016-03-31  9:26   ` [tip:perf/core] perf/ring_buffer: Prepare writing into the ring-buffer from the end tip-bot for Wang Nan
2016-03-28  6:41 ` [PATCH 4/4] perf core: Add backward attribute to perf event Wang Nan
2016-03-28 10:16   ` [PATCH][manpages 2/2] perf_event_open.2: Document write_backward Wang Nan
2016-03-28 10:16     ` Wang Nan
2016-10-21  8:57     ` Michael Kerrisk (man-pages)
2016-03-29  0:28   ` [PATCH 4/4] perf core: Add backward attribute to perf event Alexei Starovoitov
2016-03-29  2:01   ` Wangnan (F)
2016-03-29  4:59     ` Alexei Starovoitov
2016-03-29  5:59       ` Wangnan (F)
2016-03-29 14:04   ` Peter Zijlstra
2016-03-30  2:28     ` Wangnan (F)
2016-03-30  2:38       ` Wangnan (F)
2016-04-05 14:05         ` Wangnan (F)
2016-04-07  9:45     ` Wangnan (F)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.