All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] printk: ringbuffer: Improve prb_next_seq() performance
@ 2022-01-21 13:06 Mukesh Ojha
  2022-01-21 14:08 ` John Ogness
  0 siblings, 1 reply; 4+ messages in thread
From: Mukesh Ojha @ 2022-01-21 13:06 UTC (permalink / raw)
  To: linux-kernel; +Cc: john.ogness, pmladek, senozhatsky, rostedt, Mukesh Ojha

From: Petr Mladek <pmladek@suse.com>

prb_next_seq() always iterates from the first known sequence number.
In the worst case, it might loop 8k times for 256kB buffer,
15k times for 512kB buffer, and 64k times for 2MB buffer.

It was reported that pooling and reading using syslog interface
might occupy 50% of CPU.

Speedup the search by storing @id of the last finalized descriptor.

The loop is still needed because the @id is stored and read in the best
effort way. An atomic variable is used to keep the @id consistent.
But the stores and reads are not serialized against each other.
The descriptor could get reused in the meantime. The related sequence
number will be used only when it is still valid.

An invalid value should be read _only_ when there is a flood of messages
and the ringbuffer is rapidly reused. The performance is the least
problem in this case.

Link: https://lore.kernel.org/lkml/YXlddJxLh77DKfIO@alley/T/#m43062e8b2a17f8dbc8c6ccdb8851fb0dbaabbb14
Reported-by: Chunlei Wang <chunlei.wang@mediatek.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
---
Changes against v2:
  Added the hunk suggested by John

Changes against v1:
  Read @seq by the last finalized @id directly in prb_next_seq() (John)

 kernel/printk/printk_ringbuffer.c | 48 +++++++++++++++++++++++++++++++++++----
 kernel/printk/printk_ringbuffer.h |  2 ++
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 8a7b736..297bc18 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -474,8 +474,11 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
 	 * state has been re-checked. A memcpy() for all of @desc
 	 * cannot be used because of the atomic_t @state_var field.
 	 */
-	memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
-	       sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
+	if (desc_out) {
+		memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
+			sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
+	}
+
 	if (seq_out)
 		*seq_out = info->seq; /* also part of desc_read:C */
 	if (caller_id_out)
@@ -528,7 +531,8 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
 	state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */
 	d_state = get_desc_state(id, state_val);
 out:
-	atomic_long_set(&desc_out->state_var, state_val);
+	if (desc_out)
+		atomic_long_set(&desc_out->state_var, state_val);
 	return d_state;
 }
 
@@ -1449,6 +1453,7 @@ static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id)
 
 	atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val,
 			DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */
+	atomic_long_set(&desc_ring->last_finalized_id, id);
 }
 
 /**
@@ -1657,7 +1662,11 @@ void prb_commit(struct prb_reserved_entry *e)
  */
 void prb_final_commit(struct prb_reserved_entry *e)
 {
+	struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
+
 	_prb_commit(e, desc_finalized);
+	/* Best effort to remember the last finalized @id. */
+	atomic_long_set(&desc_ring->last_finalized_id, e->id);
 }
 
 /*
@@ -2005,8 +2014,38 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
  */
 u64 prb_next_seq(struct printk_ringbuffer *rb)
 {
-	u64 seq = 0;
+	struct prb_desc_ring *desc_ring = &rb->desc_ring;
+	enum desc_state d_state;
+	unsigned long id;
+	u64 seq;
+
+	/* Check if the cached @id still points to a valid @seq. */
+	id = atomic_long_read(&desc_ring->last_finalized_id);
+	d_state = desc_read(desc_ring, id, NULL, &seq, NULL);
 
+	if (d_state == desc_finalized || d_state == desc_reusable) {
+		/*
+		 * Begin searching after the last finalized record.
+		 * (On 0, the search must begin at 0 because of hack#2
+		 * of the bootstrapping phase it is not known if a
+		 * record at index 0 exists.)
+		*/
+		if (seq != 0)
+			seq++;
+	} else {
+		/*
+		 * The information about the last finalized sequence number
+		 * has gone. It should happen only when there is a flood of
+		 * new messages and the ringbuffer is rapidly recycled.
+		 * Give up and start from the beginning.
+		 */
+		seq = 0;
+	}
+
+	/*
+	 * The information about the last finalized @seq might be inaccurate.
+	 * Search forward to find the current one.
+	 */
 	/* Search forward from the oldest descriptor. */
 	while (_prb_read_valid(rb, &seq, NULL, NULL))
 		seq++;
@@ -2044,6 +2083,7 @@ void prb_init(struct printk_ringbuffer *rb,
 	rb->desc_ring.infos = infos;
 	atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
 	atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));
+	atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits));
 
 	rb->text_data_ring.size_bits = textbits;
 	rb->text_data_ring.data = text_buf;
diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h
index 73cc80e..18cd25e 100644
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -75,6 +75,7 @@ struct prb_desc_ring {
 	struct printk_info	*infos;
 	atomic_long_t		head_id;
 	atomic_long_t		tail_id;
+	atomic_long_t		last_finalized_id;
 };
 
 /*
@@ -258,6 +259,7 @@ static struct printk_ringbuffer name = {							\
 		.infos		= &_##name##_infos[0],						\
 		.head_id	= ATOMIC_INIT(DESC0_ID(descbits)),				\
 		.tail_id	= ATOMIC_INIT(DESC0_ID(descbits)),				\
+		.last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)),				\
 	},											\
 	.text_data_ring = {									\
 		.size_bits	= (avgtextbits) + (descbits),					\
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] printk: ringbuffer: Improve prb_next_seq() performance
  2022-01-21 13:06 [PATCH v3] printk: ringbuffer: Improve prb_next_seq() performance Mukesh Ojha
@ 2022-01-21 14:08 ` John Ogness
  2022-01-26  6:55   ` Mukesh Ojha
  2022-01-26 17:08   ` Petr Mladek
  0 siblings, 2 replies; 4+ messages in thread
From: John Ogness @ 2022-01-21 14:08 UTC (permalink / raw)
  To: Mukesh Ojha, linux-kernel; +Cc: pmladek, senozhatsky, rostedt, Mukesh Ojha

Hi Mukesh,

Thanks for pushing this. I think it got lost somewhere. I have a couple
very minor non-functional change requests.

On 2022-01-21, Mukesh Ojha <quic_mojha@quicinc.com> wrote:
> From: Petr Mladek <pmladek@suse.com>
>
> prb_next_seq() always iterates from the first known sequence number.
> In the worst case, it might loop 8k times for 256kB buffer,
> 15k times for 512kB buffer, and 64k times for 2MB buffer.
>
> It was reported that pooling and reading using syslog interface

                       ^^^^^^^ polling

> might occupy 50% of CPU.
>
> Speedup the search by storing @id of the last finalized descriptor.
>
> The loop is still needed because the @id is stored and read in the best
> effort way. An atomic variable is used to keep the @id consistent.
> But the stores and reads are not serialized against each other.
> The descriptor could get reused in the meantime. The related sequence
> number will be used only when it is still valid.
>
> An invalid value should be read _only_ when there is a flood of messages
> and the ringbuffer is rapidly reused. The performance is the least
> problem in this case.
>
> Link: https://lore.kernel.org/lkml/YXlddJxLh77DKfIO@alley/T/#m43062e8b2a17f8dbc8c6ccdb8851fb0dbaabbb14
> Reported-by: Chunlei Wang <chunlei.wang@mediatek.com>
> Signed-off-by: Petr Mladek <pmladek@suse.com>
> Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
> ---
> Changes against v2:
>   Added the hunk suggested by John
>
> Changes against v1:
>   Read @seq by the last finalized @id directly in prb_next_seq() (John)
>
>  kernel/printk/printk_ringbuffer.c | 48 +++++++++++++++++++++++++++++++++++----
>  kernel/printk/printk_ringbuffer.h |  2 ++
>  2 files changed, 46 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
> index 8a7b736..297bc18 100644
> --- a/kernel/printk/printk_ringbuffer.c
> +++ b/kernel/printk/printk_ringbuffer.c
> @@ -2005,8 +2014,38 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
>   */
>  u64 prb_next_seq(struct printk_ringbuffer *rb)
>  {
> -	u64 seq = 0;
> +	struct prb_desc_ring *desc_ring = &rb->desc_ring;
> +	enum desc_state d_state;
> +	unsigned long id;
> +	u64 seq;
> +
> +	/* Check if the cached @id still points to a valid @seq. */
> +	id = atomic_long_read(&desc_ring->last_finalized_id);
> +	d_state = desc_read(desc_ring, id, NULL, &seq, NULL);
>  
> +	if (d_state == desc_finalized || d_state == desc_reusable) {
> +		/*
> +		 * Begin searching after the last finalized record.
> +		 * (On 0, the search must begin at 0 because of hack#2
> +		 * of the bootstrapping phase it is not known if a
> +		 * record at index 0 exists.)
> +		*/

               ^^^ whitespace
               
> +		if (seq != 0)
> +			seq++;
> +	} else {
> +		/*
> +		 * The information about the last finalized sequence number
> +		 * has gone. It should happen only when there is a flood of
> +		 * new messages and the ringbuffer is rapidly recycled.
> +		 * Give up and start from the beginning.
> +		 */
> +		seq = 0;
> +	}
> +
> +	/*
> +	 * The information about the last finalized @seq might be inaccurate.
> +	 * Search forward to find the current one.
> +	 */

It is fine to add this comment. But then the following comment should be
removed. It is redundant.

>  	/* Search forward from the oldest descriptor. */
>  	while (_prb_read_valid(rb, &seq, NULL, NULL))
>  		seq++;

Petr can probably just make the changes when committing. I am not
requesting a v4.

@Petr: Feel free to add:

Reviewed-by: John Ogness <john.ogness@linutronix.de>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] printk: ringbuffer: Improve prb_next_seq() performance
  2022-01-21 14:08 ` John Ogness
@ 2022-01-26  6:55   ` Mukesh Ojha
  2022-01-26 17:08   ` Petr Mladek
  1 sibling, 0 replies; 4+ messages in thread
From: Mukesh Ojha @ 2022-01-26  6:55 UTC (permalink / raw)
  To: John Ogness, linux-kernel; +Cc: pmladek, senozhatsky, rostedt

Thanks John for the review.

@petr : does it looks fine from your side ?


Regards,
-Mukesh

On 1/21/2022 7:38 PM, John Ogness wrote:
> Hi Mukesh,
>
> Thanks for pushing this. I think it got lost somewhere. I have a couple
> very minor non-functional change requests.
>
> On 2022-01-21, Mukesh Ojha <quic_mojha@quicinc.com> wrote:
>> From: Petr Mladek <pmladek@suse.com>
>>
>> prb_next_seq() always iterates from the first known sequence number.
>> In the worst case, it might loop 8k times for 256kB buffer,
>> 15k times for 512kB buffer, and 64k times for 2MB buffer.
>>
>> It was reported that pooling and reading using syslog interface
>                         ^^^^^^^ polling
>
>> might occupy 50% of CPU.
>>
>> Speedup the search by storing @id of the last finalized descriptor.
>>
>> The loop is still needed because the @id is stored and read in the best
>> effort way. An atomic variable is used to keep the @id consistent.
>> But the stores and reads are not serialized against each other.
>> The descriptor could get reused in the meantime. The related sequence
>> number will be used only when it is still valid.
>>
>> An invalid value should be read _only_ when there is a flood of messages
>> and the ringbuffer is rapidly reused. The performance is the least
>> problem in this case.
>>
>> Link: https://lore.kernel.org/lkml/YXlddJxLh77DKfIO@alley/T/#m43062e8b2a17f8dbc8c6ccdb8851fb0dbaabbb14
>> Reported-by: Chunlei Wang <chunlei.wang@mediatek.com>
>> Signed-off-by: Petr Mladek <pmladek@suse.com>
>> Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
>> ---
>> Changes against v2:
>>    Added the hunk suggested by John
>>
>> Changes against v1:
>>    Read @seq by the last finalized @id directly in prb_next_seq() (John)
>>
>>   kernel/printk/printk_ringbuffer.c | 48 +++++++++++++++++++++++++++++++++++----
>>   kernel/printk/printk_ringbuffer.h |  2 ++
>>   2 files changed, 46 insertions(+), 4 deletions(-)
>>
>> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
>> index 8a7b736..297bc18 100644
>> --- a/kernel/printk/printk_ringbuffer.c
>> +++ b/kernel/printk/printk_ringbuffer.c
>> @@ -2005,8 +2014,38 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
>>    */
>>   u64 prb_next_seq(struct printk_ringbuffer *rb)
>>   {
>> -	u64 seq = 0;
>> +	struct prb_desc_ring *desc_ring = &rb->desc_ring;
>> +	enum desc_state d_state;
>> +	unsigned long id;
>> +	u64 seq;
>> +
>> +	/* Check if the cached @id still points to a valid @seq. */
>> +	id = atomic_long_read(&desc_ring->last_finalized_id);
>> +	d_state = desc_read(desc_ring, id, NULL, &seq, NULL);
>>   
>> +	if (d_state == desc_finalized || d_state == desc_reusable) {
>> +		/*
>> +		 * Begin searching after the last finalized record.
>> +		 * (On 0, the search must begin at 0 because of hack#2
>> +		 * of the bootstrapping phase it is not known if a
>> +		 * record at index 0 exists.)
>> +		*/
>                 ^^^ whitespace
>                 
>> +		if (seq != 0)
>> +			seq++;
>> +	} else {
>> +		/*
>> +		 * The information about the last finalized sequence number
>> +		 * has gone. It should happen only when there is a flood of
>> +		 * new messages and the ringbuffer is rapidly recycled.
>> +		 * Give up and start from the beginning.
>> +		 */
>> +		seq = 0;
>> +	}
>> +
>> +	/*
>> +	 * The information about the last finalized @seq might be inaccurate.
>> +	 * Search forward to find the current one.
>> +	 */
> It is fine to add this comment. But then the following comment should be
> removed. It is redundant.
>
>>   	/* Search forward from the oldest descriptor. */
>>   	while (_prb_read_valid(rb, &seq, NULL, NULL))
>>   		seq++;
> Petr can probably just make the changes when committing. I am not
> requesting a v4.
>
> @Petr: Feel free to add:
>
> Reviewed-by: John Ogness <john.ogness@linutronix.de>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] printk: ringbuffer: Improve prb_next_seq() performance
  2022-01-21 14:08 ` John Ogness
  2022-01-26  6:55   ` Mukesh Ojha
@ 2022-01-26 17:08   ` Petr Mladek
  1 sibling, 0 replies; 4+ messages in thread
From: Petr Mladek @ 2022-01-26 17:08 UTC (permalink / raw)
  To: John Ogness; +Cc: Mukesh Ojha, linux-kernel, senozhatsky, rostedt

On Fri 2022-01-21 15:14:41, John Ogness wrote:
> Hi Mukesh,
> 
> Thanks for pushing this. I think it got lost somewhere. I have a couple
> very minor non-functional change requests.

Yes, thanks for pushing this. I have somehow lost this patch on my radar.

> On 2022-01-21, Mukesh Ojha <quic_mojha@quicinc.com> wrote:
> > From: Petr Mladek <pmladek@suse.com>
> >
> > prb_next_seq() always iterates from the first known sequence number.
> > In the worst case, it might loop 8k times for 256kB buffer,
> > 15k times for 512kB buffer, and 64k times for 2MB buffer.
> >
> > It was reported that pooling and reading using syslog interface
> 
>                        ^^^^^^^ polling
> 
> > might occupy 50% of CPU.
> >
> > Speedup the search by storing @id of the last finalized descriptor.
> >
> > The loop is still needed because the @id is stored and read in the best
> > effort way. An atomic variable is used to keep the @id consistent.
> > But the stores and reads are not serialized against each other.
> > The descriptor could get reused in the meantime. The related sequence
> > number will be used only when it is still valid.
> >
> > An invalid value should be read _only_ when there is a flood of messages
> > and the ringbuffer is rapidly reused. The performance is the least
> > problem in this case.
> >
> > Link: https://lore.kernel.org/lkml/YXlddJxLh77DKfIO@alley/T/#m43062e8b2a17f8dbc8c6ccdb8851fb0dbaabbb14
> > Reported-by: Chunlei Wang <chunlei.wang@mediatek.com>
> > Signed-off-by: Petr Mladek <pmladek@suse.com>
> > Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
> > ---
> > Changes against v2:
> >   Added the hunk suggested by John
> >
> Petr can probably just make the changes when committing. I am not
> requesting a v4.
> 
> @Petr: Feel free to add:
> 
> Reviewed-by: John Ogness <john.ogness@linutronix.de>

I made the changes suggested by John. Also I have reverted few more
formatting changes. The result is basically v2 + the hunk suggested
by John.

The patch is committed in printk/linux.git, branch
rework/fast-next-seq. I am going to queue it for 5.18.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-01-26 17:08 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-21 13:06 [PATCH v3] printk: ringbuffer: Improve prb_next_seq() performance Mukesh Ojha
2022-01-21 14:08 ` John Ogness
2022-01-26  6:55   ` Mukesh Ojha
2022-01-26 17:08   ` Petr Mladek

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.