linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
@ 2023-01-06  9:48 Hongchen Zhang
  2023-01-06 19:13 ` Luis Chamberlain
  0 siblings, 1 reply; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-06  9:48 UTC (permalink / raw)
  To: Alexander Viro, Andrew Morton, Kuniyuki Iwashima, Hongchen Zhang,
	Luis Chamberlain, David Howells, Christophe JAILLET,
	Randy Dunlap, Eric Dumazet
  Cc: linux-fsdevel, linux-kernel

Use spinlock in pipe_read/write cost too much time,IMO
pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
On the other hand, we can use __pipe_{lock,unlock} to protect
the pipe->{head,tail} in pipe_resize_ring and
post_one_notification.

I tested this patch using UnixBench's pipe test case on a x86_64
machine,and get the following data:
1) before this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     493023.3    396.3
                                                        ========
System Benchmarks Index Score (Partial Only)              396.3

2) after this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     507551.4    408.0
                                                        ========
System Benchmarks Index Score (Partial Only)              408.0

so we get ~3% speedup.

Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
---
 fs/pipe.c                 | 22 +---------------------
 include/linux/pipe_fs_i.h | 12 ++++++++++++
 kernel/watch_queue.c      |  8 ++++----
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..4355ee5f754e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-	mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
@@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	 */
 	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 	for (;;) {
-		/* Read ->head with a barrier vs post_one_notification() */
-		unsigned int head = smp_load_acquire(&pipe->head);
+		unsigned int head = pipe->head;
 		unsigned int tail = pipe->tail;
 		unsigned int mask = pipe->ring_size - 1;
 
@@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 
 			if (!buf->len) {
 				pipe_buf_release(pipe, buf);
-				spin_lock_irq(&pipe->rd_wait.lock);
 #ifdef CONFIG_WATCH_QUEUE
 				if (buf->flags & PIPE_BUF_FLAG_LOSS)
 					pipe->note_loss = true;
 #endif
 				tail++;
 				pipe->tail = tail;
-				spin_unlock_irq(&pipe->rd_wait.lock);
 			}
 			total_len -= chars;
 			if (!total_len)
@@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			 * it, either the reader will consume it or it'll still
 			 * be there for the next write.
 			 */
-			spin_lock_irq(&pipe->rd_wait.lock);
 
 			head = pipe->head;
 			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
-				spin_unlock_irq(&pipe->rd_wait.lock);
 				continue;
 			}
 
 			pipe->head = head + 1;
-			spin_unlock_irq(&pipe->rd_wait.lock);
 
 			/* Insert it into the buffer array */
 			buf = &pipe->bufs[head & mask];
@@ -1260,14 +1244,12 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	if (unlikely(!bufs))
 		return -ENOMEM;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
 	tail = pipe->tail;
 
 	n = pipe_occupancy(head, tail);
 	if (nr_slots < n) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
 		kfree(bufs);
 		return -EBUSY;
 	}
@@ -1303,8 +1285,6 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	pipe->tail = tail;
 	pipe->head = head;
 
-	spin_unlock_irq(&pipe->rd_wait.lock);
-
 	/* This might have made more room for writers */
 	wake_up_interruptible(&pipe->wr_wait);
 	return 0;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 6cb65df3e3ba..f5084daf6eaf 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_PIPE_FS_I_H
 #define _LINUX_PIPE_FS_I_H
 
+#include <linux/fs.h>
+
 #define PIPE_DEF_BUFFERS	16
 
 #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
@@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
 #define PIPE_SIZE		PAGE_SIZE
 
 /* Pipe lock and unlock operations */
+static inline void __pipe_lock(struct pipe_inode_info *pipe)
+{
+	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
+static inline void __pipe_unlock(struct pipe_inode_info *pipe)
+{
+	mutex_unlock(&pipe->mutex);
+}
+
 void pipe_lock(struct pipe_inode_info *);
 void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index a6f9bdd956c3..92e46cfe9419 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	if (!pipe)
 		return false;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
+	__pipe_lock(pipe);
 
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
@@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	buf->offset = offset;
 	buf->len = len;
 	buf->flags = PIPE_BUF_FLAG_WHOLE;
-	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
+	pipe->head = head + 1;
 
 	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
+		__pipe_unlock(pipe);
 		BUG();
 	}
 	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 	done = true;
 
 out:
-	spin_unlock_irq(&pipe->rd_wait.lock);
+	__pipe_unlock(pipe);
 	if (done)
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	return done;

base-commit: 69b41ac87e4a664de78a395ff97166f0b2943210
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-06  9:48 [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock Hongchen Zhang
@ 2023-01-06 19:13 ` Luis Chamberlain
  2023-01-06 20:33   ` Sedat Dilek
  2023-01-07  0:58   ` Hongchen Zhang
  0 siblings, 2 replies; 18+ messages in thread
From: Luis Chamberlain @ 2023-01-06 19:13 UTC (permalink / raw)
  To: Hongchen Zhang
  Cc: Alexander Viro, Andrew Morton, Kuniyuki Iwashima, David Howells,
	Christophe JAILLET, Randy Dunlap, Eric Dumazet, linux-fsdevel,
	linux-kernel

On Fri, Jan 06, 2023 at 05:48:44PM +0800, Hongchen Zhang wrote:
> Use spinlock in pipe_read/write cost too much time,IMO
> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> On the other hand, we can use __pipe_{lock,unlock} to protect
> the pipe->{head,tail} in pipe_resize_ring and
> post_one_notification.
> 
> I tested this patch using UnixBench's pipe test case on a x86_64
> machine,and get the following data:
> 1) before this patch
> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> Pipe Throughput                   12440.0     493023.3    396.3
>                                                         ========
> System Benchmarks Index Score (Partial Only)              396.3
> 
> 2) after this patch
> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> Pipe Throughput                   12440.0     507551.4    408.0
>                                                         ========
> System Benchmarks Index Score (Partial Only)              408.0
> 
> so we get ~3% speedup.
> 
> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
> ---

After the above "---" line you should have the changlog descrption.
For instance:

v3:
  - fixes bleh blah blah
v2:
  - fixes 0-day report by ... etc..
  - fixes spelling or whatever

I cannot decipher what you did here differently, not do I want to go
looking and diff'ing. So you are making the life of reviewer harder.

  Luis

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-06 19:13 ` Luis Chamberlain
@ 2023-01-06 20:33   ` Sedat Dilek
  2023-01-07  3:31     ` Hongchen Zhang
  2023-01-07  0:58   ` Hongchen Zhang
  1 sibling, 1 reply; 18+ messages in thread
From: Sedat Dilek @ 2023-01-06 20:33 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Hongchen Zhang, Alexander Viro, Andrew Morton, Kuniyuki Iwashima,
	David Howells, Christophe JAILLET, Randy Dunlap, Eric Dumazet,
	linux-fsdevel, linux-kernel

On Fri, Jan 6, 2023 at 8:40 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Fri, Jan 06, 2023 at 05:48:44PM +0800, Hongchen Zhang wrote:
> > Use spinlock in pipe_read/write cost too much time,IMO
> > pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> > On the other hand, we can use __pipe_{lock,unlock} to protect
> > the pipe->{head,tail} in pipe_resize_ring and
> > post_one_notification.
> >
> > I tested this patch using UnixBench's pipe test case on a x86_64
> > machine,and get the following data:
> > 1) before this patch
> > System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> > Pipe Throughput                   12440.0     493023.3    396.3
> >                                                         ========
> > System Benchmarks Index Score (Partial Only)              396.3
> >
> > 2) after this patch
> > System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> > Pipe Throughput                   12440.0     507551.4    408.0
> >                                                         ========
> > System Benchmarks Index Score (Partial Only)              408.0
> >
> > so we get ~3% speedup.
> >
> > Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
> > ---
>
> After the above "---" line you should have the changlog descrption.
> For instance:
>
> v3:
>   - fixes bleh blah blah
> v2:
>   - fixes 0-day report by ... etc..
>   - fixes spelling or whatever
>
> I cannot decipher what you did here differently, not do I want to go
> looking and diff'ing. So you are making the life of reviewer harder.
>

Happy new 2023.

Positive wording... You can make reviewers' life easy when...
(encourage people).
Life is easy, people live hard.

+1 Adding ChangeLog of patch history

Cannot say...
Might be good to add the link to Linus test-case + your results in the
commit message as well?

...
Link: https://git.kernel.org/linus/0ddad21d3e99 (test-case of Linus
suggested-by Andrew)
...
Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
...

Thanks.

Best regards,
-Sedat-

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-06 19:13 ` Luis Chamberlain
  2023-01-06 20:33   ` Sedat Dilek
@ 2023-01-07  0:58   ` Hongchen Zhang
  1 sibling, 0 replies; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-07  0:58 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Alexander Viro, Andrew Morton, Kuniyuki Iwashima, David Howells,
	Christophe JAILLET, Randy Dunlap, Eric Dumazet, linux-fsdevel,
	linux-kernel

Hi Luis,

On 2023/1/7 上午3:13, Luis Chamberlain wrote:
> On Fri, Jan 06, 2023 at 05:48:44PM +0800, Hongchen Zhang wrote:
>> Use spinlock in pipe_read/write cost too much time,IMO
>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>> On the other hand, we can use __pipe_{lock,unlock} to protect
>> the pipe->{head,tail} in pipe_resize_ring and
>> post_one_notification.
>>
>> I tested this patch using UnixBench's pipe test case on a x86_64
>> machine,and get the following data:
>> 1) before this patch
>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>> Pipe Throughput                   12440.0     493023.3    396.3
>>                                                          ========
>> System Benchmarks Index Score (Partial Only)              396.3
>>
>> 2) after this patch
>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>> Pipe Throughput                   12440.0     507551.4    408.0
>>                                                          ========
>> System Benchmarks Index Score (Partial Only)              408.0
>>
>> so we get ~3% speedup.
>>
>> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
>> ---
> 
> After the above "---" line you should have the changlog descrption.
> For instance:
> 
> v3:
>    - fixes bleh blah blah
> v2:
>    - fixes 0-day report by ... etc..
>    - fixes spelling or whatever
> 
> I cannot decipher what you did here differently, not do I want to go
> looking and diff'ing. So you are making the life of reviewer harder.
> 
>    Luis
> 
Matthew also reminded me to add the change log, but I don't think it is 
necessary to write the change log to fix the errors in the patch. 
Anyway, I think it is a good habit and will add these contents in the 
new v3 version.

Best Regards,
Hongchen Zhang


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-06 20:33   ` Sedat Dilek
@ 2023-01-07  3:31     ` Hongchen Zhang
  0 siblings, 0 replies; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-07  3:31 UTC (permalink / raw)
  To: sedat.dilek, Luis Chamberlain
  Cc: Alexander Viro, Andrew Morton, Kuniyuki Iwashima, David Howells,
	Christophe JAILLET, Randy Dunlap, Eric Dumazet, linux-fsdevel,
	linux-kernel

Hi Sedat,

On 2023/1/7 am 4:33, Sedat Dilek wrote:
> On Fri, Jan 6, 2023 at 8:40 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>>
>> On Fri, Jan 06, 2023 at 05:48:44PM +0800, Hongchen Zhang wrote:
>>> Use spinlock in pipe_read/write cost too much time,IMO
>>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>>> On the other hand, we can use __pipe_{lock,unlock} to protect
>>> the pipe->{head,tail} in pipe_resize_ring and
>>> post_one_notification.
>>>
>>> I tested this patch using UnixBench's pipe test case on a x86_64
>>> machine,and get the following data:
>>> 1) before this patch
>>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>>> Pipe Throughput                   12440.0     493023.3    396.3
>>>                                                          ========
>>> System Benchmarks Index Score (Partial Only)              396.3
>>>
>>> 2) after this patch
>>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>>> Pipe Throughput                   12440.0     507551.4    408.0
>>>                                                          ========
>>> System Benchmarks Index Score (Partial Only)              408.0
>>>
>>> so we get ~3% speedup.
>>>
>>> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
>>> ---
>>
>> After the above "---" line you should have the changlog descrption.
>> For instance:
>>
>> v3:
>>    - fixes bleh blah blah
>> v2:
>>    - fixes 0-day report by ... etc..
>>    - fixes spelling or whatever
>>
>> I cannot decipher what you did here differently, not do I want to go
>> looking and diff'ing. So you are making the life of reviewer harder.
>>
> 
> Happy new 2023.
> 
> Positive wording... You can make reviewers' life easy when...
> (encourage people).
> Life is easy, people live hard.
> 
> +1 Adding ChangeLog of patch history
> 
> Cannot say...
> Might be good to add the link to Linus test-case + your results in the
> commit message as well?
> 
> ...
> Link: https://git.kernel.org/linus/0ddad21d3e99 (test-case of Linus
> suggested-by Andrew)
> ...
> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
> ...
> 
> Thanks.
> 
> Best regards,
> -Sedat-
> 

OK, I have send a new v3 patch with these messages in commit message, 
Please help to check and review again.

Best Regards,
Hongchen Zhang


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-16 22:16         ` Andrew Morton
  2023-01-17  6:54           ` Sedat Dilek
@ 2023-01-29  2:29           ` Hongchen Zhang
  1 sibling, 0 replies; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-29  2:29 UTC (permalink / raw)
  To: Andrew Morton, Al Viro
  Cc: Matthew Wilcox, maobibo, David Howells, Sedat Dilek,
	Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

Hi Andrew,

Sorry to reply to you so late, because I took a long holiday.

On 2023/1/17 am 6:16, Andrew Morton wrote:
> On Mon, 16 Jan 2023 21:10:37 +0000 Al Viro <viro@zeniv.linux.org.uk> wrote:
> 
>> On Mon, Jan 16, 2023 at 04:38:01AM +0000, Matthew Wilcox wrote:
>>> On Mon, Jan 16, 2023 at 11:16:13AM +0800, maobibo wrote:
>>>> Hongchen,
>>>>
>>>> I have a glance with this patch, it simply replaces with
>>>> spinlock_irqsave with mutex lock. There may be performance
>>>> improvement with two processes competing with pipe, however
>>>> for N processes, there will be complex context switches
>>>> and ipi interruptts.
>>>>
>>>> Can you find some cases with more than 2 processes competing
>>>> pipe, rather than only unixbench?
>>>
>>> What real applications have pipes with more than 1 writer & 1 reader?
>>> I'm OK with slowing down the weird cases if the common cases go faster.
>>
>> >From commit 0ddad21d3e99c743a3aa473121dc5561679e26bb:
>>      While this isn't a common occurrence in the traditional "use a pipe as a
>>      data transport" case, where you typically only have a single reader and
>>      a single writer process, there is one common special case: using a pipe
>>      as a source of "locking tokens" rather than for data communication.
>>      
>>      In particular, the GNU make jobserver code ends up using a pipe as a way
>>      to limit parallelism, where each job consumes a token by reading a byte
>>      from the jobserver pipe, and releases the token by writing a byte back
>>      to the pipe.
> 
> The author has tested this patch with Linus's test code from 0ddad21d3e
> and the results were OK
> (https://lkml.kernel.org/r/c3cbede6-f19e-3333-ba0f-d3f005e5d599@loongson.cn).
> 
> I've been stalling on this patch until Linus gets back to his desk,
> which now appears to have happened.
> 
> Hongchen, when convenient, please capture this discussion (as well as
> the testing results with Linus's sample code) in the changelog and send
> us a v4, with Linus on cc?
> 
I will send you a v4 and cc to Linus.

Thanks.
Hongchen Zhang


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-16 22:16         ` Andrew Morton
@ 2023-01-17  6:54           ` Sedat Dilek
  2023-01-29  2:29           ` Hongchen Zhang
  1 sibling, 0 replies; 18+ messages in thread
From: Sedat Dilek @ 2023-01-17  6:54 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Al Viro, Matthew Wilcox, maobibo, Hongchen Zhang, David Howells,
	Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

[-- Attachment #1: Type: text/plain, Size: 5548 bytes --]

On Mon, Jan 16, 2023 at 11:16 PM Andrew Morton
<akpm@linux-foundation.org> wrote:
>
> On Mon, 16 Jan 2023 21:10:37 +0000 Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> > On Mon, Jan 16, 2023 at 04:38:01AM +0000, Matthew Wilcox wrote:
> > > On Mon, Jan 16, 2023 at 11:16:13AM +0800, maobibo wrote:
> > > > Hongchen,
> > > >
> > > > I have a glance with this patch, it simply replaces with
> > > > spinlock_irqsave with mutex lock. There may be performance
> > > > improvement with two processes competing with pipe, however
> > > > for N processes, there will be complex context switches
> > > > and ipi interruptts.
> > > >
> > > > Can you find some cases with more than 2 processes competing
> > > > pipe, rather than only unixbench?
> > >
> > > What real applications have pipes with more than 1 writer & 1 reader?
> > > I'm OK with slowing down the weird cases if the common cases go faster.
> >
> > >From commit 0ddad21d3e99c743a3aa473121dc5561679e26bb:
> >     While this isn't a common occurrence in the traditional "use a pipe as a
> >     data transport" case, where you typically only have a single reader and
> >     a single writer process, there is one common special case: using a pipe
> >     as a source of "locking tokens" rather than for data communication.
> >
> >     In particular, the GNU make jobserver code ends up using a pipe as a way
> >     to limit parallelism, where each job consumes a token by reading a byte
> >     from the jobserver pipe, and releases the token by writing a byte back
> >     to the pipe.
>
> The author has tested this patch with Linus's test code from 0ddad21d3e
> and the results were OK
> (https://lkml.kernel.org/r/c3cbede6-f19e-3333-ba0f-d3f005e5d599@loongson.cn).
>

Yesterday, I had some time to play with and without this patch on my
Debian/unstable AMD64 box.

[ TEST-CASE ]

BASE: Linux v6.2-rc4

PATCH: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock

TEST-CASE: Taken from commit 0ddad21d3e99

RUN: gcc-12 -o 0ddad21d3e99 0ddad21d3e99.c

Link: https://lore.kernel.org/all/20230107012324.30698-1-zhanghongchen@loongson.cn/
Link: https://git.kernel.org/linus/0ddad21d3e99


[ INSTRUCTIONS ]

echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
/proc/sys/kernel/perf_event_paranoid

10 runs: /usr/bin/perf stat --repeat=10 ./0ddad21d3e99

echo 1 | sudo tee /proc/sys/kernel/kptr_restrict
/proc/sys/kernel/perf_event_paranoid


[ BEFORE ]

Performance counter stats for './0ddad21d3e99' (10 runs):

        23.985,50 msec task-clock                       #    3,246
CPUs utilized            ( +-  0,20% )
        1.112.822      context-switches                 #   46,696
K/sec                    ( +-  0,30% )
          403.033      cpu-migrations                   #   16,912
K/sec                    ( +-  0,28% )
            1.508      page-faults                      #   63,278
/sec                     ( +-  2,95% )
   39.436.000.959      cycles                           #    1,655 GHz
                     ( +-  0,22% )
   29.364.329.413      stalled-cycles-frontend          #   74,91%
frontend cycles idle     ( +-  0,24% )
   22.139.448.400      stalled-cycles-backend           #   56,48%
backend cycles idle      ( +-  0,23% )
   18.565.538.523      instructions                     #    0,47
insn per cycle
                                                 #    1,57  stalled
cycles per insn  ( +-  0,17% )
    4.059.885.546      branches                         #  170,359
M/sec                    ( +-  0,17% )
       59.991.226      branch-misses                    #    1,48% of
all branches          ( +-  0,19% )

           7,3892 +- 0,0127 seconds time elapsed  ( +-  0,17% )


[ AFTER ]

Performance counter stats for './0ddad21d3e99' (10 runs):

        24.175,94 msec task-clock                       #    3,362
CPUs utilized            ( +-  0,11% )
        1.139.152      context-switches                 #   47,119
K/sec                    ( +-  0,12% )
          407.994      cpu-migrations                   #   16,876
K/sec                    ( +-  0,26% )
            1.555      page-faults                      #   64,319
/sec                     ( +-  3,11% )
   40.904.849.091      cycles                           #    1,692 GHz
                     ( +-  0,13% )
   30.587.623.034      stalled-cycles-frontend          #   74,84%
frontend cycles idle     ( +-  0,15% )
   23.145.533.537      stalled-cycles-backend           #   56,63%
backend cycles idle      ( +-  0,16% )
   18.762.964.037      instructions                     #    0,46
insn per cycle
                                                 #    1,63  stalled
cycles per insn  ( +-  0,11% )
    4.057.182.849      branches                         #  167,817
M/sec                    ( +-  0,09% )
       63.887.806      branch-misses                    #    1,58% of
all branches          ( +-  0,25% )

          7,19157 +- 0,00644 seconds time elapsed  ( +-  0,09% )


[ RESULT ]

seconds time elapsed: - 2,67%

The test-case c-file is attached and for the case the above lines were
truncated I have attached as a README file.

Feel free to add a...

   Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # LLVM v15.0.3 (x86-64)

If you need further information, please let me know.

-Sedat-

> I've been stalling on this patch until Linus gets back to his desk,
> which now appears to have happened.
>
> Hongchen, when convenient, please capture this discussion (as well as
> the testing results with Linus's sample code) in the changelog and send
> us a v4, with Linus on cc?
>

[-- Attachment #2: 0ddad21d3e99.c --]
[-- Type: text/x-csrc, Size: 787 bytes --]

// Test-case: Benchmark pipe performance
//    Author: Linus Torvalds
//      Link: https://git.kernel.org/linus/0ddad21d3e99
//
//   Compile: gcc-12 -o 0ddad21d3e99 0ddad21d3e99.c
//
#include <unistd.h>

int main(int argc, char **argv)
    {
        int fd[2], counters[2];

        pipe(fd);
        counters[0] = 0;
        counters[1] = -1;
        write(fd[1], counters, sizeof(counters));

        /* 64 processes */
        fork(); fork(); fork(); fork(); fork(); fork();

        do {
                int i;
                read(fd[0], &i, sizeof(i));
                if (i < 0)
                        continue;
                counters[0] = i+1;
                write(fd[1], counters, (1+(i & 1)) *sizeof(int));
        } while (counters[0] < 1000000);
        return 0;
    }

[-- Attachment #3: README_zhanghongchen-pipe-v3-0ddad21d3e99 --]
[-- Type: application/octet-stream, Size: 3234 bytes --]

[ TEST-CASE ]

BASE: Linux v6.2-rc4

PATCH: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock

TEST-CASE: Taken from commit 0ddad21d3e99

RUN: gcc-12 -o 0ddad21d3e99 0ddad21d3e99.c

Link: https://lore.kernel.org/all/20230107012324.30698-1-zhanghongchen@loongson.cn/
Link: https://git.kernel.org/linus/0ddad21d3e99


[ INSTRUCTIONS ]

echo 0 | sudo tee /proc/sys/kernel/kptr_restrict /proc/sys/kernel/perf_event_paranoid

10 runs: /usr/bin/perf stat --repeat=10 ./0ddad21d3e99

echo 1 | sudo tee /proc/sys/kernel/kptr_restrict /proc/sys/kernel/perf_event_paranoid


[ BEFORE ]

 Performance counter stats for './0ddad21d3e99' (10 runs):

         23.985,50 msec task-clock                       #    3,246 CPUs utilized            ( +-  0,20% )
         1.112.822      context-switches                 #   46,696 K/sec                    ( +-  0,30% )
           403.033      cpu-migrations                   #   16,912 K/sec                    ( +-  0,28% )
             1.508      page-faults                      #   63,278 /sec                     ( +-  2,95% )
    39.436.000.959      cycles                           #    1,655 GHz                      ( +-  0,22% )
    29.364.329.413      stalled-cycles-frontend          #   74,91% frontend cycles idle     ( +-  0,24% )
    22.139.448.400      stalled-cycles-backend           #   56,48% backend cycles idle      ( +-  0,23% )
    18.565.538.523      instructions                     #    0,47  insn per cycle         
                                                  #    1,57  stalled cycles per insn  ( +-  0,17% )
     4.059.885.546      branches                         #  170,359 M/sec                    ( +-  0,17% )
        59.991.226      branch-misses                    #    1,48% of all branches          ( +-  0,19% )

            7,3892 +- 0,0127 seconds time elapsed  ( +-  0,17% )


[ AFTER ]

 Performance counter stats for './0ddad21d3e99' (10 runs):

         24.175,94 msec task-clock                       #    3,362 CPUs utilized            ( +-  0,11% )
         1.139.152      context-switches                 #   47,119 K/sec                    ( +-  0,12% )
           407.994      cpu-migrations                   #   16,876 K/sec                    ( +-  0,26% )
             1.555      page-faults                      #   64,319 /sec                     ( +-  3,11% )
    40.904.849.091      cycles                           #    1,692 GHz                      ( +-  0,13% )
    30.587.623.034      stalled-cycles-frontend          #   74,84% frontend cycles idle     ( +-  0,15% )
    23.145.533.537      stalled-cycles-backend           #   56,63% backend cycles idle      ( +-  0,16% )
    18.762.964.037      instructions                     #    0,46  insn per cycle         
                                                  #    1,63  stalled cycles per insn  ( +-  0,11% )
     4.057.182.849      branches                         #  167,817 M/sec                    ( +-  0,09% )
        63.887.806      branch-misses                    #    1,58% of all branches          ( +-  0,25% )

           7,19157 +- 0,00644 seconds time elapsed  ( +-  0,09% )


[ RESULT ]

seconds time elapsed: - 2,67%


-dileks // 16-Jan-2023

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-16 21:10       ` Al Viro
@ 2023-01-16 22:16         ` Andrew Morton
  2023-01-17  6:54           ` Sedat Dilek
  2023-01-29  2:29           ` Hongchen Zhang
  0 siblings, 2 replies; 18+ messages in thread
From: Andrew Morton @ 2023-01-16 22:16 UTC (permalink / raw)
  To: Al Viro
  Cc: Matthew Wilcox, maobibo, Hongchen Zhang, David Howells,
	Sedat Dilek, Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

On Mon, 16 Jan 2023 21:10:37 +0000 Al Viro <viro@zeniv.linux.org.uk> wrote:

> On Mon, Jan 16, 2023 at 04:38:01AM +0000, Matthew Wilcox wrote:
> > On Mon, Jan 16, 2023 at 11:16:13AM +0800, maobibo wrote:
> > > Hongchen,
> > > 
> > > I have a glance with this patch, it simply replaces with
> > > spinlock_irqsave with mutex lock. There may be performance
> > > improvement with two processes competing with pipe, however
> > > for N processes, there will be complex context switches
> > > and ipi interruptts.
> > > 
> > > Can you find some cases with more than 2 processes competing
> > > pipe, rather than only unixbench?
> > 
> > What real applications have pipes with more than 1 writer & 1 reader?
> > I'm OK with slowing down the weird cases if the common cases go faster.
> 
> >From commit 0ddad21d3e99c743a3aa473121dc5561679e26bb:
>     While this isn't a common occurrence in the traditional "use a pipe as a
>     data transport" case, where you typically only have a single reader and
>     a single writer process, there is one common special case: using a pipe
>     as a source of "locking tokens" rather than for data communication.
>     
>     In particular, the GNU make jobserver code ends up using a pipe as a way
>     to limit parallelism, where each job consumes a token by reading a byte
>     from the jobserver pipe, and releases the token by writing a byte back
>     to the pipe.

The author has tested this patch with Linus's test code from 0ddad21d3e
and the results were OK
(https://lkml.kernel.org/r/c3cbede6-f19e-3333-ba0f-d3f005e5d599@loongson.cn).

I've been stalling on this patch until Linus gets back to his desk,
which now appears to have happened.

Hongchen, when convenient, please capture this discussion (as well as
the testing results with Linus's sample code) in the changelog and send
us a v4, with Linus on cc?


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-16  4:38     ` Matthew Wilcox
@ 2023-01-16 21:10       ` Al Viro
  2023-01-16 22:16         ` Andrew Morton
  0 siblings, 1 reply; 18+ messages in thread
From: Al Viro @ 2023-01-16 21:10 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: maobibo, Hongchen Zhang, Andrew Morton, David Howells,
	Sedat Dilek, Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

On Mon, Jan 16, 2023 at 04:38:01AM +0000, Matthew Wilcox wrote:
> On Mon, Jan 16, 2023 at 11:16:13AM +0800, maobibo wrote:
> > Hongchen,
> > 
> > I have a glance with this patch, it simply replaces with
> > spinlock_irqsave with mutex lock. There may be performance
> > improvement with two processes competing with pipe, however
> > for N processes, there will be complex context switches
> > and ipi interruptts.
> > 
> > Can you find some cases with more than 2 processes competing
> > pipe, rather than only unixbench?
> 
> What real applications have pipes with more than 1 writer & 1 reader?
> I'm OK with slowing down the weird cases if the common cases go faster.

From commit 0ddad21d3e99c743a3aa473121dc5561679e26bb:
    While this isn't a common occurrence in the traditional "use a pipe as a
    data transport" case, where you typically only have a single reader and
    a single writer process, there is one common special case: using a pipe
    as a source of "locking tokens" rather than for data communication.
    
    In particular, the GNU make jobserver code ends up using a pipe as a way
    to limit parallelism, where each job consumes a token by reading a byte
    from the jobserver pipe, and releases the token by writing a byte back
    to the pipe.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-16  3:16   ` maobibo
@ 2023-01-16  4:38     ` Matthew Wilcox
  2023-01-16 21:10       ` Al Viro
  0 siblings, 1 reply; 18+ messages in thread
From: Matthew Wilcox @ 2023-01-16  4:38 UTC (permalink / raw)
  To: maobibo
  Cc: Hongchen Zhang, Andrew Morton, David Howells, Sedat Dilek,
	Alexander Viro, Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

On Mon, Jan 16, 2023 at 11:16:13AM +0800, maobibo wrote:
> Hongchen,
> 
> I have a glance with this patch, it simply replaces with
> spinlock_irqsave with mutex lock. There may be performance
> improvement with two processes competing with pipe, however
> for N processes, there will be complex context switches
> and ipi interruptts.
> 
> Can you find some cases with more than 2 processes competing
> pipe, rather than only unixbench?

What real applications have pipes with more than 1 writer & 1 reader?
I'm OK with slowing down the weird cases if the common cases go faster.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-13  3:19 ` Hongchen Zhang
  2023-01-13  9:32   ` Sedat Dilek
@ 2023-01-16  3:16   ` maobibo
  2023-01-16  4:38     ` Matthew Wilcox
  1 sibling, 1 reply; 18+ messages in thread
From: maobibo @ 2023-01-16  3:16 UTC (permalink / raw)
  To: Hongchen Zhang, Andrew Morton, David Howells, Sedat Dilek,
	Matthew Wilcox
  Cc: Alexander Viro, Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

Hongchen,

I have a glance with this patch, it simply replaces with
spinlock_irqsave with mutex lock. There may be performance
improvement with two processes competing with pipe, however
for N processes, there will be complex context switches
and ipi interruptts.

Can you find some cases with more than 2 processes competing
pipe, rather than only unixbench?

regards
bibo, mao

在 2023/1/13 11:19, Hongchen Zhang 写道:
> Hi All,
> any question about this patch, can it be merged?
> 
> Thanks
> On 2023/1/7 am 9:23, Hongchen Zhang wrote:
>> Use spinlock in pipe_read/write cost too much time,IMO
>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>> On the other hand, we can use __pipe_{lock,unlock} to protect
>> the pipe->{head,tail} in pipe_resize_ring and
>> post_one_notification.
>>
>> Reminded by Matthew, I tested this patch using UnixBench's pipe
>> test case on a x86_64 machine,and get the following data:
>> 1) before this patch
>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>> Pipe Throughput                   12440.0     493023.3    396.3
>>                                                          ========
>> System Benchmarks Index Score (Partial Only)              396.3
>>
>> 2) after this patch
>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>> Pipe Throughput                   12440.0     507551.4    408.0
>>                                                          ========
>> System Benchmarks Index Score (Partial Only)              408.0
>>
>> so we get ~3% speedup.
>>
>> Reminded by Andrew, I tested this patch with the test code in
>> Linus's 0ddad21d3e99 add get following result:
>> 1) before this patch
>>           13,136.54 msec task-clock           #    3.870 CPUs utilized
>>           1,186,779      context-switches     #   90.342 K/sec
>>             668,867      cpu-migrations       #   50.917 K/sec
>>                 895      page-faults          #   68.131 /sec
>>      29,875,711,543      cycles               #    2.274 GHz
>>      12,372,397,462      instructions         #    0.41  insn per cycle
>>       2,480,235,723      branches             #  188.804 M/sec
>>          47,191,943      branch-misses        #    1.90% of all branches
>>
>>         3.394806886 seconds time elapsed
>>
>>         0.037869000 seconds user
>>         0.189346000 seconds sys
>>
>> 2) after this patch
>>
>>           12,395.63 msec task-clock          #    4.138 CPUs utilized
>>           1,193,381      context-switches    #   96.274 K/sec
>>             585,543      cpu-migrations      #   47.238 K/sec
>>               1,063      page-faults         #   85.756 /sec
>>      27,691,587,226      cycles              #    2.234 GHz
>>      11,738,307,999      instructions        #    0.42  insn per cycle
>>       2,351,299,522      branches            #  189.688 M/sec
>>          45,404,526      branch-misses       #    1.93% of all branches
>>
>>         2.995280878 seconds time elapsed
>>
>>         0.010615000 seconds user
>>         0.206999000 seconds sys
>> After adding this patch, the time used on this test program becomes less.
>>
>> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
>>
>> v3:
>>    - fixes the error reported by kernel test robot <oliver.sang@intel.com>
>>      Link: https://lore.kernel.org/oe-lkp/202301061340.c954d61f-oliver.sang@intel.com
>>    - add perf stat data for the test code in Linus's 0ddad21d3e99 in
>>      commit message.
>> v2:
>>    - add UnixBench test data in commit message
>>    - fixes the test error reported by kernel test robot <lkp@intel.com>
>>      by adding the missing fs.h header file.
>> ---
>>   fs/pipe.c                 | 22 +---------------------
>>   include/linux/pipe_fs_i.h | 12 ++++++++++++
>>   kernel/watch_queue.c      |  8 ++++----
>>   3 files changed, 17 insertions(+), 25 deletions(-)
>>
>> diff --git a/fs/pipe.c b/fs/pipe.c
>> index 42c7ff41c2db..4355ee5f754e 100644
>> --- a/fs/pipe.c
>> +++ b/fs/pipe.c
>> @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>>   }
>>   EXPORT_SYMBOL(pipe_unlock);
>>   -static inline void __pipe_lock(struct pipe_inode_info *pipe)
>> -{
>> -    mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
>> -}
>> -
>> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
>> -{
>> -    mutex_unlock(&pipe->mutex);
>> -}
>> -
>>   void pipe_double_lock(struct pipe_inode_info *pipe1,
>>                 struct pipe_inode_info *pipe2)
>>   {
>> @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>>        */
>>       was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>>       for (;;) {
>> -        /* Read ->head with a barrier vs post_one_notification() */
>> -        unsigned int head = smp_load_acquire(&pipe->head);
>> +        unsigned int head = pipe->head;
>>           unsigned int tail = pipe->tail;
>>           unsigned int mask = pipe->ring_size - 1;
>>   @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>>                 if (!buf->len) {
>>                   pipe_buf_release(pipe, buf);
>> -                spin_lock_irq(&pipe->rd_wait.lock);
>>   #ifdef CONFIG_WATCH_QUEUE
>>                   if (buf->flags & PIPE_BUF_FLAG_LOSS)
>>                       pipe->note_loss = true;
>>   #endif
>>                   tail++;
>>                   pipe->tail = tail;
>> -                spin_unlock_irq(&pipe->rd_wait.lock);
>>               }
>>               total_len -= chars;
>>               if (!total_len)
>> @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>>                * it, either the reader will consume it or it'll still
>>                * be there for the next write.
>>                */
>> -            spin_lock_irq(&pipe->rd_wait.lock);
>>                 head = pipe->head;
>>               if (pipe_full(head, pipe->tail, pipe->max_usage)) {
>> -                spin_unlock_irq(&pipe->rd_wait.lock);
>>                   continue;
>>               }
>>                 pipe->head = head + 1;
>> -            spin_unlock_irq(&pipe->rd_wait.lock);
>>                 /* Insert it into the buffer array */
>>               buf = &pipe->bufs[head & mask];
>> @@ -1260,14 +1244,12 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>>       if (unlikely(!bufs))
>>           return -ENOMEM;
>>   -    spin_lock_irq(&pipe->rd_wait.lock);
>>       mask = pipe->ring_size - 1;
>>       head = pipe->head;
>>       tail = pipe->tail;
>>         n = pipe_occupancy(head, tail);
>>       if (nr_slots < n) {
>> -        spin_unlock_irq(&pipe->rd_wait.lock);
>>           kfree(bufs);
>>           return -EBUSY;
>>       }
>> @@ -1303,8 +1285,6 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>>       pipe->tail = tail;
>>       pipe->head = head;
>>   -    spin_unlock_irq(&pipe->rd_wait.lock);
>> -
>>       /* This might have made more room for writers */
>>       wake_up_interruptible(&pipe->wr_wait);
>>       return 0;
>> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
>> index 6cb65df3e3ba..f5084daf6eaf 100644
>> --- a/include/linux/pipe_fs_i.h
>> +++ b/include/linux/pipe_fs_i.h
>> @@ -2,6 +2,8 @@
>>   #ifndef _LINUX_PIPE_FS_I_H
>>   #define _LINUX_PIPE_FS_I_H
>>   +#include <linux/fs.h>
>> +
>>   #define PIPE_DEF_BUFFERS    16
>>     #define PIPE_BUF_FLAG_LRU    0x01    /* page is on the LRU */
>> @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
>>   #define PIPE_SIZE        PAGE_SIZE
>>     /* Pipe lock and unlock operations */
>> +static inline void __pipe_lock(struct pipe_inode_info *pipe)
>> +{
>> +    mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
>> +}
>> +
>> +static inline void __pipe_unlock(struct pipe_inode_info *pipe)
>> +{
>> +    mutex_unlock(&pipe->mutex);
>> +}
>> +
>>   void pipe_lock(struct pipe_inode_info *);
>>   void pipe_unlock(struct pipe_inode_info *);
>>   void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
>> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
>> index a6f9bdd956c3..92e46cfe9419 100644
>> --- a/kernel/watch_queue.c
>> +++ b/kernel/watch_queue.c
>> @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
>>       if (!pipe)
>>           return false;
>>   -    spin_lock_irq(&pipe->rd_wait.lock);
>> +    __pipe_lock(pipe);
>>         mask = pipe->ring_size - 1;
>>       head = pipe->head;
>> @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
>>       buf->offset = offset;
>>       buf->len = len;
>>       buf->flags = PIPE_BUF_FLAG_WHOLE;
>> -    smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
>> +    pipe->head = head + 1;
>>         if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
>> -        spin_unlock_irq(&pipe->rd_wait.lock);
>> +        __pipe_unlock(pipe);
>>           BUG();
>>       }
>>       wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>>       done = true;
>>     out:
>> -    spin_unlock_irq(&pipe->rd_wait.lock);
>> +    __pipe_unlock(pipe);
>>       if (done)
>>           kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>>       return done;
>>
>> base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-16  2:16       ` Hongchen Zhang
@ 2023-01-16  2:42         ` Sedat Dilek
  0 siblings, 0 replies; 18+ messages in thread
From: Sedat Dilek @ 2023-01-16  2:42 UTC (permalink / raw)
  To: Hongchen Zhang
  Cc: Andrew Morton, David Howells, Matthew Wilcox, Alexander Viro,
	Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

On Mon, Jan 16, 2023 at 3:16 AM Hongchen Zhang
<zhanghongchen@loongson.cn> wrote:
>
> Hi sedat,
>
>
> On 2023/1/16 am9:52, Sedat Dilek wrote:
> > On Fri, Jan 13, 2023 at 10:32 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >>
> >> On Fri, Jan 13, 2023 at 4:19 AM Hongchen Zhang
> >> <zhanghongchen@loongson.cn> wrote:
> >>>
> >>> Hi All,
> >>> any question about this patch, can it be merged?
> >>>
> >>> Thanks
> >>> On 2023/1/7 am 9:23, Hongchen Zhang wrote:
> >>>> Use spinlock in pipe_read/write cost too much time,IMO
> >>>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> >>>> On the other hand, we can use __pipe_{lock,unlock} to protect
> >>>> the pipe->{head,tail} in pipe_resize_ring and
> >>>> post_one_notification.
> >>>>
> >>>> Reminded by Matthew, I tested this patch using UnixBench's pipe
> >>>> test case on a x86_64 machine,and get the following data:
> >>>> 1) before this patch
> >>>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> >>>> Pipe Throughput                   12440.0     493023.3    396.3
> >>>>                                                           ========
> >>>> System Benchmarks Index Score (Partial Only)              396.3
> >>>>
> >>>> 2) after this patch
> >>>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> >>>> Pipe Throughput                   12440.0     507551.4    408.0
> >>>>                                                           ========
> >>>> System Benchmarks Index Score (Partial Only)              408.0
> >>>>
> >>>> so we get ~3% speedup.
> >>>>
> >>>> Reminded by Andrew, I tested this patch with the test code in
> >>>> Linus's 0ddad21d3e99 add get following result:
> >>
> >> Happy new 2023 Hongchen Zhang,
> >>
> >> Thanks for the update and sorry for the late response.
> >>
> >> Should be "...s/add/and get following result:"
> >>
> >> I cannot say much about the patch itself or tested it in my build-environment.
> >>
> >> Best regards,
> >> -Sedat-
> >>
> >
> > I have applied v3 on top of Linux v6.2-rc4.
> >
> > Used pipebench for a quick testing.
> >
> > # fdisk -l /dev/sdb
> > Disk /dev/sdb: 14,91 GiB, 16013942784 bytes, 31277232 sectors
> > Disk model: SanDisk iSSD P4
> > Units: sectors of 1 * 512 = 512 bytes
> > Sector size (logical/physical): 512 bytes / 512 bytes
> > I/O size (minimum/optimal): 512 bytes / 512 bytes
> > Disklabel type: dos
> > Disk identifier: 0x74f02dea
> >
> > Device     Boot Start      End  Sectors  Size Id Type
> > /dev/sdb1        2048 31277231 31275184 14,9G 83 Linux
> >
> > # cat /dev/sdb | pipebench > /dev/null
> > Summary:
> > Piped   14.91 GB in 00h01m34.20s:  162.12 MB/second
> >
> > Not tested/benchmarked with the kernel w/o your patch.
> >
> > -Sedat-
> >
> OK, If there is any problem, let's continue to discuss it
> and hope it can be merged into the main line.
>

Can you give me a hand on the perf stat line?

I tried with:

$ /usr/bin/perf stat --repeat=1 ./0ddad21d3e99

But that gives in both cases no context-switches and cpu-migrations values.

-Sedat-

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-16  1:52     ` Sedat Dilek
@ 2023-01-16  2:16       ` Hongchen Zhang
  2023-01-16  2:42         ` Sedat Dilek
  0 siblings, 1 reply; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-16  2:16 UTC (permalink / raw)
  To: sedat.dilek
  Cc: Andrew Morton, David Howells, Matthew Wilcox, Alexander Viro,
	Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

Hi sedat,


On 2023/1/16 am9:52, Sedat Dilek wrote:
> On Fri, Jan 13, 2023 at 10:32 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>>
>> On Fri, Jan 13, 2023 at 4:19 AM Hongchen Zhang
>> <zhanghongchen@loongson.cn> wrote:
>>>
>>> Hi All,
>>> any question about this patch, can it be merged?
>>>
>>> Thanks
>>> On 2023/1/7 am 9:23, Hongchen Zhang wrote:
>>>> Use spinlock in pipe_read/write cost too much time,IMO
>>>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>>>> On the other hand, we can use __pipe_{lock,unlock} to protect
>>>> the pipe->{head,tail} in pipe_resize_ring and
>>>> post_one_notification.
>>>>
>>>> Reminded by Matthew, I tested this patch using UnixBench's pipe
>>>> test case on a x86_64 machine,and get the following data:
>>>> 1) before this patch
>>>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>>>> Pipe Throughput                   12440.0     493023.3    396.3
>>>>                                                           ========
>>>> System Benchmarks Index Score (Partial Only)              396.3
>>>>
>>>> 2) after this patch
>>>> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
>>>> Pipe Throughput                   12440.0     507551.4    408.0
>>>>                                                           ========
>>>> System Benchmarks Index Score (Partial Only)              408.0
>>>>
>>>> so we get ~3% speedup.
>>>>
>>>> Reminded by Andrew, I tested this patch with the test code in
>>>> Linus's 0ddad21d3e99 add get following result:
>>
>> Happy new 2023 Hongchen Zhang,
>>
>> Thanks for the update and sorry for the late response.
>>
>> Should be "...s/add/and get following result:"
>>
>> I cannot say much about the patch itself or tested it in my build-environment.
>>
>> Best regards,
>> -Sedat-
>>
> 
> I have applied v3 on top of Linux v6.2-rc4.
> 
> Used pipebench for a quick testing.
> 
> # fdisk -l /dev/sdb
> Disk /dev/sdb: 14,91 GiB, 16013942784 bytes, 31277232 sectors
> Disk model: SanDisk iSSD P4
> Units: sectors of 1 * 512 = 512 bytes
> Sector size (logical/physical): 512 bytes / 512 bytes
> I/O size (minimum/optimal): 512 bytes / 512 bytes
> Disklabel type: dos
> Disk identifier: 0x74f02dea
> 
> Device     Boot Start      End  Sectors  Size Id Type
> /dev/sdb1        2048 31277231 31275184 14,9G 83 Linux
> 
> # cat /dev/sdb | pipebench > /dev/null
> Summary:
> Piped   14.91 GB in 00h01m34.20s:  162.12 MB/second
> 
> Not tested/benchmarked with the kernel w/o your patch.
> 
> -Sedat-
> 
OK, If there is any problem, let's continue to discuss it
and hope it can be merged into the main line.

Best Regards,
Hongchen Zhang


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-13  9:32   ` Sedat Dilek
@ 2023-01-16  1:52     ` Sedat Dilek
  2023-01-16  2:16       ` Hongchen Zhang
  0 siblings, 1 reply; 18+ messages in thread
From: Sedat Dilek @ 2023-01-16  1:52 UTC (permalink / raw)
  To: Hongchen Zhang
  Cc: Andrew Morton, David Howells, Matthew Wilcox, Alexander Viro,
	Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

On Fri, Jan 13, 2023 at 10:32 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Fri, Jan 13, 2023 at 4:19 AM Hongchen Zhang
> <zhanghongchen@loongson.cn> wrote:
> >
> > Hi All,
> > any question about this patch, can it be merged?
> >
> > Thanks
> > On 2023/1/7 am 9:23, Hongchen Zhang wrote:
> > > Use spinlock in pipe_read/write cost too much time,IMO
> > > pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> > > On the other hand, we can use __pipe_{lock,unlock} to protect
> > > the pipe->{head,tail} in pipe_resize_ring and
> > > post_one_notification.
> > >
> > > Reminded by Matthew, I tested this patch using UnixBench's pipe
> > > test case on a x86_64 machine,and get the following data:
> > > 1) before this patch
> > > System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> > > Pipe Throughput                   12440.0     493023.3    396.3
> > >                                                          ========
> > > System Benchmarks Index Score (Partial Only)              396.3
> > >
> > > 2) after this patch
> > > System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> > > Pipe Throughput                   12440.0     507551.4    408.0
> > >                                                          ========
> > > System Benchmarks Index Score (Partial Only)              408.0
> > >
> > > so we get ~3% speedup.
> > >
> > > Reminded by Andrew, I tested this patch with the test code in
> > > Linus's 0ddad21d3e99 add get following result:
>
> Happy new 2023 Hongchen Zhang,
>
> Thanks for the update and sorry for the late response.
>
> Should be "...s/add/and get following result:"
>
> I cannot say much about the patch itself or tested it in my build-environment.
>
> Best regards,
> -Sedat-
>

I have applied v3 on top of Linux v6.2-rc4.

Used pipebench for a quick testing.

# fdisk -l /dev/sdb
Disk /dev/sdb: 14,91 GiB, 16013942784 bytes, 31277232 sectors
Disk model: SanDisk iSSD P4
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disklabel type: dos
Disk identifier: 0x74f02dea

Device     Boot Start      End  Sectors  Size Id Type
/dev/sdb1        2048 31277231 31275184 14,9G 83 Linux

# cat /dev/sdb | pipebench > /dev/null
Summary:
Piped   14.91 GB in 00h01m34.20s:  162.12 MB/second

Not tested/benchmarked with the kernel w/o your patch.

-Sedat-

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-13  3:19 ` Hongchen Zhang
@ 2023-01-13  9:32   ` Sedat Dilek
  2023-01-16  1:52     ` Sedat Dilek
  2023-01-16  3:16   ` maobibo
  1 sibling, 1 reply; 18+ messages in thread
From: Sedat Dilek @ 2023-01-13  9:32 UTC (permalink / raw)
  To: Hongchen Zhang
  Cc: Andrew Morton, David Howells, Matthew Wilcox, Alexander Viro,
	Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

On Fri, Jan 13, 2023 at 4:19 AM Hongchen Zhang
<zhanghongchen@loongson.cn> wrote:
>
> Hi All,
> any question about this patch, can it be merged?
>
> Thanks
> On 2023/1/7 am 9:23, Hongchen Zhang wrote:
> > Use spinlock in pipe_read/write cost too much time,IMO
> > pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> > On the other hand, we can use __pipe_{lock,unlock} to protect
> > the pipe->{head,tail} in pipe_resize_ring and
> > post_one_notification.
> >
> > Reminded by Matthew, I tested this patch using UnixBench's pipe
> > test case on a x86_64 machine,and get the following data:
> > 1) before this patch
> > System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> > Pipe Throughput                   12440.0     493023.3    396.3
> >                                                          ========
> > System Benchmarks Index Score (Partial Only)              396.3
> >
> > 2) after this patch
> > System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> > Pipe Throughput                   12440.0     507551.4    408.0
> >                                                          ========
> > System Benchmarks Index Score (Partial Only)              408.0
> >
> > so we get ~3% speedup.
> >
> > Reminded by Andrew, I tested this patch with the test code in
> > Linus's 0ddad21d3e99 add get following result:

Happy new 2023 Hongchen Zhang,

Thanks for the update and sorry for the late response.

Should be "...s/add/and get following result:"

I cannot say much about the patch itself or tested it in my build-environment.

Best regards,
-Sedat-

> > 1) before this patch
> >           13,136.54 msec task-clock           #    3.870 CPUs utilized
> >           1,186,779      context-switches     #   90.342 K/sec
> >             668,867      cpu-migrations       #   50.917 K/sec
> >                 895      page-faults          #   68.131 /sec
> >      29,875,711,543      cycles               #    2.274 GHz
> >      12,372,397,462      instructions         #    0.41  insn per cycle
> >       2,480,235,723      branches             #  188.804 M/sec
> >          47,191,943      branch-misses        #    1.90% of all branches
> >
> >         3.394806886 seconds time elapsed
> >
> >         0.037869000 seconds user
> >         0.189346000 seconds sys
> >
> > 2) after this patch
> >
> >           12,395.63 msec task-clock          #    4.138 CPUs utilized
> >           1,193,381      context-switches    #   96.274 K/sec
> >             585,543      cpu-migrations      #   47.238 K/sec
> >               1,063      page-faults         #   85.756 /sec
> >      27,691,587,226      cycles              #    2.234 GHz
> >      11,738,307,999      instructions        #    0.42  insn per cycle
> >       2,351,299,522      branches            #  189.688 M/sec
> >          45,404,526      branch-misses       #    1.93% of all branches
> >
> >         2.995280878 seconds time elapsed
> >
> >         0.010615000 seconds user
> >         0.206999000 seconds sys
> > After adding this patch, the time used on this test program becomes less.
> >
> > Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
> >
> > v3:
> >    - fixes the error reported by kernel test robot <oliver.sang@intel.com>
> >      Link: https://lore.kernel.org/oe-lkp/202301061340.c954d61f-oliver.sang@intel.com
> >    - add perf stat data for the test code in Linus's 0ddad21d3e99 in
> >      commit message.
> > v2:
> >    - add UnixBench test data in commit message
> >    - fixes the test error reported by kernel test robot <lkp@intel.com>
> >      by adding the missing fs.h header file.
> > ---
> >   fs/pipe.c                 | 22 +---------------------
> >   include/linux/pipe_fs_i.h | 12 ++++++++++++
> >   kernel/watch_queue.c      |  8 ++++----
> >   3 files changed, 17 insertions(+), 25 deletions(-)
> >
> > diff --git a/fs/pipe.c b/fs/pipe.c
> > index 42c7ff41c2db..4355ee5f754e 100644
> > --- a/fs/pipe.c
> > +++ b/fs/pipe.c
> > @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> >   }
> >   EXPORT_SYMBOL(pipe_unlock);
> >
> > -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> > -{
> > -     mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> > -}
> > -
> > -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> > -{
> > -     mutex_unlock(&pipe->mutex);
> > -}
> > -
> >   void pipe_double_lock(struct pipe_inode_info *pipe1,
> >                     struct pipe_inode_info *pipe2)
> >   {
> > @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >        */
> >       was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> >       for (;;) {
> > -             /* Read ->head with a barrier vs post_one_notification() */
> > -             unsigned int head = smp_load_acquire(&pipe->head);
> > +             unsigned int head = pipe->head;
> >               unsigned int tail = pipe->tail;
> >               unsigned int mask = pipe->ring_size - 1;
> >
> > @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >
> >                       if (!buf->len) {
> >                               pipe_buf_release(pipe, buf);
> > -                             spin_lock_irq(&pipe->rd_wait.lock);
> >   #ifdef CONFIG_WATCH_QUEUE
> >                               if (buf->flags & PIPE_BUF_FLAG_LOSS)
> >                                       pipe->note_loss = true;
> >   #endif
> >                               tail++;
> >                               pipe->tail = tail;
> > -                             spin_unlock_irq(&pipe->rd_wait.lock);
> >                       }
> >                       total_len -= chars;
> >                       if (!total_len)
> > @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> >                        * it, either the reader will consume it or it'll still
> >                        * be there for the next write.
> >                        */
> > -                     spin_lock_irq(&pipe->rd_wait.lock);
> >
> >                       head = pipe->head;
> >                       if (pipe_full(head, pipe->tail, pipe->max_usage)) {
> > -                             spin_unlock_irq(&pipe->rd_wait.lock);
> >                               continue;
> >                       }
> >
> >                       pipe->head = head + 1;
> > -                     spin_unlock_irq(&pipe->rd_wait.lock);
> >
> >                       /* Insert it into the buffer array */
> >                       buf = &pipe->bufs[head & mask];
> > @@ -1260,14 +1244,12 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
> >       if (unlikely(!bufs))
> >               return -ENOMEM;
> >
> > -     spin_lock_irq(&pipe->rd_wait.lock);
> >       mask = pipe->ring_size - 1;
> >       head = pipe->head;
> >       tail = pipe->tail;
> >
> >       n = pipe_occupancy(head, tail);
> >       if (nr_slots < n) {
> > -             spin_unlock_irq(&pipe->rd_wait.lock);
> >               kfree(bufs);
> >               return -EBUSY;
> >       }
> > @@ -1303,8 +1285,6 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
> >       pipe->tail = tail;
> >       pipe->head = head;
> >
> > -     spin_unlock_irq(&pipe->rd_wait.lock);
> > -
> >       /* This might have made more room for writers */
> >       wake_up_interruptible(&pipe->wr_wait);
> >       return 0;
> > diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> > index 6cb65df3e3ba..f5084daf6eaf 100644
> > --- a/include/linux/pipe_fs_i.h
> > +++ b/include/linux/pipe_fs_i.h
> > @@ -2,6 +2,8 @@
> >   #ifndef _LINUX_PIPE_FS_I_H
> >   #define _LINUX_PIPE_FS_I_H
> >
> > +#include <linux/fs.h>
> > +
> >   #define PIPE_DEF_BUFFERS    16
> >
> >   #define PIPE_BUF_FLAG_LRU   0x01    /* page is on the LRU */
> > @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
> >   #define PIPE_SIZE           PAGE_SIZE
> >
> >   /* Pipe lock and unlock operations */
> > +static inline void __pipe_lock(struct pipe_inode_info *pipe)
> > +{
> > +     mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> > +}
> > +
> > +static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> > +{
> > +     mutex_unlock(&pipe->mutex);
> > +}
> > +
> >   void pipe_lock(struct pipe_inode_info *);
> >   void pipe_unlock(struct pipe_inode_info *);
> >   void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
> > diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
> > index a6f9bdd956c3..92e46cfe9419 100644
> > --- a/kernel/watch_queue.c
> > +++ b/kernel/watch_queue.c
> > @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
> >       if (!pipe)
> >               return false;
> >
> > -     spin_lock_irq(&pipe->rd_wait.lock);
> > +     __pipe_lock(pipe);
> >
> >       mask = pipe->ring_size - 1;
> >       head = pipe->head;
> > @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
> >       buf->offset = offset;
> >       buf->len = len;
> >       buf->flags = PIPE_BUF_FLAG_WHOLE;
> > -     smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
> > +     pipe->head = head + 1;
> >
> >       if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
> > -             spin_unlock_irq(&pipe->rd_wait.lock);
> > +             __pipe_unlock(pipe);
> >               BUG();
> >       }
> >       wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> >       done = true;
> >
> >   out:
> > -     spin_unlock_irq(&pipe->rd_wait.lock);
> > +     __pipe_unlock(pipe);
> >       if (done)
> >               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> >       return done;
> >
> > base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
> >
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
  2023-01-07  1:23 Hongchen Zhang
@ 2023-01-13  3:19 ` Hongchen Zhang
  2023-01-13  9:32   ` Sedat Dilek
  2023-01-16  3:16   ` maobibo
  0 siblings, 2 replies; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-13  3:19 UTC (permalink / raw)
  To: Andrew Morton, David Howells, Sedat Dilek, Matthew Wilcox
  Cc: Alexander Viro, Christian Brauner (Microsoft),
	Luis Chamberlain, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET, linux-fsdevel,
	linux-kernel

Hi All,
any question about this patch, can it be merged?

Thanks
On 2023/1/7 am 9:23, Hongchen Zhang wrote:
> Use spinlock in pipe_read/write cost too much time,IMO
> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> On the other hand, we can use __pipe_{lock,unlock} to protect
> the pipe->{head,tail} in pipe_resize_ring and
> post_one_notification.
> 
> Reminded by Matthew, I tested this patch using UnixBench's pipe
> test case on a x86_64 machine,and get the following data:
> 1) before this patch
> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> Pipe Throughput                   12440.0     493023.3    396.3
>                                                          ========
> System Benchmarks Index Score (Partial Only)              396.3
> 
> 2) after this patch
> System Benchmarks Partial Index  BASELINE       RESULT    INDEX
> Pipe Throughput                   12440.0     507551.4    408.0
>                                                          ========
> System Benchmarks Index Score (Partial Only)              408.0
> 
> so we get ~3% speedup.
> 
> Reminded by Andrew, I tested this patch with the test code in
> Linus's 0ddad21d3e99 add get following result:
> 1) before this patch
>           13,136.54 msec task-clock           #    3.870 CPUs utilized
>           1,186,779      context-switches     #   90.342 K/sec
>             668,867      cpu-migrations       #   50.917 K/sec
>                 895      page-faults          #   68.131 /sec
>      29,875,711,543      cycles               #    2.274 GHz
>      12,372,397,462      instructions         #    0.41  insn per cycle
>       2,480,235,723      branches             #  188.804 M/sec
>          47,191,943      branch-misses        #    1.90% of all branches
> 
>         3.394806886 seconds time elapsed
> 
>         0.037869000 seconds user
>         0.189346000 seconds sys
> 
> 2) after this patch
> 
>           12,395.63 msec task-clock          #    4.138 CPUs utilized
>           1,193,381      context-switches    #   96.274 K/sec
>             585,543      cpu-migrations      #   47.238 K/sec
>               1,063      page-faults         #   85.756 /sec
>      27,691,587,226      cycles              #    2.234 GHz
>      11,738,307,999      instructions        #    0.42  insn per cycle
>       2,351,299,522      branches            #  189.688 M/sec
>          45,404,526      branch-misses       #    1.93% of all branches
> 
>         2.995280878 seconds time elapsed
> 
>         0.010615000 seconds user
>         0.206999000 seconds sys
> After adding this patch, the time used on this test program becomes less.
> 
> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
> 
> v3:
>    - fixes the error reported by kernel test robot <oliver.sang@intel.com>
>      Link: https://lore.kernel.org/oe-lkp/202301061340.c954d61f-oliver.sang@intel.com
>    - add perf stat data for the test code in Linus's 0ddad21d3e99 in
>      commit message.
> v2:
>    - add UnixBench test data in commit message
>    - fixes the test error reported by kernel test robot <lkp@intel.com>
>      by adding the missing fs.h header file.
> ---
>   fs/pipe.c                 | 22 +---------------------
>   include/linux/pipe_fs_i.h | 12 ++++++++++++
>   kernel/watch_queue.c      |  8 ++++----
>   3 files changed, 17 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 42c7ff41c2db..4355ee5f754e 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>   }
>   EXPORT_SYMBOL(pipe_unlock);
>   
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> -	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> -	mutex_unlock(&pipe->mutex);
> -}
> -
>   void pipe_double_lock(struct pipe_inode_info *pipe1,
>   		      struct pipe_inode_info *pipe2)
>   {
> @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   	 */
>   	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>   	for (;;) {
> -		/* Read ->head with a barrier vs post_one_notification() */
> -		unsigned int head = smp_load_acquire(&pipe->head);
> +		unsigned int head = pipe->head;
>   		unsigned int tail = pipe->tail;
>   		unsigned int mask = pipe->ring_size - 1;
>   
> @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   
>   			if (!buf->len) {
>   				pipe_buf_release(pipe, buf);
> -				spin_lock_irq(&pipe->rd_wait.lock);
>   #ifdef CONFIG_WATCH_QUEUE
>   				if (buf->flags & PIPE_BUF_FLAG_LOSS)
>   					pipe->note_loss = true;
>   #endif
>   				tail++;
>   				pipe->tail = tail;
> -				spin_unlock_irq(&pipe->rd_wait.lock);
>   			}
>   			total_len -= chars;
>   			if (!total_len)
> @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>   			 * it, either the reader will consume it or it'll still
>   			 * be there for the next write.
>   			 */
> -			spin_lock_irq(&pipe->rd_wait.lock);
>   
>   			head = pipe->head;
>   			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
> -				spin_unlock_irq(&pipe->rd_wait.lock);
>   				continue;
>   			}
>   
>   			pipe->head = head + 1;
> -			spin_unlock_irq(&pipe->rd_wait.lock);
>   
>   			/* Insert it into the buffer array */
>   			buf = &pipe->bufs[head & mask];
> @@ -1260,14 +1244,12 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>   	if (unlikely(!bufs))
>   		return -ENOMEM;
>   
> -	spin_lock_irq(&pipe->rd_wait.lock);
>   	mask = pipe->ring_size - 1;
>   	head = pipe->head;
>   	tail = pipe->tail;
>   
>   	n = pipe_occupancy(head, tail);
>   	if (nr_slots < n) {
> -		spin_unlock_irq(&pipe->rd_wait.lock);
>   		kfree(bufs);
>   		return -EBUSY;
>   	}
> @@ -1303,8 +1285,6 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>   	pipe->tail = tail;
>   	pipe->head = head;
>   
> -	spin_unlock_irq(&pipe->rd_wait.lock);
> -
>   	/* This might have made more room for writers */
>   	wake_up_interruptible(&pipe->wr_wait);
>   	return 0;
> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> index 6cb65df3e3ba..f5084daf6eaf 100644
> --- a/include/linux/pipe_fs_i.h
> +++ b/include/linux/pipe_fs_i.h
> @@ -2,6 +2,8 @@
>   #ifndef _LINUX_PIPE_FS_I_H
>   #define _LINUX_PIPE_FS_I_H
>   
> +#include <linux/fs.h>
> +
>   #define PIPE_DEF_BUFFERS	16
>   
>   #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
> @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
>   #define PIPE_SIZE		PAGE_SIZE
>   
>   /* Pipe lock and unlock operations */
> +static inline void __pipe_lock(struct pipe_inode_info *pipe)
> +{
> +	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> +}
> +
> +static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> +{
> +	mutex_unlock(&pipe->mutex);
> +}
> +
>   void pipe_lock(struct pipe_inode_info *);
>   void pipe_unlock(struct pipe_inode_info *);
>   void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
> index a6f9bdd956c3..92e46cfe9419 100644
> --- a/kernel/watch_queue.c
> +++ b/kernel/watch_queue.c
> @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
>   	if (!pipe)
>   		return false;
>   
> -	spin_lock_irq(&pipe->rd_wait.lock);
> +	__pipe_lock(pipe);
>   
>   	mask = pipe->ring_size - 1;
>   	head = pipe->head;
> @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
>   	buf->offset = offset;
>   	buf->len = len;
>   	buf->flags = PIPE_BUF_FLAG_WHOLE;
> -	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
> +	pipe->head = head + 1;
>   
>   	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
> -		spin_unlock_irq(&pipe->rd_wait.lock);
> +		__pipe_unlock(pipe);
>   		BUG();
>   	}
>   	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>   	done = true;
>   
>   out:
> -	spin_unlock_irq(&pipe->rd_wait.lock);
> +	__pipe_unlock(pipe);
>   	if (done)
>   		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>   	return done;
> 
> base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
@ 2023-01-07  1:23 Hongchen Zhang
  2023-01-13  3:19 ` Hongchen Zhang
  0 siblings, 1 reply; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-07  1:23 UTC (permalink / raw)
  To: Alexander Viro, Andrew Morton, Christian Brauner (Microsoft),
	Kuniyuki Iwashima, Hongchen Zhang, Luis Chamberlain,
	David Howells, Mauro Carvalho Chehab, Eric Dumazet,
	Fabio M. De Francesco, Christophe JAILLET
  Cc: linux-fsdevel, linux-kernel

Use spinlock in pipe_read/write cost too much time,IMO
pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
On the other hand, we can use __pipe_{lock,unlock} to protect
the pipe->{head,tail} in pipe_resize_ring and
post_one_notification.

Reminded by Matthew, I tested this patch using UnixBench's pipe
test case on a x86_64 machine,and get the following data:
1) before this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     493023.3    396.3
                                                        ========
System Benchmarks Index Score (Partial Only)              396.3

2) after this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     507551.4    408.0
                                                        ========
System Benchmarks Index Score (Partial Only)              408.0

so we get ~3% speedup.

Reminded by Andrew, I tested this patch with the test code in
Linus's 0ddad21d3e99 add get following result:
1) before this patch
         13,136.54 msec task-clock           #    3.870 CPUs utilized
         1,186,779      context-switches     #   90.342 K/sec
           668,867      cpu-migrations       #   50.917 K/sec
               895      page-faults          #   68.131 /sec
    29,875,711,543      cycles               #    2.274 GHz
    12,372,397,462      instructions         #    0.41  insn per cycle
     2,480,235,723      branches             #  188.804 M/sec
        47,191,943      branch-misses        #    1.90% of all branches

       3.394806886 seconds time elapsed

       0.037869000 seconds user
       0.189346000 seconds sys

2) after this patch

         12,395.63 msec task-clock          #    4.138 CPUs utilized
         1,193,381      context-switches    #   96.274 K/sec
           585,543      cpu-migrations      #   47.238 K/sec
             1,063      page-faults         #   85.756 /sec
    27,691,587,226      cycles              #    2.234 GHz
    11,738,307,999      instructions        #    0.42  insn per cycle
     2,351,299,522      branches            #  189.688 M/sec
        45,404,526      branch-misses       #    1.93% of all branches

       2.995280878 seconds time elapsed

       0.010615000 seconds user
       0.206999000 seconds sys
After adding this patch, the time used on this test program becomes less.

Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>

v3:
  - fixes the error reported by kernel test robot <oliver.sang@intel.com>
    Link: https://lore.kernel.org/oe-lkp/202301061340.c954d61f-oliver.sang@intel.com
  - add perf stat data for the test code in Linus's 0ddad21d3e99 in
    commit message.
v2:
  - add UnixBench test data in commit message
  - fixes the test error reported by kernel test robot <lkp@intel.com>
    by adding the missing fs.h header file.
---
 fs/pipe.c                 | 22 +---------------------
 include/linux/pipe_fs_i.h | 12 ++++++++++++
 kernel/watch_queue.c      |  8 ++++----
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..4355ee5f754e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-	mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
@@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	 */
 	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 	for (;;) {
-		/* Read ->head with a barrier vs post_one_notification() */
-		unsigned int head = smp_load_acquire(&pipe->head);
+		unsigned int head = pipe->head;
 		unsigned int tail = pipe->tail;
 		unsigned int mask = pipe->ring_size - 1;
 
@@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 
 			if (!buf->len) {
 				pipe_buf_release(pipe, buf);
-				spin_lock_irq(&pipe->rd_wait.lock);
 #ifdef CONFIG_WATCH_QUEUE
 				if (buf->flags & PIPE_BUF_FLAG_LOSS)
 					pipe->note_loss = true;
 #endif
 				tail++;
 				pipe->tail = tail;
-				spin_unlock_irq(&pipe->rd_wait.lock);
 			}
 			total_len -= chars;
 			if (!total_len)
@@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			 * it, either the reader will consume it or it'll still
 			 * be there for the next write.
 			 */
-			spin_lock_irq(&pipe->rd_wait.lock);
 
 			head = pipe->head;
 			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
-				spin_unlock_irq(&pipe->rd_wait.lock);
 				continue;
 			}
 
 			pipe->head = head + 1;
-			spin_unlock_irq(&pipe->rd_wait.lock);
 
 			/* Insert it into the buffer array */
 			buf = &pipe->bufs[head & mask];
@@ -1260,14 +1244,12 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	if (unlikely(!bufs))
 		return -ENOMEM;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
 	tail = pipe->tail;
 
 	n = pipe_occupancy(head, tail);
 	if (nr_slots < n) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
 		kfree(bufs);
 		return -EBUSY;
 	}
@@ -1303,8 +1285,6 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	pipe->tail = tail;
 	pipe->head = head;
 
-	spin_unlock_irq(&pipe->rd_wait.lock);
-
 	/* This might have made more room for writers */
 	wake_up_interruptible(&pipe->wr_wait);
 	return 0;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 6cb65df3e3ba..f5084daf6eaf 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_PIPE_FS_I_H
 #define _LINUX_PIPE_FS_I_H
 
+#include <linux/fs.h>
+
 #define PIPE_DEF_BUFFERS	16
 
 #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
@@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
 #define PIPE_SIZE		PAGE_SIZE
 
 /* Pipe lock and unlock operations */
+static inline void __pipe_lock(struct pipe_inode_info *pipe)
+{
+	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
+static inline void __pipe_unlock(struct pipe_inode_info *pipe)
+{
+	mutex_unlock(&pipe->mutex);
+}
+
 void pipe_lock(struct pipe_inode_info *);
 void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index a6f9bdd956c3..92e46cfe9419 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	if (!pipe)
 		return false;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
+	__pipe_lock(pipe);
 
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
@@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	buf->offset = offset;
 	buf->len = len;
 	buf->flags = PIPE_BUF_FLAG_WHOLE;
-	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
+	pipe->head = head + 1;
 
 	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
+		__pipe_unlock(pipe);
 		BUG();
 	}
 	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 	done = true;
 
 out:
-	spin_unlock_irq(&pipe->rd_wait.lock);
+	__pipe_unlock(pipe);
 	if (done)
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	return done;

base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock
@ 2023-01-06  9:28 Hongchen Zhang
  0 siblings, 0 replies; 18+ messages in thread
From: Hongchen Zhang @ 2023-01-06  9:28 UTC (permalink / raw)
  To: Alexander Viro, Andrew Morton, Kuniyuki Iwashima, Hongchen Zhang,
	Luis Chamberlain, David Howells, Christophe JAILLET,
	Randy Dunlap, Eric Dumazet
  Cc: linux-fsdevel, linux-kernel

Use spinlock in pipe_read/write cost too much time,IMO
pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
On the other hand, we can use __pipe_{lock,unlock} to protect
the pipe->{head,tail} in pipe_resize_ring and
post_one_notification.

I tested this patch using UnixBench's pipe test case on a x86_64
machine,and get the following data:
1) before this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     493023.3    396.3
                                                        ========
System Benchmarks Index Score (Partial Only)              396.3

2) after this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     507551.4    408.0
                                                        ========
System Benchmarks Index Score (Partial Only)              408.0

so we get ~3% speedup.

Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
---
 fs/pipe.c                 | 22 +---------------------
 include/linux/pipe_fs_i.h | 10 ++++++++++
 kernel/watch_queue.c      |  8 ++++----
 3 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..4355ee5f754e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-	mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
@@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	 */
 	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 	for (;;) {
-		/* Read ->head with a barrier vs post_one_notification() */
-		unsigned int head = smp_load_acquire(&pipe->head);
+		unsigned int head = pipe->head;
 		unsigned int tail = pipe->tail;
 		unsigned int mask = pipe->ring_size - 1;
 
@@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 
 			if (!buf->len) {
 				pipe_buf_release(pipe, buf);
-				spin_lock_irq(&pipe->rd_wait.lock);
 #ifdef CONFIG_WATCH_QUEUE
 				if (buf->flags & PIPE_BUF_FLAG_LOSS)
 					pipe->note_loss = true;
 #endif
 				tail++;
 				pipe->tail = tail;
-				spin_unlock_irq(&pipe->rd_wait.lock);
 			}
 			total_len -= chars;
 			if (!total_len)
@@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			 * it, either the reader will consume it or it'll still
 			 * be there for the next write.
 			 */
-			spin_lock_irq(&pipe->rd_wait.lock);
 
 			head = pipe->head;
 			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
-				spin_unlock_irq(&pipe->rd_wait.lock);
 				continue;
 			}
 
 			pipe->head = head + 1;
-			spin_unlock_irq(&pipe->rd_wait.lock);
 
 			/* Insert it into the buffer array */
 			buf = &pipe->bufs[head & mask];
@@ -1260,14 +1244,12 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	if (unlikely(!bufs))
 		return -ENOMEM;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
 	tail = pipe->tail;
 
 	n = pipe_occupancy(head, tail);
 	if (nr_slots < n) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
 		kfree(bufs);
 		return -EBUSY;
 	}
@@ -1303,8 +1285,6 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	pipe->tail = tail;
 	pipe->head = head;
 
-	spin_unlock_irq(&pipe->rd_wait.lock);
-
 	/* This might have made more room for writers */
 	wake_up_interruptible(&pipe->wr_wait);
 	return 0;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 6cb65df3e3ba..baae3d062422 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -223,6 +223,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
 #define PIPE_SIZE		PAGE_SIZE
 
 /* Pipe lock and unlock operations */
+static inline void __pipe_lock(struct pipe_inode_info *pipe)
+{
+	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
+static inline void __pipe_unlock(struct pipe_inode_info *pipe)
+{
+	mutex_unlock(&pipe->mutex);
+}
+
 void pipe_lock(struct pipe_inode_info *);
 void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index a6f9bdd956c3..92e46cfe9419 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	if (!pipe)
 		return false;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
+	__pipe_lock(pipe);
 
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
@@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	buf->offset = offset;
 	buf->len = len;
 	buf->flags = PIPE_BUF_FLAG_WHOLE;
-	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
+	pipe->head = head + 1;
 
 	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
+		__pipe_unlock(pipe);
 		BUG();
 	}
 	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 	done = true;
 
 out:
-	spin_unlock_irq(&pipe->rd_wait.lock);
+	__pipe_unlock(pipe);
 	if (done)
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	return done;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2023-01-29  2:29 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-06  9:48 [PATCH v3] pipe: use __pipe_{lock,unlock} instead of spinlock Hongchen Zhang
2023-01-06 19:13 ` Luis Chamberlain
2023-01-06 20:33   ` Sedat Dilek
2023-01-07  3:31     ` Hongchen Zhang
2023-01-07  0:58   ` Hongchen Zhang
  -- strict thread matches above, loose matches on Subject: below --
2023-01-07  1:23 Hongchen Zhang
2023-01-13  3:19 ` Hongchen Zhang
2023-01-13  9:32   ` Sedat Dilek
2023-01-16  1:52     ` Sedat Dilek
2023-01-16  2:16       ` Hongchen Zhang
2023-01-16  2:42         ` Sedat Dilek
2023-01-16  3:16   ` maobibo
2023-01-16  4:38     ` Matthew Wilcox
2023-01-16 21:10       ` Al Viro
2023-01-16 22:16         ` Andrew Morton
2023-01-17  6:54           ` Sedat Dilek
2023-01-29  2:29           ` Hongchen Zhang
2023-01-06  9:28 Hongchen Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).