All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
@ 2022-05-20 13:50 Jason A. Donenfeld
  2022-05-20 14:38 ` Jens Axboe
  2022-05-20 15:09 ` Al Viro
  0 siblings, 2 replies; 9+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 13:50 UTC (permalink / raw)
  To: gregkh, linux-kernel; +Cc: Jason A. Donenfeld, Al Viro, Jens Axboe

Currently mem.c implements both the {read,write}_iter functions and the
{read,write} functions. But with {read,write} going away at some point
in the future, and most kernel code made to prefer {read,write}_iter,
there's no point in keeping around the old code. Actually, this comment
in __kernel_read() indicates that having both might be plain wrong:

        /*
         * Also fail if ->read_iter and ->read are both wired up as that
         * implies very convoluted semantics.
         */
        if (unlikely(!file->f_op->read_iter || file->f_op->read))
                return warn_unsupported(file, "read");

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/mem.c | 44 --------------------------------------------
 1 file changed, 44 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index cc296f0823bd..0eb36b784b29 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -444,18 +444,6 @@ static ssize_t write_port(struct file *file, const char __user *buf,
 	return tmp-buf;
 }
 
-static ssize_t read_null(struct file *file, char __user *buf,
-			 size_t count, loff_t *ppos)
-{
-	return 0;
-}
-
-static ssize_t write_null(struct file *file, const char __user *buf,
-			  size_t count, loff_t *ppos)
-{
-	return count;
-}
-
 static ssize_t read_iter_null(struct kiocb *iocb, struct iov_iter *to)
 {
 	return 0;
@@ -504,33 +492,6 @@ static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter)
 	return written;
 }
 
-static ssize_t read_zero(struct file *file, char __user *buf,
-			 size_t count, loff_t *ppos)
-{
-	size_t cleared = 0;
-
-	while (count) {
-		size_t chunk = min_t(size_t, count, PAGE_SIZE);
-		size_t left;
-
-		left = clear_user(buf + cleared, chunk);
-		if (unlikely(left)) {
-			cleared += (chunk - left);
-			if (!cleared)
-				return -EFAULT;
-			break;
-		}
-		cleared += chunk;
-		count -= chunk;
-
-		if (signal_pending(current))
-			break;
-		cond_resched();
-	}
-
-	return cleared;
-}
-
 static int mmap_zero(struct file *file, struct vm_area_struct *vma)
 {
 #ifndef CONFIG_MMU
@@ -640,7 +601,6 @@ static int open_port(struct inode *inode, struct file *filp)
 
 #define zero_lseek	null_lseek
 #define full_lseek      null_lseek
-#define write_zero	write_null
 #define write_iter_zero	write_iter_null
 #define open_mem	open_port
 
@@ -658,8 +618,6 @@ static const struct file_operations __maybe_unused mem_fops = {
 
 static const struct file_operations null_fops = {
 	.llseek		= null_lseek,
-	.read		= read_null,
-	.write		= write_null,
 	.read_iter	= read_iter_null,
 	.write_iter	= write_iter_null,
 	.splice_write	= splice_write_null,
@@ -674,9 +632,7 @@ static const struct file_operations __maybe_unused port_fops = {
 
 static const struct file_operations zero_fops = {
 	.llseek		= zero_lseek,
-	.write		= write_zero,
 	.read_iter	= read_iter_zero,
-	.read		= read_zero,
 	.write_iter	= write_iter_zero,
 	.mmap		= mmap_zero,
 	.get_unmapped_area = get_unmapped_area_zero,
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 13:50 [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions Jason A. Donenfeld
@ 2022-05-20 14:38 ` Jens Axboe
  2022-05-20 15:09 ` Al Viro
  1 sibling, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2022-05-20 14:38 UTC (permalink / raw)
  To: Jason A. Donenfeld, gregkh, linux-kernel; +Cc: Al Viro

On 5/20/22 7:50 AM, Jason A. Donenfeld wrote:
> Currently mem.c implements both the {read,write}_iter functions and the
> {read,write} functions. But with {read,write} going away at some point
> in the future, and most kernel code made to prefer {read,write}_iter,
> there's no point in keeping around the old code. Actually, this comment
> in __kernel_read() indicates that having both might be plain wrong:
> 
>         /*
>          * Also fail if ->read_iter and ->read are both wired up as that
>          * implies very convoluted semantics.
>          */
>         if (unlikely(!file->f_op->read_iter || file->f_op->read))
>                 return warn_unsupported(file, "read");

Nice, just another bit of wasted space due to not having clearly
defined iter vs non-iter.

Reviewed-by: Jens Axboe <axboe@kernel.dk>

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 13:50 [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions Jason A. Donenfeld
  2022-05-20 14:38 ` Jens Axboe
@ 2022-05-20 15:09 ` Al Viro
  2022-05-20 15:11   ` Jens Axboe
  2022-05-20 15:24   ` Jason A. Donenfeld
  1 sibling, 2 replies; 9+ messages in thread
From: Al Viro @ 2022-05-20 15:09 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: gregkh, linux-kernel, Jens Axboe

On Fri, May 20, 2022 at 03:50:30PM +0200, Jason A. Donenfeld wrote:
> Currently mem.c implements both the {read,write}_iter functions and the
> {read,write} functions. But with {read,write} going away at some point
> in the future,

Not likely to happen, unfortunately.

> and most kernel code made to prefer {read,write}_iter,
> there's no point in keeping around the old code.

Profile and you'll see ;-/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 15:09 ` Al Viro
@ 2022-05-20 15:11   ` Jens Axboe
  2022-05-20 15:32     ` Jens Axboe
  2022-05-20 15:24   ` Jason A. Donenfeld
  1 sibling, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2022-05-20 15:11 UTC (permalink / raw)
  To: Al Viro, Jason A. Donenfeld; +Cc: gregkh, linux-kernel

On 5/20/22 9:09 AM, Al Viro wrote:
> On Fri, May 20, 2022 at 03:50:30PM +0200, Jason A. Donenfeld wrote:
>> Currently mem.c implements both the {read,write}_iter functions and the
>> {read,write} functions. But with {read,write} going away at some point
>> in the future,
> 
> Not likely to happen, unfortunately.
> 
>> and most kernel code made to prefer {read,write}_iter,
>> there's no point in keeping around the old code.
> 
> Profile and you'll see ;-/

Weren't you working on bits to get us to performance parity there?
What's the status of that?

It really is an unfortunate situation we're currently in with two
methods for either read or write, with one being greatly preferred as we
can pass in non-file associated state (like IOCB_NOWAIT, etc) but the
older variant being a bit faster. It lives us in a bad place, imho.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 15:09 ` Al Viro
  2022-05-20 15:11   ` Jens Axboe
@ 2022-05-20 15:24   ` Jason A. Donenfeld
  1 sibling, 0 replies; 9+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 15:24 UTC (permalink / raw)
  To: Al Viro; +Cc: gregkh, linux-kernel, Jens Axboe

Hi Al,

On Fri, May 20, 2022 at 03:09:19PM +0000, Al Viro wrote:
> On Fri, May 20, 2022 at 03:50:30PM +0200, Jason A. Donenfeld wrote:
> > Currently mem.c implements both the {read,write}_iter functions and the
> > {read,write} functions. But with {read,write} going away at some point
> > in the future,
> 
> Not likely to happen, unfortunately.
> 
> > and most kernel code made to prefer {read,write}_iter,
> > there's no point in keeping around the old code.
> 
> Profile and you'll see ;-/

Huh, yea, I lose around 1 GiB/s of perf on my machine sending /dev/zero
into /dev/null.

Jason

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 15:11   ` Jens Axboe
@ 2022-05-20 15:32     ` Jens Axboe
  2022-05-20 15:44       ` Al Viro
  0 siblings, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2022-05-20 15:32 UTC (permalink / raw)
  To: Al Viro, Jason A. Donenfeld; +Cc: gregkh, linux-kernel

On 5/20/22 9:11 AM, Jens Axboe wrote:
> On 5/20/22 9:09 AM, Al Viro wrote:
>> On Fri, May 20, 2022 at 03:50:30PM +0200, Jason A. Donenfeld wrote:
>>> Currently mem.c implements both the {read,write}_iter functions and the
>>> {read,write} functions. But with {read,write} going away at some point
>>> in the future,
>>
>> Not likely to happen, unfortunately.
>>
>>> and most kernel code made to prefer {read,write}_iter,
>>> there's no point in keeping around the old code.
>>
>> Profile and you'll see ;-/
> 
> Weren't you working on bits to get us to performance parity there?
> What's the status of that?

Totally unscientific test on the current kernel, running:

dd if=/dev/zero of=/dev/null bs=4k status=progress

With the current tree, I get 8.8GB/sec, and if I drop fops->read() for
/dev/zero, then I get 8.6GB/sec. That's 1%, which isn't nothing, but
it's also not a huge loss for moving us in the right direction.

Looking at a perf diff, it's mostly:

               +0.34%  [kernel.kallsyms]  [k] new_sync_read
               +0.33%  [kernel.kallsyms]  [k] init_sync_kiocb
               +0.07%  [kernel.kallsyms]  [k] iov_iter_init
               +0.80%  [kernel.kallsyms]  [k] iov_iter_zero

with these being gone after switch to ->read_iter():

     0.63%             [kernel.kallsyms]  [k] read_zero
     0.13%             [kernel.kallsyms]  [k] __clear_user

Didn't look closer, but I'm assuming this is _mostly_ tied to needing to
init 48 bytes of kiocb for each one. There might be ways to embed a
sync_kiocb inside the kiocb for the bits we need there, at least that
could get us down to 32 bytes.

> It really is an unfortunate situation we're currently in with two
> methods for either read or write, with one being greatly preferred as we
> can pass in non-file associated state (like IOCB_NOWAIT, etc) but the
> older variant being a bit faster. It lives us in a bad place, imho.

And splice etc, for example...

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 15:32     ` Jens Axboe
@ 2022-05-20 15:44       ` Al Viro
  2022-05-20 15:46         ` Jens Axboe
  2022-05-21 17:51         ` Al Viro
  0 siblings, 2 replies; 9+ messages in thread
From: Al Viro @ 2022-05-20 15:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jason A. Donenfeld, gregkh, linux-kernel

On Fri, May 20, 2022 at 09:32:34AM -0600, Jens Axboe wrote:

> Didn't look closer, but I'm assuming this is _mostly_ tied to needing to
> init 48 bytes of kiocb for each one. There might be ways to embed a
> sync_kiocb inside the kiocb for the bits we need there, at least that
> could get us down to 32 bytes.

My bet would be on iocb_flags() (and kiocb_set_rw_flags()) tests and
pointer-chasing, actually.  I'd been sick on and off since early November,
trying to dig myself from under the piles right now.  Christoph's
patches in that area are somewhere in the pile ;-/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 15:44       ` Al Viro
@ 2022-05-20 15:46         ` Jens Axboe
  2022-05-21 17:51         ` Al Viro
  1 sibling, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2022-05-20 15:46 UTC (permalink / raw)
  To: Al Viro; +Cc: Jason A. Donenfeld, gregkh, linux-kernel

On 5/20/22 9:44 AM, Al Viro wrote:
> On Fri, May 20, 2022 at 09:32:34AM -0600, Jens Axboe wrote:
> 
>> Didn't look closer, but I'm assuming this is _mostly_ tied to needing to
>> init 48 bytes of kiocb for each one. There might be ways to embed a
>> sync_kiocb inside the kiocb for the bits we need there, at least that
>> could get us down to 32 bytes.
> 
> My bet would be on iocb_flags() (and kiocb_set_rw_flags()) tests and
> pointer-chasing, actually.

That would be my guess too, around init/setup of the kiocb. But as per
previous email, for some reason it seems _worse_ on bigger reads (1k and
4k vs 32 bytes), which would seem to indicate that it's not necessarily
just the setup but something else too. And hopefully that "something
else" is workable.

> I'd been sick on and off since early November, trying to dig myself
> from under the piles right now.  Christoph's patches in that area are
> somewhere in the pile ;-/

Sorry to hear that, hope you're feeling better.

Do you have a pointer to those patches?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions
  2022-05-20 15:44       ` Al Viro
  2022-05-20 15:46         ` Jens Axboe
@ 2022-05-21 17:51         ` Al Viro
  1 sibling, 0 replies; 9+ messages in thread
From: Al Viro @ 2022-05-21 17:51 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jason A. Donenfeld, gregkh, linux-kernel

On Fri, May 20, 2022 at 03:44:04PM +0000, Al Viro wrote:
> On Fri, May 20, 2022 at 09:32:34AM -0600, Jens Axboe wrote:
> 
> > Didn't look closer, but I'm assuming this is _mostly_ tied to needing to
> > init 48 bytes of kiocb for each one. There might be ways to embed a
> > sync_kiocb inside the kiocb for the bits we need there, at least that
> > could get us down to 32 bytes.
> 
> My bet would be on iocb_flags() (and kiocb_set_rw_flags()) tests and
> pointer-chasing, actually.  I'd been sick on and off since early November,
> trying to dig myself from under the piles right now.  Christoph's
> patches in that area are somewhere in the pile ;-/

FWIW, I can't find them, assuming I'm not misremembering in the first place.

iocb_flags() is an atrocity, indeed.  Look what happens in new_sync_write():

[edx holds file->f_flags]
        movl    %edx, %eax
        shrl    $6, %eax
        andl    $16, %eax	// eax = (edx >> 6) & 16; maps O_APPEND to IOCB_APPEND
        movl    %eax, %ecx
        orl     $131072, %ecx
        testb   $64, %dh
        cmovne  %ecx, %eax	// eax = edh & 0x4000 ? eax | 0x20000;
        testb   $16, %dh        // if (edx & O_DSYNC)
        jne     .L175           //	goto L175;	branch not taken
        movq    216(%rdi), %rcx // rcx = file->f_mapping;	// fetch
        movq    (%rcx), %rcx    // rcx = rcx->host;		// fetch
        movq    40(%rcx), %rsi	// rsi = rcx->i_sb;		// fetch
        testb   $16, 80(%rsi)   // if (!(rsi->s_flags & 0x10))	// fetch
        je      .L198           //	goto L198;		// branch likely taken
L175:
        orl     $2, %eax        // eax |= IOCB_DSYNC;
L176:  
        movl    %eax, %ecx
        orl     $4, %ecx
        andl    $1048576, %edx
        cmovne  %ecx, %eax	// eax = edx & __O_SYNC ? eax | IOCB_SYNC : eax;

        movq %gs:current_task, %rdx     # rdx = current;
        movq    2192(%rdx), %rcx        # rcx = rdx->io_contenxt;
        movl    $16388, %edx    #       edx = IOPRIO_DEFAULT;
        testq   %rcx, %rcx      #       if (!rcx)
        je      .L178           #               goto L178;
        movzwl  12(%rcx), %edx  #       edx = rcx->ioprio;
L178:
	...
	fill iov_iter
	call ->write_iter() and bugger off

L198:
        testb   $1, 12(%rcx)    //  if (rcx->i_flags & S_SYNC)
        je      .L176
        jmp     .L175

IOCB_DSYNC part is bloody awful.  To add insult to injury, we end up
doing it in new_sync_read() as well ;-/  Its validity is dubious, BTW -
we only get away with that since O_DSYNC is ignored for all in-tree
character devices and since for block ones ->f_mapping->host->i_sb
ends up pointing to blockdev_superblock, which won't be mounted
sync.

I've sent a patch into old thread ([RFC] what to do with IOCB_DSYNC?);
it's completely untested, though.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-05-21 17:52 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-20 13:50 [PATCH] char/mem: only use {read,write}_iter, not the old {read,write} functions Jason A. Donenfeld
2022-05-20 14:38 ` Jens Axboe
2022-05-20 15:09 ` Al Viro
2022-05-20 15:11   ` Jens Axboe
2022-05-20 15:32     ` Jens Axboe
2022-05-20 15:44       ` Al Viro
2022-05-20 15:46         ` Jens Axboe
2022-05-21 17:51         ` Al Viro
2022-05-20 15:24   ` Jason A. Donenfeld

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.