All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/3] random: convert to using iters, for Al Viro
@ 2022-05-20  9:44 Jason A. Donenfeld
  2022-05-20  9:44 ` [PATCH v4 1/3] random: convert to using fops->read_iter() Jason A. Donenfeld
                   ` (4 more replies)
  0 siblings, 5 replies; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20  9:44 UTC (permalink / raw)
  To: Jens Axboe, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro
  Cc: Jason A. Donenfeld

Hi Al,

I've incorporated your suggestions into Jens' patches and simplified a
lot of the control flow. Could you take a look at these and let me know
if it looks sane? In particular, I'm using the property you mentioned in
which copy_{to,from}_iter() can take a maximum and do less if the
remaining length is too small.

Jason

Jens Axboe (3):
  random: convert to using fops->read_iter()
  random: convert to using fops->write_iter()
  random: wire up fops->splice_{read,write}_iter()

 drivers/char/random.c | 126 +++++++++++++++++++++---------------------
 1 file changed, 63 insertions(+), 63 deletions(-)

-- 
2.35.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v4 1/3] random: convert to using fops->read_iter()
  2022-05-20  9:44 [PATCH v4 0/3] random: convert to using iters, for Al Viro Jason A. Donenfeld
@ 2022-05-20  9:44 ` Jason A. Donenfeld
  2022-05-20 13:37   ` Jason A. Donenfeld
  2022-05-20  9:44 ` [PATCH v4 2/3] random: convert to using fops->write_iter() Jason A. Donenfeld
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20  9:44 UTC (permalink / raw)
  To: Jens Axboe, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro
  Cc: Jason A . Donenfeld

From: Jens Axboe <axboe@kernel.dk>

This is a pre-requisite to wiring up splice() again for the random
and urandom drivers. It also allows us to remove the INT_MAX check in
getrandom(), because import_single_range() applies capping internally.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
[Jason: rewrote get_random_bytes_user() to simplify and also incorporate
 additional suggestions from Al.]
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 55 +++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0958fa91a964..f8cd747acec2 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -397,13 +397,13 @@ void get_random_bytes(void *buf, size_t len)
 }
 EXPORT_SYMBOL(get_random_bytes);
 
-static ssize_t get_random_bytes_user(void __user *ubuf, size_t len)
+static ssize_t get_random_bytes_user(struct iov_iter *to)
 {
-	size_t block_len, left, ret = 0;
 	u32 chacha_state[CHACHA_STATE_WORDS];
 	u8 output[CHACHA_BLOCK_SIZE];
+	size_t ret = 0, copied;
 
-	if (!len)
+	if (!iov_iter_count(to))
 		return 0;
 
 	/*
@@ -417,8 +417,8 @@ static ssize_t get_random_bytes_user(void __user *ubuf, size_t len)
 	 * use chacha_state after, so we can simply return those bytes to
 	 * the user directly.
 	 */
-	if (len <= CHACHA_KEY_SIZE) {
-		ret = len - copy_to_user(ubuf, &chacha_state[4], len);
+	if (iov_iter_count(to) <= CHACHA_KEY_SIZE) {
+		ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, to);
 		goto out_zero_chacha;
 	}
 
@@ -427,17 +427,9 @@ static ssize_t get_random_bytes_user(void __user *ubuf, size_t len)
 		if (unlikely(chacha_state[12] == 0))
 			++chacha_state[13];
 
-		block_len = min_t(size_t, len, CHACHA_BLOCK_SIZE);
-		left = copy_to_user(ubuf, output, block_len);
-		if (left) {
-			ret += block_len - left;
-			break;
-		}
-
-		ubuf += block_len;
-		ret += block_len;
-		len -= block_len;
-		if (!len)
+		copied = copy_to_iter(output, CHACHA_BLOCK_SIZE, to);
+		ret += copied;
+		if (!iov_iter_count(to) || copied != CHACHA_BLOCK_SIZE)
 			break;
 
 		BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0);
@@ -1248,6 +1240,10 @@ static void __cold try_to_generate_entropy(void)
 
 SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags)
 {
+	struct iov_iter to;
+	struct iovec iov;
+	int ret;
+
 	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
 		return -EINVAL;
 
@@ -1258,19 +1254,18 @@ SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags
 	if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
 		return -EINVAL;
 
-	if (len > INT_MAX)
-		len = INT_MAX;
-
 	if (!crng_ready() && !(flags & GRND_INSECURE)) {
-		int ret;
-
 		if (flags & GRND_NONBLOCK)
 			return -EAGAIN;
 		ret = wait_for_random_bytes();
 		if (unlikely(ret))
 			return ret;
 	}
-	return get_random_bytes_user(ubuf, len);
+
+	ret = import_single_range(READ, ubuf, len, &iov, &to);
+	if (unlikely(ret))
+		return ret;
+	return get_random_bytes_user(&to);
 }
 
 static __poll_t random_poll(struct file *file, poll_table *wait)
@@ -1314,8 +1309,7 @@ static ssize_t random_write(struct file *file, const char __user *ubuf,
 	return (ssize_t)len;
 }
 
-static ssize_t urandom_read(struct file *file, char __user *ubuf,
-			    size_t len, loff_t *ppos)
+static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 {
 	static int maxwarn = 10;
 
@@ -1332,22 +1326,21 @@ static ssize_t urandom_read(struct file *file, char __user *ubuf,
 		else if (ratelimit_disable || __ratelimit(&urandom_warning)) {
 			--maxwarn;
 			pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
-				  current->comm, len);
+				  current->comm, iov_iter_count(to));
 		}
 	}
 
-	return get_random_bytes_user(ubuf, len);
+	return get_random_bytes_user(to);
 }
 
-static ssize_t random_read(struct file *file, char __user *ubuf,
-			   size_t len, loff_t *ppos)
+static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 {
 	int ret;
 
 	ret = wait_for_random_bytes();
 	if (ret != 0)
 		return ret;
-	return get_random_bytes_user(ubuf, len);
+	return get_random_bytes_user(to);
 }
 
 static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
@@ -1409,7 +1402,7 @@ static int random_fasync(int fd, struct file *filp, int on)
 }
 
 const struct file_operations random_fops = {
-	.read = random_read,
+	.read_iter = random_read_iter,
 	.write = random_write,
 	.poll = random_poll,
 	.unlocked_ioctl = random_ioctl,
@@ -1419,7 +1412,7 @@ const struct file_operations random_fops = {
 };
 
 const struct file_operations urandom_fops = {
-	.read = urandom_read,
+	.read_iter = urandom_read_iter,
 	.write = random_write,
 	.unlocked_ioctl = random_ioctl,
 	.compat_ioctl = compat_ptr_ioctl,
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v4 2/3] random: convert to using fops->write_iter()
  2022-05-20  9:44 [PATCH v4 0/3] random: convert to using iters, for Al Viro Jason A. Donenfeld
  2022-05-20  9:44 ` [PATCH v4 1/3] random: convert to using fops->read_iter() Jason A. Donenfeld
@ 2022-05-20  9:44 ` Jason A. Donenfeld
  2022-05-20  9:44 ` [PATCH v4 3/3] random: wire up fops->splice_{read,write}_iter() Jason A. Donenfeld
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20  9:44 UTC (permalink / raw)
  To: Jens Axboe, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro
  Cc: Jason A . Donenfeld

From: Jens Axboe <axboe@kernel.dk>

Now that the read side has been converted to fix a regression with
splice, convert the write side as well to have some symmetry in the
interface used (and help deprecate ->write()).

Signed-off-by: Jens Axboe <axboe@kernel.dk>
[Jason: cleaned up random_ioctl a bit, require full writes in
 RNDADDENTROPY since it's crediting entropy, simplify control flow of
 write_pool(), and incorporate suggestions from Al.]
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 67 ++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index f8cd747acec2..831cafcd1034 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1274,39 +1274,31 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 	return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM;
 }
 
-static int write_pool(const char __user *ubuf, size_t len)
+static ssize_t write_pool(struct iov_iter *from)
 {
-	size_t block_len;
-	int ret = 0;
 	u8 block[BLAKE2S_BLOCK_SIZE];
+	ssize_t ret = 0;
+	size_t copied;
 
-	while (len) {
-		block_len = min(len, sizeof(block));
-		if (copy_from_user(block, ubuf, block_len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		len -= block_len;
-		ubuf += block_len;
-		mix_pool_bytes(block, block_len);
+	if (!iov_iter_count(from))
+		return 0;
+
+	for (;;) {
+		copied = copy_from_iter(block, sizeof(block), from);
+		ret += copied;
+		mix_pool_bytes(block, copied);
+		if (!iov_iter_count(from) || copied != sizeof(block))
+			break;
 		cond_resched();
 	}
 
-out:
 	memzero_explicit(block, sizeof(block));
-	return ret;
+	return ret ? ret : -EFAULT;
 }
 
-static ssize_t random_write(struct file *file, const char __user *ubuf,
-			    size_t len, loff_t *ppos)
+static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *from)
 {
-	int ret;
-
-	ret = write_pool(ubuf, len);
-	if (ret)
-		return ret;
-
-	return (ssize_t)len;
+	return write_pool(from);
 }
 
 static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *to)
@@ -1345,9 +1337,8 @@ static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 
 static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 {
-	int size, ent_count;
 	int __user *p = (int __user *)arg;
-	int retval;
+	int ent_count;
 
 	switch (cmd) {
 	case RNDGETENTCNT:
@@ -1364,20 +1355,32 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EINVAL;
 		credit_init_bits(ent_count);
 		return 0;
-	case RNDADDENTROPY:
+	case RNDADDENTROPY: {
+		struct iov_iter from;
+		struct iovec iov;
+		ssize_t ret;
+		int len;
+
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 		if (get_user(ent_count, p++))
 			return -EFAULT;
 		if (ent_count < 0)
 			return -EINVAL;
-		if (get_user(size, p++))
+		if (get_user(len, p++))
+			return -EFAULT;
+
+		ret = import_single_range(WRITE, p, len, &iov, &from);
+		if (unlikely(ret))
+			return ret;
+		ret = write_pool(&from);
+		if (unlikely(ret < 0))
+			return ret;
+		if (unlikely(ret != len))
 			return -EFAULT;
-		retval = write_pool((const char __user *)p, size);
-		if (retval < 0)
-			return retval;
 		credit_init_bits(ent_count);
 		return 0;
+	}
 	case RNDZAPENTCNT:
 	case RNDCLEARPOOL:
 		/* No longer has any effect. */
@@ -1403,7 +1406,7 @@ static int random_fasync(int fd, struct file *filp, int on)
 
 const struct file_operations random_fops = {
 	.read_iter = random_read_iter,
-	.write = random_write,
+	.write_iter = random_write_iter,
 	.poll = random_poll,
 	.unlocked_ioctl = random_ioctl,
 	.compat_ioctl = compat_ptr_ioctl,
@@ -1413,7 +1416,7 @@ const struct file_operations random_fops = {
 
 const struct file_operations urandom_fops = {
 	.read_iter = urandom_read_iter,
-	.write = random_write,
+	.write_iter = random_write_iter,
 	.unlocked_ioctl = random_ioctl,
 	.compat_ioctl = compat_ptr_ioctl,
 	.fasync = random_fasync,
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v4 3/3] random: wire up fops->splice_{read,write}_iter()
  2022-05-20  9:44 [PATCH v4 0/3] random: convert to using iters, for Al Viro Jason A. Donenfeld
  2022-05-20  9:44 ` [PATCH v4 1/3] random: convert to using fops->read_iter() Jason A. Donenfeld
  2022-05-20  9:44 ` [PATCH v4 2/3] random: convert to using fops->write_iter() Jason A. Donenfeld
@ 2022-05-20  9:44 ` Jason A. Donenfeld
  2022-05-20 12:16 ` [PATCH v4 0/3] random: convert to using iters, for Al Viro Jens Axboe
  2022-05-20 15:25 ` Jason A. Donenfeld
  4 siblings, 0 replies; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20  9:44 UTC (permalink / raw)
  To: Jens Axboe, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro
  Cc: Jason A . Donenfeld

From: Jens Axboe <axboe@kernel.dk>

Now that random/urandom is using {read,write}_iter, we can wire it up to
using the generic splice handlers.

Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
[Jason: added the splice_write path. Note that sendfile() and such still
 does not work for read, though it does for write, because of the
 outdated file type restriction in splice_direct_to_actor(), which I'll
 address separately.]
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 831cafcd1034..15a9e5ea1b3f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1412,6 +1412,8 @@ const struct file_operations random_fops = {
 	.compat_ioctl = compat_ptr_ioctl,
 	.fasync = random_fasync,
 	.llseek = noop_llseek,
+	.splice_read = generic_file_splice_read,
+	.splice_write = iter_file_splice_write,
 };
 
 const struct file_operations urandom_fops = {
@@ -1421,6 +1423,8 @@ const struct file_operations urandom_fops = {
 	.compat_ioctl = compat_ptr_ioctl,
 	.fasync = random_fasync,
 	.llseek = noop_llseek,
+	.splice_read = generic_file_splice_read,
+	.splice_write = iter_file_splice_write,
 };
 
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20  9:44 [PATCH v4 0/3] random: convert to using iters, for Al Viro Jason A. Donenfeld
                   ` (2 preceding siblings ...)
  2022-05-20  9:44 ` [PATCH v4 3/3] random: wire up fops->splice_{read,write}_iter() Jason A. Donenfeld
@ 2022-05-20 12:16 ` Jens Axboe
  2022-05-20 15:25 ` Jason A. Donenfeld
  4 siblings, 0 replies; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 12:16 UTC (permalink / raw)
  To: Jason A. Donenfeld, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 3:44 AM, Jason A. Donenfeld wrote:
> Hi Al,
> 
> I've incorporated your suggestions into Jens' patches and simplified a
> lot of the control flow. Could you take a look at these and let me know
> if it looks sane? In particular, I'm using the property you mentioned in
> which copy_{to,from}_iter() can take a maximum and do less if the
> remaining length is too small.

FWIW, ran my (small) tests and this series looks and tests fine to me.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 1/3] random: convert to using fops->read_iter()
  2022-05-20  9:44 ` [PATCH v4 1/3] random: convert to using fops->read_iter() Jason A. Donenfeld
@ 2022-05-20 13:37   ` Jason A. Donenfeld
  2022-05-20 14:36     ` Jens Axboe
  0 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 13:37 UTC (permalink / raw)
  To: Jens Axboe, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On Fri, May 20, 2022 at 11:44:57AM +0200, Jason A. Donenfeld wrote:
>  const struct file_operations urandom_fops = {
> -	.read = urandom_read,
> +	.read_iter = urandom_read_iter,

One thing I noticed is that drivers/char/mem.c has both the .read and
the .read_iter functions for /dev/zero and /dev/null and such. I wonder
if the .read ones can be removed?

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 1/3] random: convert to using fops->read_iter()
  2022-05-20 13:37   ` Jason A. Donenfeld
@ 2022-05-20 14:36     ` Jens Axboe
  2022-05-20 14:39       ` Jason A. Donenfeld
  0 siblings, 1 reply; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 14:36 UTC (permalink / raw)
  To: Jason A. Donenfeld, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 7:37 AM, Jason A. Donenfeld wrote:
> On Fri, May 20, 2022 at 11:44:57AM +0200, Jason A. Donenfeld wrote:
>>  const struct file_operations urandom_fops = {
>> -	.read = urandom_read,
>> +	.read_iter = urandom_read_iter,
> 
> One thing I noticed is that drivers/char/mem.c has both the .read and
> the .read_iter functions for /dev/zero and /dev/null and such. I wonder
> if the .read ones can be removed?

I'm not sure if we have a clear "always use this if available" set of
rules for this. Ideally we'd want it to be:

1) Use ->read_iter() if available
2) If not, use ->read()

Might require a bit of auditing to ensure that's the case, and if we
can say that it is, then we could clean that up too.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 1/3] random: convert to using fops->read_iter()
  2022-05-20 14:36     ` Jens Axboe
@ 2022-05-20 14:39       ` Jason A. Donenfeld
  2022-05-20 15:12         ` Al Viro
  0 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 14:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

Hi Jens,

On Fri, May 20, 2022 at 08:36:17AM -0600, Jens Axboe wrote:
> On 5/20/22 7:37 AM, Jason A. Donenfeld wrote:
> > On Fri, May 20, 2022 at 11:44:57AM +0200, Jason A. Donenfeld wrote:
> >>  const struct file_operations urandom_fops = {
> >> -	.read = urandom_read,
> >> +	.read_iter = urandom_read_iter,
> > 
> > One thing I noticed is that drivers/char/mem.c has both the .read and
> > the .read_iter functions for /dev/zero and /dev/null and such. I wonder
> > if the .read ones can be removed?
> 
> I'm not sure if we have a clear "always use this if available" set of
> rules for this. Ideally we'd want it to be:
> 
> 1) Use ->read_iter() if available
> 2) If not, use ->read()
> 
> Might require a bit of auditing to ensure that's the case, and if we
> can say that it is, then we could clean that up too.

The only case I found where it wasn't in that order was:
https://lore.kernel.org/lkml/20220520135103.166972-1-Jason@zx2c4.com/

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 1/3] random: convert to using fops->read_iter()
  2022-05-20 14:39       ` Jason A. Donenfeld
@ 2022-05-20 15:12         ` Al Viro
  0 siblings, 0 replies; 28+ messages in thread
From: Al Viro @ 2022-05-20 15:12 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: Jens Axboe, Theodore Ts'o, Christoph Hellwig, LKML

On Fri, May 20, 2022 at 04:39:41PM +0200, Jason A. Donenfeld wrote:
> Hi Jens,
> 
> On Fri, May 20, 2022 at 08:36:17AM -0600, Jens Axboe wrote:
> > On 5/20/22 7:37 AM, Jason A. Donenfeld wrote:
> > > On Fri, May 20, 2022 at 11:44:57AM +0200, Jason A. Donenfeld wrote:
> > >>  const struct file_operations urandom_fops = {
> > >> -	.read = urandom_read,
> > >> +	.read_iter = urandom_read_iter,
> > > 
> > > One thing I noticed is that drivers/char/mem.c has both the .read and
> > > the .read_iter functions for /dev/zero and /dev/null and such. I wonder
> > > if the .read ones can be removed?
> > 
> > I'm not sure if we have a clear "always use this if available" set of
> > rules for this. Ideally we'd want it to be:
> > 
> > 1) Use ->read_iter() if available
> > 2) If not, use ->read()
> > 
> > Might require a bit of auditing to ensure that's the case, and if we
> > can say that it is, then we could clean that up too.
> 
> The only case I found where it wasn't in that order was:
> https://lore.kernel.org/lkml/20220520135103.166972-1-Jason@zx2c4.com/

See reply to another mail.  Again, we might be able to retire infinibarf,
but /dev/snd/pcm* is somewhat more entrenched.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20  9:44 [PATCH v4 0/3] random: convert to using iters, for Al Viro Jason A. Donenfeld
                   ` (3 preceding siblings ...)
  2022-05-20 12:16 ` [PATCH v4 0/3] random: convert to using iters, for Al Viro Jens Axboe
@ 2022-05-20 15:25 ` Jason A. Donenfeld
  2022-05-20 15:34   ` Jens Axboe
  4 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 15:25 UTC (permalink / raw)
  To: Jens Axboe, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

Hi Jens,

On Fri, May 20, 2022 at 11:44:56AM +0200, Jason A. Donenfeld wrote:
> Jens Axboe (3):
>   random: convert to using fops->read_iter()
>   random: convert to using fops->write_iter()
>   random: wire up fops->splice_{read,write}_iter()

FYI, this series makes reads from /dev/urandom slower, from around 616
MiB/s to 598 MiB/s on my system. That seems rather unfortunate.

Are we sure we really want to do this and need to do this?

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:25 ` Jason A. Donenfeld
@ 2022-05-20 15:34   ` Jens Axboe
  2022-05-20 15:39     ` Jason A. Donenfeld
                       ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 15:34 UTC (permalink / raw)
  To: Jason A. Donenfeld, Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 9:25 AM, Jason A. Donenfeld wrote:
> Hi Jens,
> 
> On Fri, May 20, 2022 at 11:44:56AM +0200, Jason A. Donenfeld wrote:
>> Jens Axboe (3):
>>   random: convert to using fops->read_iter()
>>   random: convert to using fops->write_iter()
>>   random: wire up fops->splice_{read,write}_iter()
> 
> FYI, this series makes reads from /dev/urandom slower, from around 616
> MiB/s to 598 MiB/s on my system. That seems rather unfortunate.

How reproducible is that? That seems like a huge difference for the
change. How big are the reads?

> Are we sure we really want to do this and need to do this?

I'm very sure, otherwise we're just accepting that we're breaking real
world applications. Alternatively, you can keep the ->read() and add the
->read_iter() as well, but I'm a bit skeptical at your initial results
there.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:34   ` Jens Axboe
@ 2022-05-20 15:39     ` Jason A. Donenfeld
  2022-05-20 15:44       ` Jens Axboe
  2022-05-20 15:46     ` Jason A. Donenfeld
  2022-05-20 15:47     ` Al Viro
  2 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 15:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

Hi Jens,

On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:
> On 5/20/22 9:25 AM, Jason A. Donenfeld wrote:
> > Hi Jens,
> > 
> > On Fri, May 20, 2022 at 11:44:56AM +0200, Jason A. Donenfeld wrote:
> >> Jens Axboe (3):
> >>   random: convert to using fops->read_iter()
> >>   random: convert to using fops->write_iter()
> >>   random: wire up fops->splice_{read,write}_iter()
> > 
> > FYI, this series makes reads from /dev/urandom slower, from around 616
> > MiB/s to 598 MiB/s on my system. That seems rather unfortunate.
> 
> How reproducible is that? That seems like a huge difference for the
> change. How big are the reads?

Fairly reproducible. Actually, if anything, it reproduces consistently
with worst results; I chose the most favorable ones for the new code.
This isn't any fancy `perf` profiling, but just running:

$ pv /dev/urandom > /dev/null

From looking at strace, the read size appears to be 131072.

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:39     ` Jason A. Donenfeld
@ 2022-05-20 15:44       ` Jens Axboe
  2022-05-20 15:55         ` Jason A. Donenfeld
  0 siblings, 1 reply; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 15:44 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 9:39 AM, Jason A. Donenfeld wrote:
> Hi Jens,
> 
> On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:
>> On 5/20/22 9:25 AM, Jason A. Donenfeld wrote:
>>> Hi Jens,
>>>
>>> On Fri, May 20, 2022 at 11:44:56AM +0200, Jason A. Donenfeld wrote:
>>>> Jens Axboe (3):
>>>>   random: convert to using fops->read_iter()
>>>>   random: convert to using fops->write_iter()
>>>>   random: wire up fops->splice_{read,write}_iter()
>>>
>>> FYI, this series makes reads from /dev/urandom slower, from around 616
>>> MiB/s to 598 MiB/s on my system. That seems rather unfortunate.
>>
>> How reproducible is that? That seems like a huge difference for the
>> change. How big are the reads?
> 
> Fairly reproducible. Actually, if anything, it reproduces consistently
> with worst results; I chose the most favorable ones for the new code.
> This isn't any fancy `perf` profiling, but just running:
> 
> $ pv /dev/urandom > /dev/null
> 
> From looking at strace, the read size appears to be 131072.

Ran 32, 1k, 4k here and it does seem to be down aboout 3%. Which is
definitely bigger than I expected, particularly for larger reads. If
anything, the 32b read seems comparably better than eg 1k or 4k, which
is also unexpected. Let me do a bit of profiling to see what is up.

If you're worried about it, I'd just keep the read/write and add the
iter variants on the side.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:34   ` Jens Axboe
  2022-05-20 15:39     ` Jason A. Donenfeld
@ 2022-05-20 15:46     ` Jason A. Donenfeld
  2022-05-20 15:51       ` Jens Axboe
  2022-05-20 15:47     ` Al Viro
  2 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 15:46 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

Hi Jens,

On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:
> On 5/20/22 9:25 AM, Jason A. Donenfeld wrote:
> > Are we sure we really want to do this and need to do this?
> 
> I'm very sure, otherwise we're just accepting that we're breaking real
> world applications.

Would we really? I always thought splice() and copy_file_range() and
sendfile() were all kind of "special" in that they mostly do not work
for many things, and so all userspaces need fallback code. And the state
of "mostly not working" has always just been the norm. So broken today,
working tomorrow, broken next week would be par for the course? I might
be *super* wrong here, so feel free to say so, but this has been my
general impression.

Anyway, I do like the idea of supporting splice() and sendfile(). The
performance hit is just kind of sad.

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:34   ` Jens Axboe
  2022-05-20 15:39     ` Jason A. Donenfeld
  2022-05-20 15:46     ` Jason A. Donenfeld
@ 2022-05-20 15:47     ` Al Viro
  2022-05-20 15:53       ` Jens Axboe
  2 siblings, 1 reply; 28+ messages in thread
From: Al Viro @ 2022-05-20 15:47 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jason A. Donenfeld, Theodore Ts'o, Christoph Hellwig, LKML

On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:

> I'm very sure, otherwise we're just accepting that we're breaking real
> world applications.

"Breaking" as in "it used to work with earlier kernels, doesn't work with
recent ones"?  Details, please...

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:46     ` Jason A. Donenfeld
@ 2022-05-20 15:51       ` Jens Axboe
  2022-05-20 15:58         ` Jason A. Donenfeld
  0 siblings, 1 reply; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 15:51 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 9:46 AM, Jason A. Donenfeld wrote:
> Hi Jens,
> 
> On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:
>> On 5/20/22 9:25 AM, Jason A. Donenfeld wrote:
>>> Are we sure we really want to do this and need to do this?
>>
>> I'm very sure, otherwise we're just accepting that we're breaking real
>> world applications.
> 
> Would we really? I always thought splice() and copy_file_range() and
> sendfile() were all kind of "special" in that they mostly do not work
> for many things, and so all userspaces need fallback code. And the state
> of "mostly not working" has always just been the norm. So broken today,
> working tomorrow, broken next week would be par for the course? I might
> be *super* wrong here, so feel free to say so, but this has been my
> general impression.

No, I think that is exactly the wrong impression. If you have an
application that is written using eg splice from /dev/urandom, then it
should indeed be safe to expect that it will indeed continue working. If
we have one core tenet in the kernel it's that you should ALWAYS be able
to upgrade your kernel and not have any breakage in terms of userspace
ABI. Obviously that can happen sometimes, but I think this one is
exactly the poster child of breakage that should NOT happen. We took
away a feature that someone depended on.

This situation of splice being flakily availably is new from when it was
decided that it was OK to only have it be available if ->read_iter() and
->splice_read() is set. And that is very unfortunate.

> Anyway, I do like the idea of supporting splice() and sendfile(). The
> performance hit is just kind of sad.

I like the idea too, but this is deeper than that. We simply cannot just
break existing use cases like that.

Thankfully we do have an option here as I outlined in the previous email
- keep the ->read() and just add ->read_iter() on the side so that
splice still works. Is that ideal? No, it needs more code to support.
But hopefully that can die at some point when the performance gap is
such that we no longer need to worry about having ->read() for those
cases.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:47     ` Al Viro
@ 2022-05-20 15:53       ` Jens Axboe
  2022-05-20 16:15         ` Al Viro
  0 siblings, 1 reply; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 15:53 UTC (permalink / raw)
  To: Al Viro; +Cc: Jason A. Donenfeld, Theodore Ts'o, Christoph Hellwig, LKML

On 5/20/22 9:47 AM, Al Viro wrote:
> On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:
> 
>> I'm very sure, otherwise we're just accepting that we're breaking real
>> world applications.
> 
> "Breaking" as in "it used to work with earlier kernels, doesn't work with
> recent ones"?  Details, please...

Yes, as in exactly that. This is what drove this addition of
->read_iter() for urandom. See commit:

ommit 36e2c7421f02a22f71c9283e55fdb672a9eb58e7
Author: Christoph Hellwig <hch@lst.de>
Date:   Thu Sep 3 16:22:34 2020 +0200

    fs: don't allow splice read/write without explicit ops

related to the set_fs() changes, and now go look for any commit that
has:

Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops")

in it and see that this isn't an isolated incident at all.

tldr - splice from /dev/urandom used to work, and I recently got a
report internally on an application that broke on upgrade from 5.6 to
5.12 exactly because it now just just -EINVAL's instead.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:44       ` Jens Axboe
@ 2022-05-20 15:55         ` Jason A. Donenfeld
  2022-05-20 15:58           ` Jens Axboe
  0 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 15:55 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

Hi Jens,

On Fri, May 20, 2022 at 09:44:25AM -0600, Jens Axboe wrote:
> Ran 32, 1k, 4k here and it does seem to be down aboout 3%. Which is
> definitely bigger than I expected, particularly for larger reads. If
> anything, the 32b read seems comparably better than eg 1k or 4k, which
> is also unexpected. Let me do a bit of profiling to see what is up.

Something to keep in mind wrt 32b is that for complicated crypto
reasons, the function has this logic:

- If len <= 32, generate one 64 byte block and give <= 32 bytes of it to
  the caller.

- If len > 32, generate one 64 byte block, but give 0 of it to the
  caller. Then generate ⌈len/64⌉ blocks for the caller.

Put together, this means:

- 1..32, 1 block
- 33..64, 2 blocks
- 65..128, 3 blocks
- 129..196, 4 blocks

So you get this sort of shelf where the amortization benefits don't
really kick in until after 3 blocks.

> If you're worried about it, I'd just keep the read/write and add the
> iter variants on the side.
 
Not a chance of that. These functions are already finicky as-is; I would
really hate to have to duplicate all of these paths.

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:51       ` Jens Axboe
@ 2022-05-20 15:58         ` Jason A. Donenfeld
  2022-05-20 16:00           ` Jens Axboe
  0 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 15:58 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

Hi Jens,

On Fri, May 20, 2022 at 09:51:06AM -0600, Jens Axboe wrote:
> ABI. Obviously that can happen sometimes, but I think this one is
> exactly the poster child of breakage that should NOT happen. We took
> away a feature that someone depended on.

I suppose somebody (Meta, I presume) *did* notice the breakage, which is
sign enough.

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:55         ` Jason A. Donenfeld
@ 2022-05-20 15:58           ` Jens Axboe
  2022-05-20 16:03             ` Jason A. Donenfeld
  0 siblings, 1 reply; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 15:58 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 9:55 AM, Jason A. Donenfeld wrote:
> Hi Jens,
> 
> On Fri, May 20, 2022 at 09:44:25AM -0600, Jens Axboe wrote:
>> Ran 32, 1k, 4k here and it does seem to be down aboout 3%. Which is
>> definitely bigger than I expected, particularly for larger reads. If
>> anything, the 32b read seems comparably better than eg 1k or 4k, which
>> is also unexpected. Let me do a bit of profiling to see what is up.
> 
> Something to keep in mind wrt 32b is that for complicated crypto
> reasons, the function has this logic:
> 
> - If len <= 32, generate one 64 byte block and give <= 32 bytes of it to
>   the caller.
> 
> - If len > 32, generate one 64 byte block, but give 0 of it to the
>   caller. Then generate ?len/64? blocks for the caller.
> 
> Put together, this means:
> 
> - 1..32, 1 block
> - 33..64, 2 blocks
> - 65..128, 3 blocks
> - 129..196, 4 blocks
> 
> So you get this sort of shelf where the amortization benefits don't
> really kick in until after 3 blocks.

Ah I see, I can see if 64b is closer to the change for eg 1k.

>> If you're worried about it, I'd just keep the read/write and add the
>> iter variants on the side.
>  
> Not a chance of that. These functions are already finicky as-is; I would
> really hate to have to duplicate all of these paths.

Then I'd say there are only two options:

- Add a helper that provides splice for something that only has
  read/write set.

- Just accept that we're 3% slower reading from /dev/urandom for now,
  and maybe 1-2% for small reads. Can't really imagine this being a huge
  issue, how many high throughput /dev/urandom read situations exist in
  the real world?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:58         ` Jason A. Donenfeld
@ 2022-05-20 16:00           ` Jens Axboe
  0 siblings, 0 replies; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 16:00 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 9:58 AM, Jason A. Donenfeld wrote:
> Hi Jens,
> 
> On Fri, May 20, 2022 at 09:51:06AM -0600, Jens Axboe wrote:
>> ABI. Obviously that can happen sometimes, but I think this one is
>> exactly the poster child of breakage that should NOT happen. We took
>> away a feature that someone depended on.
> 
> I suppose somebody (Meta, I presume) *did* notice the breakage, which is
> sign enough.

Indeed, this is how I found out. And then you have to wonder how often
this has happened elsewhere where nobody bothered to report it, or how
many are still waiting to happen because they are still on an older
kernel and just haven't upgraded yet. Or upgraded and rolled back
perhaps.

None of the latter really matters, it's eventual breakage there, and
very real breakage right now for the first case.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:58           ` Jens Axboe
@ 2022-05-20 16:03             ` Jason A. Donenfeld
  2022-05-20 16:06               ` Jens Axboe
  0 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 16:03 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

Hi Jens,

On Fri, May 20, 2022 at 09:58:28AM -0600, Jens Axboe wrote:
> On 5/20/22 9:55 AM, Jason A. Donenfeld wrote:
> > Hi Jens,
> > 
> > On Fri, May 20, 2022 at 09:44:25AM -0600, Jens Axboe wrote:
> >> Ran 32, 1k, 4k here and it does seem to be down aboout 3%. Which is
> >> definitely bigger than I expected, particularly for larger reads. If
> >> anything, the 32b read seems comparably better than eg 1k or 4k, which
> >> is also unexpected. Let me do a bit of profiling to see what is up.
> > 
> > Something to keep in mind wrt 32b is that for complicated crypto
> > reasons, the function has this logic:
> > 
> > - If len <= 32, generate one 64 byte block and give <= 32 bytes of it to
> >   the caller.
> > 
> > - If len > 32, generate one 64 byte block, but give 0 of it to the
> >   caller. Then generate ?len/64? blocks for the caller.
> > 
> > Put together, this means:
> > 
> > - 1..32, 1 block
> > - 33..64, 2 blocks
> > - 65..128, 3 blocks
> > - 129..196, 4 blocks
> > 
> > So you get this sort of shelf where the amortization benefits don't
> > really kick in until after 3 blocks.
> 
> Ah I see, I can see if 64b is closer to the change for eg 1k.

What I meant by providing all that detail is that from a cycles-per-byte
perspective, smaller=more expensive. So it's possible that the
difference in the patchset is less visible as it gets lost in the more
expensive operation.

> >> If you're worried about it, I'd just keep the read/write and add the
> >> iter variants on the side.
> >  
> > Not a chance of that. These functions are already finicky as-is; I would
> > really hate to have to duplicate all of these paths.
> 
> Then I'd say there are only two options:
> 
> - Add a helper that provides splice for something that only has
>   read/write set.

That'd be fine with me, but wouldn't it involve bringing back set_fs(),
because of the copy_to_user() in there?

> - Just accept that we're 3% slower reading from /dev/urandom for now,
>   and maybe 1-2% for small reads. Can't really imagine this being a huge
>   issue, how many high throughput /dev/urandom read situations exist in
>   the real world?

An option three might be that eventually the VFS overhead is worked out
and read_iter() reaches parity. One can hope, I guess.

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 16:03             ` Jason A. Donenfeld
@ 2022-05-20 16:06               ` Jens Axboe
  0 siblings, 0 replies; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 16:06 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: Theodore Ts'o, Christoph Hellwig, LKML, Al Viro

On 5/20/22 10:03 AM, Jason A. Donenfeld wrote:
>> Then I'd say there are only two options:
>>
>> - Add a helper that provides splice for something that only has
>>   read/write set.
> 
> That'd be fine with me, but wouldn't it involve bringing back set_fs(),
> because of the copy_to_user() in there?

I haven't even looked into whether it's currently feasible or not, just
mentioning it as a potential option. But the better one is definetely
the next one...

>> - Just accept that we're 3% slower reading from /dev/urandom for now,
>>   and maybe 1-2% for small reads. Can't really imagine this being a huge
>>   issue, how many high throughput /dev/urandom read situations exist in
>>   the real world?
> 
> An option three might be that eventually the VFS overhead is worked out
> and read_iter() reaches parity. One can hope, I guess.

And that will certainly happen, especially as we have other paths that
don't really have the choice, they have to use the iterator versions.
And if we can get a bit closer, then that also opens the door more
generic conversions so we can kill ->read/->write for almost all cases
(except those weirdo ones that Al pointed out).

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 15:53       ` Jens Axboe
@ 2022-05-20 16:15         ` Al Viro
  2022-05-20 16:24           ` Jens Axboe
  0 siblings, 1 reply; 28+ messages in thread
From: Al Viro @ 2022-05-20 16:15 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jason A. Donenfeld, Theodore Ts'o, Christoph Hellwig, LKML

On Fri, May 20, 2022 at 09:53:30AM -0600, Jens Axboe wrote:
> On 5/20/22 9:47 AM, Al Viro wrote:
> > On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:
> > 
> >> I'm very sure, otherwise we're just accepting that we're breaking real
> >> world applications.
> > 
> > "Breaking" as in "it used to work with earlier kernels, doesn't work with
> > recent ones"?  Details, please...
> 
> Yes, as in exactly that. This is what drove this addition of
> ->read_iter() for urandom. See commit:
> 
> ommit 36e2c7421f02a22f71c9283e55fdb672a9eb58e7
> Author: Christoph Hellwig <hch@lst.de>
> Date:   Thu Sep 3 16:22:34 2020 +0200
> 
>     fs: don't allow splice read/write without explicit ops
> 
> related to the set_fs() changes, and now go look for any commit that
> has:
> 
> Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops")
> 
> in it and see that this isn't an isolated incident at all.
> 
> tldr - splice from /dev/urandom used to work, and I recently got a
> report internally on an application that broke on upgrade from 5.6 to
> 5.12 exactly because it now just just -EINVAL's instead.

IIRC, Linus' position at the time had been along the lines of
"splice is not so good ABI anyway, so let's do it and fix up
the places that do get real-world complaints once such appear".
So /dev/urandom is one such place...

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 16:15         ` Al Viro
@ 2022-05-20 16:24           ` Jens Axboe
  2022-05-20 16:39             ` Jason A. Donenfeld
  0 siblings, 1 reply; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 16:24 UTC (permalink / raw)
  To: Al Viro; +Cc: Jason A. Donenfeld, Theodore Ts'o, Christoph Hellwig, LKML

On 5/20/22 10:15 AM, Al Viro wrote:
> On Fri, May 20, 2022 at 09:53:30AM -0600, Jens Axboe wrote:
>> On 5/20/22 9:47 AM, Al Viro wrote:
>>> On Fri, May 20, 2022 at 09:34:46AM -0600, Jens Axboe wrote:
>>>
>>>> I'm very sure, otherwise we're just accepting that we're breaking real
>>>> world applications.
>>>
>>> "Breaking" as in "it used to work with earlier kernels, doesn't work with
>>> recent ones"?  Details, please...
>>
>> Yes, as in exactly that. This is what drove this addition of
>> ->read_iter() for urandom. See commit:
>>
>> ommit 36e2c7421f02a22f71c9283e55fdb672a9eb58e7
>> Author: Christoph Hellwig <hch@lst.de>
>> Date:   Thu Sep 3 16:22:34 2020 +0200
>>
>>     fs: don't allow splice read/write without explicit ops
>>
>> related to the set_fs() changes, and now go look for any commit that
>> has:
>>
>> Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops")
>>
>> in it and see that this isn't an isolated incident at all.
>>
>> tldr - splice from /dev/urandom used to work, and I recently got a
>> report internally on an application that broke on upgrade from 5.6 to
>> 5.12 exactly because it now just just -EINVAL's instead.
> 
> IIRC, Linus' position at the time had been along the lines of
> "splice is not so good ABI anyway, so let's do it and fix up
> the places that do get real-world complaints once such appear".
> So /dev/urandom is one such place...

That's what Christoph said too. Honestly that's a very odd way to
attempt to justify breakage like this, even if it is tempting to
facilitate the set_fs() removal. But then be honest about it and say
it like it is, rather than some hand wavy explanation that frankly
doesn't make any sense.

The referenced change doesn't change the splice ABI at all, hence the
justification seems very random to me. It kept what we already have,
except we randomly break some use cases.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 16:24           ` Jens Axboe
@ 2022-05-20 16:39             ` Jason A. Donenfeld
  2022-05-20 16:41               ` Jens Axboe
  0 siblings, 1 reply; 28+ messages in thread
From: Jason A. Donenfeld @ 2022-05-20 16:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Al Viro, Theodore Ts'o, Christoph Hellwig, LKML

Hi Jens,

On Fri, May 20, 2022 at 10:24:36AM -0600, Jens Axboe wrote:
> On 5/20/22 10:15 AM, Al Viro wrote:
> > IIRC, Linus' position at the time had been along the lines of
> > "splice is not so good ABI anyway, so let's do it and fix up
> > the places that do get real-world complaints once such appear".
> > So /dev/urandom is one such place...
> 
> That's what Christoph said too. Honestly that's a very odd way to
> attempt to justify breakage like this, even if it is tempting to
> facilitate the set_fs() removal. But then be honest about it and say
> it like it is, rather than some hand wavy explanation that frankly
> doesn't make any sense.
> 
> The referenced change doesn't change the splice ABI at all, hence the
> justification seems very random to me. It kept what we already have,
> except we randomly break some use cases.
 
It looks like Al is right in the sense that Linus must certainly be
aware of the breakage. He fixed tty in 9bb48c82aced ("tty: implement
write_iter").

Anyway, it seems like the iter functions are the way forward, so this v4
is queued up now (with a few minor cosmetic changes) and pushed to:
https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git/log/
I'll send an early 5.19 pull for everything either tonight or Sunday.
And then next week I'll start on backports. (Though, 5.12 is a weird
kernel version; I assume this is some Meta kernel that has its own
backport team?)

Meanwhile, hopefully Al can pick up the splice.c sendfile(2) chardev
patch for 5.19, so at least there'll be some silver lining to the
performance hit.

Jason

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 16:39             ` Jason A. Donenfeld
@ 2022-05-20 16:41               ` Jens Axboe
  2022-05-24  4:52                 ` Eric W. Biederman
  0 siblings, 1 reply; 28+ messages in thread
From: Jens Axboe @ 2022-05-20 16:41 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: Al Viro, Theodore Ts'o, Christoph Hellwig, LKML

On 5/20/22 10:39 AM, Jason A. Donenfeld wrote:
> Hi Jens,
> 
> On Fri, May 20, 2022 at 10:24:36AM -0600, Jens Axboe wrote:
>> On 5/20/22 10:15 AM, Al Viro wrote:
>>> IIRC, Linus' position at the time had been along the lines of
>>> "splice is not so good ABI anyway, so let's do it and fix up
>>> the places that do get real-world complaints once such appear".
>>> So /dev/urandom is one such place...
>>
>> That's what Christoph said too. Honestly that's a very odd way to
>> attempt to justify breakage like this, even if it is tempting to
>> facilitate the set_fs() removal. But then be honest about it and say
>> it like it is, rather than some hand wavy explanation that frankly
>> doesn't make any sense.
>>
>> The referenced change doesn't change the splice ABI at all, hence the
>> justification seems very random to me. It kept what we already have,
>> except we randomly break some use cases.
>  
> It looks like Al is right in the sense that Linus must certainly be
> aware of the breakage. He fixed tty in 9bb48c82aced ("tty: implement
> write_iter").

I don't think anyone is disputing that, but I also know that Linus wants
these fixed up as they are discovered. And I agree with him on that,
even if I disagree on the process to get there as it introduced
frivolous breakage...

> Anyway, it seems like the iter functions are the way forward, so this v4
> is queued up now (with a few minor cosmetic changes) and pushed to:
> https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git/log/
> I'll send an early 5.19 pull for everything either tonight or Sunday.
> And then next week I'll start on backports. (Though, 5.12 is a weird
> kernel version; I assume this is some Meta kernel that has its own
> backport team?)

Thanks!

> Meanwhile, hopefully Al can pick up the splice.c sendfile(2) chardev
> patch for 5.19, so at least there'll be some silver lining to the
> performance hit.

Let's hope so.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v4 0/3] random: convert to using iters, for Al Viro
  2022-05-20 16:41               ` Jens Axboe
@ 2022-05-24  4:52                 ` Eric W. Biederman
  0 siblings, 0 replies; 28+ messages in thread
From: Eric W. Biederman @ 2022-05-24  4:52 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Jason A. Donenfeld, Al Viro, Theodore Ts'o, Christoph Hellwig, LKML

Jens Axboe <axboe@kernel.dk> writes:

> On 5/20/22 10:39 AM, Jason A. Donenfeld wrote:
>> Hi Jens,
>> 
>> On Fri, May 20, 2022 at 10:24:36AM -0600, Jens Axboe wrote:
>>> On 5/20/22 10:15 AM, Al Viro wrote:
>>>> IIRC, Linus' position at the time had been along the lines of
>>>> "splice is not so good ABI anyway, so let's do it and fix up
>>>> the places that do get real-world complaints once such appear".
>>>> So /dev/urandom is one such place...
>>>
>>> That's what Christoph said too. Honestly that's a very odd way to
>>> attempt to justify breakage like this, even if it is tempting to
>>> facilitate the set_fs() removal. But then be honest about it and say
>>> it like it is, rather than some hand wavy explanation that frankly
>>> doesn't make any sense.
>>>
>>> The referenced change doesn't change the splice ABI at all, hence the
>>> justification seems very random to me. It kept what we already have,
>>> except we randomly break some use cases.
>>  
>> It looks like Al is right in the sense that Linus must certainly be
>> aware of the breakage. He fixed tty in 9bb48c82aced ("tty: implement
>> write_iter").
>
> I don't think anyone is disputing that, but I also know that Linus wants
> these fixed up as they are discovered. And I agree with him on that,
> even if I disagree on the process to get there as it introduced
> frivolous breakage...

I believe the hypothesis at the time was that on many of these
interfaces splice is not used on them so it did matter.

With everything being fixed in the places the hypothesis turned out to
be wrong.

The rule is no regressions, not bug comparability forever.  This allows
things like removing a.out binary support, and many other things that
are just dead code these days.

Sometimes the only way to discover what has users is to remove support
and wait a while and see if anyone complains.  Sometimes doing that
is worth it, other times it is not.

My general sense is that there have been few enough reports that users
who care about splice support have been few and far between.  Which
suggests the hypothesis was not an unreasonable one when Linus made it.

The truly unfortunate part is that no one knew enough about these users
to be able to step up and say that they care about splice on those
interfaces at the start of this process.

Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2022-05-24  4:53 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-20  9:44 [PATCH v4 0/3] random: convert to using iters, for Al Viro Jason A. Donenfeld
2022-05-20  9:44 ` [PATCH v4 1/3] random: convert to using fops->read_iter() Jason A. Donenfeld
2022-05-20 13:37   ` Jason A. Donenfeld
2022-05-20 14:36     ` Jens Axboe
2022-05-20 14:39       ` Jason A. Donenfeld
2022-05-20 15:12         ` Al Viro
2022-05-20  9:44 ` [PATCH v4 2/3] random: convert to using fops->write_iter() Jason A. Donenfeld
2022-05-20  9:44 ` [PATCH v4 3/3] random: wire up fops->splice_{read,write}_iter() Jason A. Donenfeld
2022-05-20 12:16 ` [PATCH v4 0/3] random: convert to using iters, for Al Viro Jens Axboe
2022-05-20 15:25 ` Jason A. Donenfeld
2022-05-20 15:34   ` Jens Axboe
2022-05-20 15:39     ` Jason A. Donenfeld
2022-05-20 15:44       ` Jens Axboe
2022-05-20 15:55         ` Jason A. Donenfeld
2022-05-20 15:58           ` Jens Axboe
2022-05-20 16:03             ` Jason A. Donenfeld
2022-05-20 16:06               ` Jens Axboe
2022-05-20 15:46     ` Jason A. Donenfeld
2022-05-20 15:51       ` Jens Axboe
2022-05-20 15:58         ` Jason A. Donenfeld
2022-05-20 16:00           ` Jens Axboe
2022-05-20 15:47     ` Al Viro
2022-05-20 15:53       ` Jens Axboe
2022-05-20 16:15         ` Al Viro
2022-05-20 16:24           ` Jens Axboe
2022-05-20 16:39             ` Jason A. Donenfeld
2022-05-20 16:41               ` Jens Axboe
2022-05-24  4:52                 ` Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.