IO-Uring Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] io_uring: fix iovec leaks
@ 2020-02-07 19:04 Pavel Begunkov
  2020-02-07 19:09 ` Pavel Begunkov
  2020-02-11 10:07 ` David Laight
  0 siblings, 2 replies; 7+ messages in thread
From: Pavel Begunkov @ 2020-02-07 19:04 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Allocated iovec is freed only in io_{read,write,send,recv}(), and just
leaves it if an error occurred. There are plenty of such cases:
- cancellation of non-head requests
- fail grabbing files in __io_queue_sqe()
- set REQ_F_NOWAIT and returning in __io_queue_sqe()
- etc.

Add REQ_F_NEED_CLEANUP, which will force such requests with custom
allocated resources to go through cleanup handlers on put.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1914351ebd5e..d699695ef809 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -478,6 +478,7 @@ enum {
 	REQ_F_MUST_PUNT_BIT,
 	REQ_F_TIMEOUT_NOSEQ_BIT,
 	REQ_F_COMP_LOCKED_BIT,
+	REQ_F_NEED_CLEANUP_BIT,
 };
 
 enum {
@@ -516,6 +517,8 @@ enum {
 	REQ_F_TIMEOUT_NOSEQ	= BIT(REQ_F_TIMEOUT_NOSEQ_BIT),
 	/* completion under lock */
 	REQ_F_COMP_LOCKED	= BIT(REQ_F_COMP_LOCKED_BIT),
+	/* needs cleanup */
+	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
 
 };
 
@@ -749,6 +752,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				 unsigned nr_args);
 static int io_grab_files(struct io_kiocb *req);
 static void io_ring_file_ref_flush(struct fixed_file_data *data);
+static void io_cleanup_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
 
@@ -1236,6 +1240,9 @@ static void __io_free_req(struct io_kiocb *req)
 {
 	__io_req_aux_free(req);
 
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		io_cleanup_req(req);
+
 	if (req->flags & REQ_F_INFLIGHT) {
 		struct io_ring_ctx *ctx = req->ctx;
 		unsigned long flags;
@@ -2129,6 +2136,8 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
 		req->io->rw.iov = req->io->rw.fast_iov;
 		memcpy(req->io->rw.iov, fast_iov,
 			sizeof(struct iovec) * iter->nr_segs);
+	} else {
+		req->flags |= REQ_F_NEED_CLEANUP;
 	}
 }
 
@@ -2239,6 +2248,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
 	}
 out_free:
 	kfree(iovec);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	return ret;
 }
 
@@ -2343,6 +2353,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 		}
 	}
 out_free:
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	kfree(iovec);
 	return ret;
 }
@@ -2943,6 +2954,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 #if defined(CONFIG_NET)
 	struct io_sr_msg *sr = &req->sr_msg;
 	struct io_async_ctx *io = req->io;
+	int ret;
 
 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -2952,8 +2964,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return 0;
 
 	io->msg.iov = io->msg.fast_iov;
-	return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+	ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
 					&io->msg.iov);
+	if (!ret)
+		req->flags |= REQ_F_NEED_CLEANUP;
+	return ret;
 #else
 	return -EOPNOTSUPP;
 #endif
@@ -3011,6 +3026,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 					kfree(kmsg->iov);
 				return -ENOMEM;
 			}
+			req->flags |= REQ_F_NEED_CLEANUP;
 			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
 			return -EAGAIN;
 		}
@@ -3020,6 +3036,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 
 	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	io_cqring_add_event(req, ret);
 	if (ret < 0)
 		req_set_fail_links(req);
@@ -3087,6 +3104,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
 #if defined(CONFIG_NET)
 	struct io_sr_msg *sr = &req->sr_msg;
 	struct io_async_ctx *io = req->io;
+	int ret;
 
 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -3096,8 +3114,11 @@ static int io_recvmsg_prep(struct io_kiocb *req,
 		return 0;
 
 	io->msg.iov = io->msg.fast_iov;
-	return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+	ret = recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
 					&io->msg.uaddr, &io->msg.iov);
+	if (!ret)
+		req->flags |= REQ_F_NEED_CLEANUP;
+	return ret;
 #else
 	return -EOPNOTSUPP;
 #endif
@@ -3158,6 +3179,7 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 				return -ENOMEM;
 			}
 			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
+			req->flags |= REQ_F_NEED_CLEANUP;
 			return -EAGAIN;
 		}
 		if (ret == -ERESTARTSYS)
@@ -3166,6 +3188,7 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 
 	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	io_cqring_add_event(req, ret);
 	if (ret < 0)
 		req_set_fail_links(req);
@@ -4176,6 +4199,30 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return -EIOCBQUEUED;
 }
 
+static void io_cleanup_req(struct io_kiocb *req)
+{
+	struct io_async_ctx *io = req->io;
+
+	switch (req->opcode) {
+	case IORING_OP_READV:
+	case IORING_OP_READ_FIXED:
+	case IORING_OP_READ:
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+	case IORING_OP_WRITE:
+		if (io->rw.iov != io->rw.fast_iov)
+			kfree(io->rw.iov);
+		break;
+	case IORING_OP_SENDMSG:
+	case IORING_OP_RECVMSG:
+		if (io->msg.iov != io->msg.fast_iov)
+			kfree(io->msg.iov);
+		break;
+	}
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+}
+
 static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			struct io_kiocb **nxt, bool force_nonblock)
 {
-- 
2.24.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: fix iovec leaks
  2020-02-07 19:04 [PATCH] io_uring: fix iovec leaks Pavel Begunkov
@ 2020-02-07 19:09 ` Pavel Begunkov
  2020-02-07 20:40   ` Jens Axboe
  2020-02-11 10:07 ` David Laight
  1 sibling, 1 reply; 7+ messages in thread
From: Pavel Begunkov @ 2020-02-07 19:09 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

[-- Attachment #1.1: Type: text/plain, Size: 6259 bytes --]

On 07/02/2020 22:04, Pavel Begunkov wrote:
> Allocated iovec is freed only in io_{read,write,send,recv}(), and just
> leaves it if an error occurred. There are plenty of such cases:
> - cancellation of non-head requests
> - fail grabbing files in __io_queue_sqe()
> - set REQ_F_NOWAIT and returning in __io_queue_sqe()
> - etc.
> 
> Add REQ_F_NEED_CLEANUP, which will force such requests with custom
> allocated resources to go through cleanup handlers on put.

This is probably desirable in stable-5.5, so I tried to not change much.
I'll hide common parts in following patches for-5.6/next.

> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>  fs/io_uring.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 1914351ebd5e..d699695ef809 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -478,6 +478,7 @@ enum {
>  	REQ_F_MUST_PUNT_BIT,
>  	REQ_F_TIMEOUT_NOSEQ_BIT,
>  	REQ_F_COMP_LOCKED_BIT,
> +	REQ_F_NEED_CLEANUP_BIT,
>  };
>  
>  enum {
> @@ -516,6 +517,8 @@ enum {
>  	REQ_F_TIMEOUT_NOSEQ	= BIT(REQ_F_TIMEOUT_NOSEQ_BIT),
>  	/* completion under lock */
>  	REQ_F_COMP_LOCKED	= BIT(REQ_F_COMP_LOCKED_BIT),
> +	/* needs cleanup */
> +	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
>  
>  };
>  
> @@ -749,6 +752,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
>  				 unsigned nr_args);
>  static int io_grab_files(struct io_kiocb *req);
>  static void io_ring_file_ref_flush(struct fixed_file_data *data);
> +static void io_cleanup_req(struct io_kiocb *req);
>  
>  static struct kmem_cache *req_cachep;
>  
> @@ -1236,6 +1240,9 @@ static void __io_free_req(struct io_kiocb *req)
>  {
>  	__io_req_aux_free(req);
>  
> +	if (req->flags & REQ_F_NEED_CLEANUP)
> +		io_cleanup_req(req);
> +
>  	if (req->flags & REQ_F_INFLIGHT) {
>  		struct io_ring_ctx *ctx = req->ctx;
>  		unsigned long flags;
> @@ -2129,6 +2136,8 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
>  		req->io->rw.iov = req->io->rw.fast_iov;
>  		memcpy(req->io->rw.iov, fast_iov,
>  			sizeof(struct iovec) * iter->nr_segs);
> +	} else {
> +		req->flags |= REQ_F_NEED_CLEANUP;
>  	}
>  }
>  
> @@ -2239,6 +2248,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
>  	}
>  out_free:
>  	kfree(iovec);
> +	req->flags &= ~REQ_F_NEED_CLEANUP;
>  	return ret;
>  }
>  
> @@ -2343,6 +2353,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
>  		}
>  	}
>  out_free:
> +	req->flags &= ~REQ_F_NEED_CLEANUP;
>  	kfree(iovec);
>  	return ret;
>  }
> @@ -2943,6 +2954,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  #if defined(CONFIG_NET)
>  	struct io_sr_msg *sr = &req->sr_msg;
>  	struct io_async_ctx *io = req->io;
> +	int ret;
>  
>  	sr->msg_flags = READ_ONCE(sqe->msg_flags);
>  	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
> @@ -2952,8 +2964,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  		return 0;
>  
>  	io->msg.iov = io->msg.fast_iov;
> -	return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
> +	ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
>  					&io->msg.iov);
> +	if (!ret)
> +		req->flags |= REQ_F_NEED_CLEANUP;
> +	return ret;
>  #else
>  	return -EOPNOTSUPP;
>  #endif
> @@ -3011,6 +3026,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>  					kfree(kmsg->iov);
>  				return -ENOMEM;
>  			}
> +			req->flags |= REQ_F_NEED_CLEANUP;
>  			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
>  			return -EAGAIN;
>  		}
> @@ -3020,6 +3036,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>  
>  	if (kmsg && kmsg->iov != kmsg->fast_iov)
>  		kfree(kmsg->iov);
> +	req->flags &= ~REQ_F_NEED_CLEANUP;
>  	io_cqring_add_event(req, ret);
>  	if (ret < 0)
>  		req_set_fail_links(req);
> @@ -3087,6 +3104,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
>  #if defined(CONFIG_NET)
>  	struct io_sr_msg *sr = &req->sr_msg;
>  	struct io_async_ctx *io = req->io;
> +	int ret;
>  
>  	sr->msg_flags = READ_ONCE(sqe->msg_flags);
>  	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
> @@ -3096,8 +3114,11 @@ static int io_recvmsg_prep(struct io_kiocb *req,
>  		return 0;
>  
>  	io->msg.iov = io->msg.fast_iov;
> -	return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
> +	ret = recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
>  					&io->msg.uaddr, &io->msg.iov);
> +	if (!ret)
> +		req->flags |= REQ_F_NEED_CLEANUP;
> +	return ret;
>  #else
>  	return -EOPNOTSUPP;
>  #endif
> @@ -3158,6 +3179,7 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>  				return -ENOMEM;
>  			}
>  			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
> +			req->flags |= REQ_F_NEED_CLEANUP;
>  			return -EAGAIN;
>  		}
>  		if (ret == -ERESTARTSYS)
> @@ -3166,6 +3188,7 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>  
>  	if (kmsg && kmsg->iov != kmsg->fast_iov)
>  		kfree(kmsg->iov);
> +	req->flags &= ~REQ_F_NEED_CLEANUP;
>  	io_cqring_add_event(req, ret);
>  	if (ret < 0)
>  		req_set_fail_links(req);
> @@ -4176,6 +4199,30 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  	return -EIOCBQUEUED;
>  }
>  
> +static void io_cleanup_req(struct io_kiocb *req)
> +{
> +	struct io_async_ctx *io = req->io;
> +
> +	switch (req->opcode) {
> +	case IORING_OP_READV:
> +	case IORING_OP_READ_FIXED:
> +	case IORING_OP_READ:
> +	case IORING_OP_WRITEV:
> +	case IORING_OP_WRITE_FIXED:
> +	case IORING_OP_WRITE:
> +		if (io->rw.iov != io->rw.fast_iov)
> +			kfree(io->rw.iov);
> +		break;
> +	case IORING_OP_SENDMSG:
> +	case IORING_OP_RECVMSG:
> +		if (io->msg.iov != io->msg.fast_iov)
> +			kfree(io->msg.iov);
> +		break;
> +	}
> +
> +	req->flags &= ~REQ_F_NEED_CLEANUP;
> +}
> +
>  static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  			struct io_kiocb **nxt, bool force_nonblock)
>  {
> 

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: fix iovec leaks
  2020-02-07 19:09 ` Pavel Begunkov
@ 2020-02-07 20:40   ` Jens Axboe
  0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2020-02-07 20:40 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring, linux-kernel

On 2/7/20 12:09 PM, Pavel Begunkov wrote:
> On 07/02/2020 22:04, Pavel Begunkov wrote:
>> Allocated iovec is freed only in io_{read,write,send,recv}(), and just
>> leaves it if an error occurred. There are plenty of such cases:
>> - cancellation of non-head requests
>> - fail grabbing files in __io_queue_sqe()
>> - set REQ_F_NOWAIT and returning in __io_queue_sqe()
>> - etc.
>>
>> Add REQ_F_NEED_CLEANUP, which will force such requests with custom
>> allocated resources to go through cleanup handlers on put.
> 
> This is probably desirable in stable-5.5, so I tried to not change much.
> I'll hide common parts in following patches for-5.6/next.

I appreciate that, it's worth keeping in mind for stable bound patches
for sure. Thanks, applied.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] io_uring: fix iovec leaks
  2020-02-07 19:04 [PATCH] io_uring: fix iovec leaks Pavel Begunkov
  2020-02-07 19:09 ` Pavel Begunkov
@ 2020-02-11 10:07 ` David Laight
  2020-02-11 11:05   ` Pavel Begunkov
  1 sibling, 1 reply; 7+ messages in thread
From: David Laight @ 2020-02-11 10:07 UTC (permalink / raw)
  To: 'Pavel Begunkov', Jens Axboe, io-uring, linux-kernel

From: Pavel Begunkov
> Sent: 07 February 2020 19:05
> Allocated iovec is freed only in io_{read,write,send,recv}(), and just
> leaves it if an error occurred. There are plenty of such cases:
> - cancellation of non-head requests
> - fail grabbing files in __io_queue_sqe()
> - set REQ_F_NOWAIT and returning in __io_queue_sqe()
> - etc.
> 
> Add REQ_F_NEED_CLEANUP, which will force such requests with custom
> allocated resources to go through cleanup handlers on put.

This looks horribly fragile.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: fix iovec leaks
  2020-02-11 10:07 ` David Laight
@ 2020-02-11 11:05   ` Pavel Begunkov
  2020-02-11 11:16     ` David Laight
  0 siblings, 1 reply; 7+ messages in thread
From: Pavel Begunkov @ 2020-02-11 11:05 UTC (permalink / raw)
  To: David Laight, Jens Axboe, io-uring, linux-kernel

On 2/11/2020 1:07 PM, David Laight wrote:
> From: Pavel Begunkov
>> Sent: 07 February 2020 19:05
>> Allocated iovec is freed only in io_{read,write,send,recv}(), and just
>> leaves it if an error occurred. There are plenty of such cases:
>> - cancellation of non-head requests
>> - fail grabbing files in __io_queue_sqe()
>> - set REQ_F_NOWAIT and returning in __io_queue_sqe()
>> - etc.
>>
>> Add REQ_F_NEED_CLEANUP, which will force such requests with custom
>> allocated resources to go through cleanup handlers on put.
> 
> This looks horribly fragile.

Well, not as horrible as it may appear -- set the flag whenever you
want the corresponding destructor to be called, and clear it when it is
no longer needed.

I'd love to have something better, maybe even something more intrusive
for-next, but that shouldn't hurt the hot path. Any ideas?

> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] io_uring: fix iovec leaks
  2020-02-11 11:05   ` Pavel Begunkov
@ 2020-02-11 11:16     ` David Laight
  2020-02-11 11:43       ` Pavel Begunkov
  0 siblings, 1 reply; 7+ messages in thread
From: David Laight @ 2020-02-11 11:16 UTC (permalink / raw)
  To: 'Pavel Begunkov', Jens Axboe, io-uring, linux-kernel

From: Pavel Begunkov
> Sent: 11 February 2020 11:05
> On 2/11/2020 1:07 PM, David Laight wrote:
> > From: Pavel Begunkov
> >> Sent: 07 February 2020 19:05
> >> Allocated iovec is freed only in io_{read,write,send,recv}(), and just
> >> leaves it if an error occurred. There are plenty of such cases:
> >> - cancellation of non-head requests
> >> - fail grabbing files in __io_queue_sqe()
> >> - set REQ_F_NOWAIT and returning in __io_queue_sqe()
> >> - etc.
> >>
> >> Add REQ_F_NEED_CLEANUP, which will force such requests with custom
> >> allocated resources to go through cleanup handlers on put.
> >
> > This looks horribly fragile.
> 
> Well, not as horrible as it may appear -- set the flag whenever you
> want the corresponding destructor to be called, and clear it when it is
> no longer needed.
> 
> I'd love to have something better, maybe even something more intrusive
> for-next, but that shouldn't hurt the hot path. Any ideas?

Given all the 'cud chewing' that happens in code paths
like the one that read iov from userspace just adding:

	if (unlikely(foo->ptr))
		kfree(foo->ptr);

before 'foo' goes out of scope (or is reused) is probably
not measurable.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] io_uring: fix iovec leaks
  2020-02-11 11:16     ` David Laight
@ 2020-02-11 11:43       ` Pavel Begunkov
  0 siblings, 0 replies; 7+ messages in thread
From: Pavel Begunkov @ 2020-02-11 11:43 UTC (permalink / raw)
  To: David Laight, Jens Axboe, io-uring, linux-kernel

On 2/11/2020 2:16 PM, David Laight wrote:
> From: Pavel Begunkov
>> Sent: 11 February 2020 11:05
>> On 2/11/2020 1:07 PM, David Laight wrote:
>>> From: Pavel Begunkov
>>>> Sent: 07 February 2020 19:05
>>>> Allocated iovec is freed only in io_{read,write,send,recv}(), and just
>>>> leaves it if an error occurred. There are plenty of such cases:
>>>> - cancellation of non-head requests
>>>> - fail grabbing files in __io_queue_sqe()
>>>> - set REQ_F_NOWAIT and returning in __io_queue_sqe()
>>>> - etc.
>>>>
>>>> Add REQ_F_NEED_CLEANUP, which will force such requests with custom
>>>> allocated resources to go through cleanup handlers on put.
>>>
>>> This looks horribly fragile.
>>
>> Well, not as horrible as it may appear -- set the flag whenever you
>> want the corresponding destructor to be called, and clear it when it is
>> no longer needed.
>>
>> I'd love to have something better, maybe even something more intrusive
>> for-next, but that shouldn't hurt the hot path. Any ideas?
> 
> Given all the 'cud chewing' that happens in code paths
> like the one that read iov from userspace just adding:
> 
> 	if (unlikely(foo->ptr))
> 		kfree(foo->ptr);
> 
> before 'foo' goes out of scope (or is reused) is probably
> not measurable.

There are a bunch of problems with it:

1. "out of scope" may end up in the generic code, but not opcode
handler, so the deallocation should be in the generic path, otherwise
it'll leak.

2. @iovec is an opcode-specific thing, so you would need to call a
proper destructor. And that's an indirect call or a switch (as in the
cleanup()) in the hot path.

3. We may need several such resources and/or other resource types (e.g.
struct file, which is needed for splice(2)).

4. Such fields are not initialised until the custom opcode handler comes
into play. And I'm not sure zeroing will solve all cases and won't hurt
performance. Workarounds with something like REQ_F_INITIALISED are not
much better.

That's why I think it's good enough for an immediate fix, it solves the
issue and is easy to backport. It'd be great to look for a more
graceful approach, but that's most probably for 5.7.

> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-07 19:04 [PATCH] io_uring: fix iovec leaks Pavel Begunkov
2020-02-07 19:09 ` Pavel Begunkov
2020-02-07 20:40   ` Jens Axboe
2020-02-11 10:07 ` David Laight
2020-02-11 11:05   ` Pavel Begunkov
2020-02-11 11:16     ` David Laight
2020-02-11 11:43       ` Pavel Begunkov

IO-Uring Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/io-uring/0 io-uring/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 io-uring io-uring/ https://lore.kernel.org/io-uring \
		io-uring@vger.kernel.org
	public-inbox-index io-uring

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.io-uring


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git