linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] io_uring: fix deferred req iovec leak
@ 2020-02-06 16:51 Pavel Begunkov
  2020-02-06 17:04 ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-06 16:51 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

After defer, a request will be prepared, that includes allocating iovec
if needed, and then submitted through io_wq_submit_work() but not custom
handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
iovec, as it's in io-wq and the code goes as follows:

io_read() {
	if (!io_wq_current_is_worker())
		kfree(iovec);
}

Put all deallocation logic in io_{read,write,send,recv}(), which will
leave the memory, if going async with -EAGAIN.

It also fixes a leak after failed io_alloc_async_ctx() in
io_{recv,send}_msg().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 47 ++++++++++++-----------------------------------
 1 file changed, 12 insertions(+), 35 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bff7a03e873f..ce3dbd2b1b5c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2143,17 +2143,6 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 	return req->io == NULL;
 }
 
-static void io_rw_async(struct io_wq_work **workptr)
-{
-	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-	struct iovec *iov = NULL;
-
-	if (req->io->rw.iov != req->io->rw.fast_iov)
-		iov = req->io->rw.iov;
-	io_wq_submit_work(workptr);
-	kfree(iov);
-}
-
 static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 			     struct iovec *iovec, struct iovec *fast_iov,
 			     struct iov_iter *iter)
@@ -2166,7 +2155,6 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 
 		io_req_map_rw(req, io_size, iovec, fast_iov, iter);
 	}
-	req->work.func = io_rw_async;
 	return 0;
 }
 
@@ -2253,8 +2241,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
 		}
 	}
 out_free:
-	if (!io_wq_current_is_worker())
-		kfree(iovec);
+	kfree(iovec);
 	return ret;
 }
 
@@ -2359,8 +2346,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 		}
 	}
 out_free:
-	if (!io_wq_current_is_worker())
-		kfree(iovec);
+	kfree(iovec);
 	return ret;
 }
 
@@ -2955,19 +2941,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
 	return 0;
 }
 
-#if defined(CONFIG_NET)
-static void io_sendrecv_async(struct io_wq_work **workptr)
-{
-	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-	struct iovec *iov = NULL;
-
-	if (req->io->rw.iov != req->io->rw.fast_iov)
-		iov = req->io->msg.iov;
-	io_wq_submit_work(workptr);
-	kfree(iov);
-}
-#endif
-
 static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_NET)
@@ -3036,17 +3009,19 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 		if (force_nonblock && ret == -EAGAIN) {
 			if (req->io)
 				return -EAGAIN;
-			if (io_alloc_async_ctx(req))
+			if (io_alloc_async_ctx(req)) {
+				if (kmsg && kmsg->iov != kmsg->fast_iov)
+					kfree(kmsg->iov);
 				return -ENOMEM;
+			}
 			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
-			req->work.func = io_sendrecv_async;
 			return -EAGAIN;
 		}
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
 	}
 
-	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
 	io_cqring_add_event(req, ret);
 	if (ret < 0)
@@ -3180,17 +3155,19 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 		if (force_nonblock && ret == -EAGAIN) {
 			if (req->io)
 				return -EAGAIN;
-			if (io_alloc_async_ctx(req))
+			if (io_alloc_async_ctx(req)) {
+				if (kmsg && kmsg->iov != kmsg->fast_iov)
+					kfree(kmsg->iov);
 				return -ENOMEM;
+			}
 			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
-			req->work.func = io_sendrecv_async;
 			return -EAGAIN;
 		}
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
 	}
 
-	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
 	io_cqring_add_event(req, ret);
 	if (ret < 0)
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 16:51 [PATCH] io_uring: fix deferred req iovec leak Pavel Begunkov
@ 2020-02-06 17:04 ` Pavel Begunkov
  2020-02-06 17:16   ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-06 17:04 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 4485 bytes --]

On 06/02/2020 19:51, Pavel Begunkov wrote:
> After defer, a request will be prepared, that includes allocating iovec
> if needed, and then submitted through io_wq_submit_work() but not custom
> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
> iovec, as it's in io-wq and the code goes as follows:
> 
> io_read() {
> 	if (!io_wq_current_is_worker())
> 		kfree(iovec);
> }
> 
> Put all deallocation logic in io_{read,write,send,recv}(), which will
> leave the memory, if going async with -EAGAIN.
> 
Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
Apparently, I need to do v2.

> It also fixes a leak after failed io_alloc_async_ctx() in
> io_{recv,send}_msg().
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>  fs/io_uring.c | 47 ++++++++++++-----------------------------------
>  1 file changed, 12 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index bff7a03e873f..ce3dbd2b1b5c 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2143,17 +2143,6 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
>  	return req->io == NULL;
>  }
>  
> -static void io_rw_async(struct io_wq_work **workptr)
> -{
> -	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
> -	struct iovec *iov = NULL;
> -
> -	if (req->io->rw.iov != req->io->rw.fast_iov)
> -		iov = req->io->rw.iov;
> -	io_wq_submit_work(workptr);
> -	kfree(iov);
> -}
> -
>  static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
>  			     struct iovec *iovec, struct iovec *fast_iov,
>  			     struct iov_iter *iter)
> @@ -2166,7 +2155,6 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
>  
>  		io_req_map_rw(req, io_size, iovec, fast_iov, iter);
>  	}
> -	req->work.func = io_rw_async;
>  	return 0;
>  }
>  
> @@ -2253,8 +2241,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
>  		}
>  	}
>  out_free:
> -	if (!io_wq_current_is_worker())
> -		kfree(iovec);
> +	kfree(iovec);
>  	return ret;
>  }
>  
> @@ -2359,8 +2346,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
>  		}
>  	}
>  out_free:
> -	if (!io_wq_current_is_worker())
> -		kfree(iovec);
> +	kfree(iovec);
>  	return ret;
>  }
>  
> @@ -2955,19 +2941,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
>  	return 0;
>  }
>  
> -#if defined(CONFIG_NET)
> -static void io_sendrecv_async(struct io_wq_work **workptr)
> -{
> -	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
> -	struct iovec *iov = NULL;
> -
> -	if (req->io->rw.iov != req->io->rw.fast_iov)
> -		iov = req->io->msg.iov;
> -	io_wq_submit_work(workptr);
> -	kfree(iov);
> -}
> -#endif
> -
>  static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  {
>  #if defined(CONFIG_NET)
> @@ -3036,17 +3009,19 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>  		if (force_nonblock && ret == -EAGAIN) {
>  			if (req->io)
>  				return -EAGAIN;
> -			if (io_alloc_async_ctx(req))
> +			if (io_alloc_async_ctx(req)) {
> +				if (kmsg && kmsg->iov != kmsg->fast_iov)
> +					kfree(kmsg->iov);
>  				return -ENOMEM;
> +			}
>  			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
> -			req->work.func = io_sendrecv_async;
>  			return -EAGAIN;
>  		}
>  		if (ret == -ERESTARTSYS)
>  			ret = -EINTR;
>  	}
>  
> -	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
> +	if (kmsg && kmsg->iov != kmsg->fast_iov)
>  		kfree(kmsg->iov);
>  	io_cqring_add_event(req, ret);
>  	if (ret < 0)
> @@ -3180,17 +3155,19 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>  		if (force_nonblock && ret == -EAGAIN) {
>  			if (req->io)
>  				return -EAGAIN;
> -			if (io_alloc_async_ctx(req))
> +			if (io_alloc_async_ctx(req)) {
> +				if (kmsg && kmsg->iov != kmsg->fast_iov)
> +					kfree(kmsg->iov);
>  				return -ENOMEM;
> +			}
>  			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
> -			req->work.func = io_sendrecv_async;
>  			return -EAGAIN;
>  		}
>  		if (ret == -ERESTARTSYS)
>  			ret = -EINTR;
>  	}
>  
> -	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
> +	if (kmsg && kmsg->iov != kmsg->fast_iov)
>  		kfree(kmsg->iov);
>  	io_cqring_add_event(req, ret);
>  	if (ret < 0)
> 

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 17:04 ` Pavel Begunkov
@ 2020-02-06 17:16   ` Pavel Begunkov
  2020-02-06 19:56     ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-06 17:16 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 4941 bytes --]

On 06/02/2020 20:04, Pavel Begunkov wrote:
> On 06/02/2020 19:51, Pavel Begunkov wrote:
>> After defer, a request will be prepared, that includes allocating iovec
>> if needed, and then submitted through io_wq_submit_work() but not custom
>> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
>> iovec, as it's in io-wq and the code goes as follows:
>>
>> io_read() {
>> 	if (!io_wq_current_is_worker())
>> 		kfree(iovec);
>> }
>>
>> Put all deallocation logic in io_{read,write,send,recv}(), which will
>> leave the memory, if going async with -EAGAIN.
>>
> Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
> Apparently, I need to do v2.
> 
Or not...
Jens, can you please explain what's with the -EAGAIN handling in
io_wq_submit_work()? Checking the code, it seems neither of
read/write/recv/send can return -EAGAIN from async context (i.e.
force_nonblock=false). Are there other ops that can do it?


>> It also fixes a leak after failed io_alloc_async_ctx() in
>> io_{recv,send}_msg().
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>> ---
>>  fs/io_uring.c | 47 ++++++++++++-----------------------------------
>>  1 file changed, 12 insertions(+), 35 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index bff7a03e873f..ce3dbd2b1b5c 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -2143,17 +2143,6 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
>>  	return req->io == NULL;
>>  }
>>  
>> -static void io_rw_async(struct io_wq_work **workptr)
>> -{
>> -	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
>> -	struct iovec *iov = NULL;
>> -
>> -	if (req->io->rw.iov != req->io->rw.fast_iov)
>> -		iov = req->io->rw.iov;
>> -	io_wq_submit_work(workptr);
>> -	kfree(iov);
>> -}
>> -
>>  static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
>>  			     struct iovec *iovec, struct iovec *fast_iov,
>>  			     struct iov_iter *iter)
>> @@ -2166,7 +2155,6 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
>>  
>>  		io_req_map_rw(req, io_size, iovec, fast_iov, iter);
>>  	}
>> -	req->work.func = io_rw_async;
>>  	return 0;
>>  }
>>  
>> @@ -2253,8 +2241,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
>>  		}
>>  	}
>>  out_free:
>> -	if (!io_wq_current_is_worker())
>> -		kfree(iovec);
>> +	kfree(iovec);
>>  	return ret;
>>  }
>>  
>> @@ -2359,8 +2346,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
>>  		}
>>  	}
>>  out_free:
>> -	if (!io_wq_current_is_worker())
>> -		kfree(iovec);
>> +	kfree(iovec);
>>  	return ret;
>>  }
>>  
>> @@ -2955,19 +2941,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
>>  	return 0;
>>  }
>>  
>> -#if defined(CONFIG_NET)
>> -static void io_sendrecv_async(struct io_wq_work **workptr)
>> -{
>> -	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
>> -	struct iovec *iov = NULL;
>> -
>> -	if (req->io->rw.iov != req->io->rw.fast_iov)
>> -		iov = req->io->msg.iov;
>> -	io_wq_submit_work(workptr);
>> -	kfree(iov);
>> -}
>> -#endif
>> -
>>  static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>  {
>>  #if defined(CONFIG_NET)
>> @@ -3036,17 +3009,19 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>>  		if (force_nonblock && ret == -EAGAIN) {
>>  			if (req->io)
>>  				return -EAGAIN;
>> -			if (io_alloc_async_ctx(req))
>> +			if (io_alloc_async_ctx(req)) {
>> +				if (kmsg && kmsg->iov != kmsg->fast_iov)
>> +					kfree(kmsg->iov);
>>  				return -ENOMEM;
>> +			}
>>  			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
>> -			req->work.func = io_sendrecv_async;
>>  			return -EAGAIN;
>>  		}
>>  		if (ret == -ERESTARTSYS)
>>  			ret = -EINTR;
>>  	}
>>  
>> -	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
>> +	if (kmsg && kmsg->iov != kmsg->fast_iov)
>>  		kfree(kmsg->iov);
>>  	io_cqring_add_event(req, ret);
>>  	if (ret < 0)
>> @@ -3180,17 +3155,19 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
>>  		if (force_nonblock && ret == -EAGAIN) {
>>  			if (req->io)
>>  				return -EAGAIN;
>> -			if (io_alloc_async_ctx(req))
>> +			if (io_alloc_async_ctx(req)) {
>> +				if (kmsg && kmsg->iov != kmsg->fast_iov)
>> +					kfree(kmsg->iov);
>>  				return -ENOMEM;
>> +			}
>>  			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
>> -			req->work.func = io_sendrecv_async;
>>  			return -EAGAIN;
>>  		}
>>  		if (ret == -ERESTARTSYS)
>>  			ret = -EINTR;
>>  	}
>>  
>> -	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
>> +	if (kmsg && kmsg->iov != kmsg->fast_iov)
>>  		kfree(kmsg->iov);
>>  	io_cqring_add_event(req, ret);
>>  	if (ret < 0)
>>
> 

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 17:16   ` Pavel Begunkov
@ 2020-02-06 19:56     ` Jens Axboe
  2020-02-06 20:00       ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-02-06 19:56 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring, linux-kernel

On 2/6/20 10:16 AM, Pavel Begunkov wrote:
> On 06/02/2020 20:04, Pavel Begunkov wrote:
>> On 06/02/2020 19:51, Pavel Begunkov wrote:
>>> After defer, a request will be prepared, that includes allocating iovec
>>> if needed, and then submitted through io_wq_submit_work() but not custom
>>> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
>>> iovec, as it's in io-wq and the code goes as follows:
>>>
>>> io_read() {
>>> 	if (!io_wq_current_is_worker())
>>> 		kfree(iovec);
>>> }
>>>
>>> Put all deallocation logic in io_{read,write,send,recv}(), which will
>>> leave the memory, if going async with -EAGAIN.
>>>
>> Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
>> Apparently, I need to do v2.
>>
> Or not...
> Jens, can you please explain what's with the -EAGAIN handling in
> io_wq_submit_work()? Checking the code, it seems neither of
> read/write/recv/send can return -EAGAIN from async context (i.e.
> force_nonblock=false). Are there other ops that can do it?

Nobody should return -EAGAIN with force_nonblock=false, they should
end the io_kiocb inline for that.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 19:56     ` Jens Axboe
@ 2020-02-06 20:00       ` Pavel Begunkov
  2020-02-06 20:16         ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-06 20:00 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 1304 bytes --]

On 06/02/2020 22:56, Jens Axboe wrote:
> On 2/6/20 10:16 AM, Pavel Begunkov wrote:
>> On 06/02/2020 20:04, Pavel Begunkov wrote:
>>> On 06/02/2020 19:51, Pavel Begunkov wrote:
>>>> After defer, a request will be prepared, that includes allocating iovec
>>>> if needed, and then submitted through io_wq_submit_work() but not custom
>>>> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
>>>> iovec, as it's in io-wq and the code goes as follows:
>>>>
>>>> io_read() {
>>>> 	if (!io_wq_current_is_worker())
>>>> 		kfree(iovec);
>>>> }
>>>>
>>>> Put all deallocation logic in io_{read,write,send,recv}(), which will
>>>> leave the memory, if going async with -EAGAIN.
>>>>
>>> Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
>>> Apparently, I need to do v2.
>>>
>> Or not...
>> Jens, can you please explain what's with the -EAGAIN handling in
>> io_wq_submit_work()? Checking the code, it seems neither of
>> read/write/recv/send can return -EAGAIN from async context (i.e.
>> force_nonblock=false). Are there other ops that can do it?
> 
> Nobody should return -EAGAIN with force_nonblock=false, they should
> end the io_kiocb inline for that.
> 

If so for those 4, then the patch should work well.

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 20:00       ` Pavel Begunkov
@ 2020-02-06 20:16         ` Jens Axboe
  2020-02-06 20:39           ` Pavel Begunkov
  2020-02-06 21:00           ` Pavel Begunkov
  0 siblings, 2 replies; 10+ messages in thread
From: Jens Axboe @ 2020-02-06 20:16 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring, linux-kernel

On 2/6/20 1:00 PM, Pavel Begunkov wrote:
> On 06/02/2020 22:56, Jens Axboe wrote:
>> On 2/6/20 10:16 AM, Pavel Begunkov wrote:
>>> On 06/02/2020 20:04, Pavel Begunkov wrote:
>>>> On 06/02/2020 19:51, Pavel Begunkov wrote:
>>>>> After defer, a request will be prepared, that includes allocating iovec
>>>>> if needed, and then submitted through io_wq_submit_work() but not custom
>>>>> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
>>>>> iovec, as it's in io-wq and the code goes as follows:
>>>>>
>>>>> io_read() {
>>>>> 	if (!io_wq_current_is_worker())
>>>>> 		kfree(iovec);
>>>>> }
>>>>>
>>>>> Put all deallocation logic in io_{read,write,send,recv}(), which will
>>>>> leave the memory, if going async with -EAGAIN.
>>>>>
>>>> Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
>>>> Apparently, I need to do v2.
>>>>
>>> Or not...
>>> Jens, can you please explain what's with the -EAGAIN handling in
>>> io_wq_submit_work()? Checking the code, it seems neither of
>>> read/write/recv/send can return -EAGAIN from async context (i.e.
>>> force_nonblock=false). Are there other ops that can do it?
>>
>> Nobody should return -EAGAIN with force_nonblock=false, they should
>> end the io_kiocb inline for that.
>>
> 
> If so for those 4, then the patch should work well.

Maybe I'm dense, but I'm not seeing the leak? We have two cases here:

- The number of vecs is less than UIO_FASTIOV, in which case we use the
  on-stack inline_vecs. If we need to go async, we copy that inline vec
  to our async_ctx area.

- The number of vecs is more than UIO_FASTIOV, this is where iovec is
  allocated by the vec import. If we make it to completion here, we
  free it at the end of eg io_read(). If we need to go async, we stash
  that pointer in our async_ctx area and free it when the work item
  has run and completed.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 20:16         ` Jens Axboe
@ 2020-02-06 20:39           ` Pavel Begunkov
  2020-02-06 20:58             ` Jens Axboe
  2020-02-06 21:00           ` Pavel Begunkov
  1 sibling, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-06 20:39 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 2719 bytes --]

On 06/02/2020 23:16, Jens Axboe wrote:
> On 2/6/20 1:00 PM, Pavel Begunkov wrote:
>> On 06/02/2020 22:56, Jens Axboe wrote:
>>> On 2/6/20 10:16 AM, Pavel Begunkov wrote:
>>>> On 06/02/2020 20:04, Pavel Begunkov wrote:
>>>>> On 06/02/2020 19:51, Pavel Begunkov wrote:
>>>>>> After defer, a request will be prepared, that includes allocating iovec
>>>>>> if needed, and then submitted through io_wq_submit_work() but not custom
>>>>>> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
>>>>>> iovec, as it's in io-wq and the code goes as follows:
>>>>>>
>>>>>> io_read() {
>>>>>> 	if (!io_wq_current_is_worker())
>>>>>> 		kfree(iovec);
>>>>>> }
>>>>>>
>>>>>> Put all deallocation logic in io_{read,write,send,recv}(), which will
>>>>>> leave the memory, if going async with -EAGAIN.
>>>>>>
>>>>> Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
>>>>> Apparently, I need to do v2.
>>>>>
>>>> Or not...
>>>> Jens, can you please explain what's with the -EAGAIN handling in
>>>> io_wq_submit_work()? Checking the code, it seems neither of
>>>> read/write/recv/send can return -EAGAIN from async context (i.e.
>>>> force_nonblock=false). Are there other ops that can do it?
>>>
>>> Nobody should return -EAGAIN with force_nonblock=false, they should
>>> end the io_kiocb inline for that.
>>>
>>
>> If so for those 4, then the patch should work well.
> 
> Maybe I'm dense, but I'm not seeing the leak? We have two cases here:
> 

There is an example:

1. submit a read, which need defer.

2. io_req_defer() allocates ->io and goes io_req_defer_prep() -> io_read_prep().
Let #vecs > UIO_FASTIOV, so the prep() in the presence of ->io will allocate iovec.
Note: that work.func is left io_wq_submit_work

3. At some point @io_wq calls io_wq_submit_work() -> io_issue_sqe() -> io_read(),

4. actual reading succeeds, and it's coming to finalisation and the following
code in particular.

if (!io_wq_current_is_worker())
	kfree(iovec);

5. Because we're in io_wq, the cleanup will not be performed, even though we're
returning with success. And that's a leak.

Do you see anything wrong with it?

> - The number of vecs is less than UIO_FASTIOV, in which case we use the
>   on-stack inline_vecs. If we need to go async, we copy that inline vec
>   to our async_ctx area.
> 
> - The number of vecs is more than UIO_FASTIOV, this is where iovec is
>   allocated by the vec import. If we make it to completion here, we
>   free it at the end of eg io_read(). If we need to go async, we stash
>   that pointer in our async_ctx area and free it when the work item
>   has run and completed.
> 

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 20:39           ` Pavel Begunkov
@ 2020-02-06 20:58             ` Jens Axboe
  2020-02-06 21:03               ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-02-06 20:58 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring, linux-kernel

On 2/6/20 1:39 PM, Pavel Begunkov wrote:
> On 06/02/2020 23:16, Jens Axboe wrote:
>> On 2/6/20 1:00 PM, Pavel Begunkov wrote:
>>> On 06/02/2020 22:56, Jens Axboe wrote:
>>>> On 2/6/20 10:16 AM, Pavel Begunkov wrote:
>>>>> On 06/02/2020 20:04, Pavel Begunkov wrote:
>>>>>> On 06/02/2020 19:51, Pavel Begunkov wrote:
>>>>>>> After defer, a request will be prepared, that includes allocating iovec
>>>>>>> if needed, and then submitted through io_wq_submit_work() but not custom
>>>>>>> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
>>>>>>> iovec, as it's in io-wq and the code goes as follows:
>>>>>>>
>>>>>>> io_read() {
>>>>>>> 	if (!io_wq_current_is_worker())
>>>>>>> 		kfree(iovec);
>>>>>>> }
>>>>>>>
>>>>>>> Put all deallocation logic in io_{read,write,send,recv}(), which will
>>>>>>> leave the memory, if going async with -EAGAIN.
>>>>>>>
>>>>>> Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
>>>>>> Apparently, I need to do v2.
>>>>>>
>>>>> Or not...
>>>>> Jens, can you please explain what's with the -EAGAIN handling in
>>>>> io_wq_submit_work()? Checking the code, it seems neither of
>>>>> read/write/recv/send can return -EAGAIN from async context (i.e.
>>>>> force_nonblock=false). Are there other ops that can do it?
>>>>
>>>> Nobody should return -EAGAIN with force_nonblock=false, they should
>>>> end the io_kiocb inline for that.
>>>>
>>>
>>> If so for those 4, then the patch should work well.
>>
>> Maybe I'm dense, but I'm not seeing the leak? We have two cases here:
>>
> 
> There is an example:
> 
> 1. submit a read, which need defer.
> 
> 2. io_req_defer() allocates ->io and goes io_req_defer_prep() -> io_read_prep().
> Let #vecs > UIO_FASTIOV, so the prep() in the presence of ->io will allocate iovec.
> Note: that work.func is left io_wq_submit_work
> 
> 3. At some point @io_wq calls io_wq_submit_work() -> io_issue_sqe() -> io_read(),
> 
> 4. actual reading succeeds, and it's coming to finalisation and the following
> code in particular.
> 
> if (!io_wq_current_is_worker())
> 	kfree(iovec);
> 
> 5. Because we're in io_wq, the cleanup will not be performed, even though we're
> returning with success. And that's a leak.
> 
> Do you see anything wrong with it?

That's my bad, I didn't read the subject fully, this is specific to
a deferred request. Patch looks good to me, and it cleans it up too
which is always a nice win!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 20:16         ` Jens Axboe
  2020-02-06 20:39           ` Pavel Begunkov
@ 2020-02-06 21:00           ` Pavel Begunkov
  1 sibling, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-06 21:00 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 2307 bytes --]

On 06/02/2020 23:16, Jens Axboe wrote:
> On 2/6/20 1:00 PM, Pavel Begunkov wrote:
>> On 06/02/2020 22:56, Jens Axboe wrote:
>>> On 2/6/20 10:16 AM, Pavel Begunkov wrote:
>>>> On 06/02/2020 20:04, Pavel Begunkov wrote:
>>>>> On 06/02/2020 19:51, Pavel Begunkov wrote:
>>>>>> After defer, a request will be prepared, that includes allocating iovec
>>>>>> if needed, and then submitted through io_wq_submit_work() but not custom
>>>>>> handler (e.g. io_rw_async()/io_sendrecv_async()). However, it'll leak
>>>>>> iovec, as it's in io-wq and the code goes as follows:
>>>>>>
>>>>>> io_read() {
>>>>>> 	if (!io_wq_current_is_worker())
>>>>>> 		kfree(iovec);
>>>>>> }
>>>>>>
>>>>>> Put all deallocation logic in io_{read,write,send,recv}(), which will
>>>>>> leave the memory, if going async with -EAGAIN.
>>>>>>
>>>>> Interestingly, this will fail badly if it returns -EAGAIN from io-wq context.
>>>>> Apparently, I need to do v2.
>>>>>
>>>> Or not...
>>>> Jens, can you please explain what's with the -EAGAIN handling in
>>>> io_wq_submit_work()? Checking the code, it seems neither of
>>>> read/write/recv/send can return -EAGAIN from async context (i.e.
>>>> force_nonblock=false). Are there other ops that can do it?
>>>
>>> Nobody should return -EAGAIN with force_nonblock=false, they should
>>> end the io_kiocb inline for that.
>>>
>>
>> If so for those 4, then the patch should work well.
> 
> Maybe I'm dense, but I'm not seeing the leak? We have two cases here:
> 
> - The number of vecs is less than UIO_FASTIOV, in which case we use the
>   on-stack inline_vecs. If we need to go async, we copy that inline vec
>   to our async_ctx area.
> 
> - The number of vecs is more than UIO_FASTIOV, this is where iovec is
>   allocated by the vec import. If we make it to completion here, we
>   free it at the end of eg io_read(). If we need to go async, we stash
>   that pointer in our async_ctx area and free it when the work item
>   has run and completed.
> 

BTW, there are plenty of ways to leak even with this applied. E.g. double
io_read_prep() call with ->io, and that may happen. Or by not punting in
__io_queue_sqe() after io_issue_sqe()==-EAGAIN.

That's the next patch I'm preparing, and then I'm good for splice(2).

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] io_uring: fix deferred req iovec leak
  2020-02-06 20:58             ` Jens Axboe
@ 2020-02-06 21:03               ` Pavel Begunkov
  0 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-06 21:03 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 1139 bytes --]

On 06/02/2020 23:58, Jens Axboe wrote:
>>
>> 1. submit a read, which need defer.
>>
>> 2. io_req_defer() allocates ->io and goes io_req_defer_prep() -> io_read_prep().
>> Let #vecs > UIO_FASTIOV, so the prep() in the presence of ->io will allocate iovec.
>> Note: that work.func is left io_wq_submit_work
>>
>> 3. At some point @io_wq calls io_wq_submit_work() -> io_issue_sqe() -> io_read(),
>>
>> 4. actual reading succeeds, and it's coming to finalisation and the following
>> code in particular.
>>
>> if (!io_wq_current_is_worker())
>> 	kfree(iovec);
>>
>> 5. Because we're in io_wq, the cleanup will not be performed, even though we're
>> returning with success. And that's a leak.
>>
>> Do you see anything wrong with it?
> 
> That's my bad, I didn't read the subject fully, this is specific to
> a deferred request. Patch looks good to me, and it cleans it up too
> which is always a nice win!
> 

Great we're agree. Though, it's not only about defer, it's just one example.
The another one is a non-head request, for which io_submit_sqe() allocates ->io,
+ REQ_F_FORCE_ASYNC.

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-02-06 21:04 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-06 16:51 [PATCH] io_uring: fix deferred req iovec leak Pavel Begunkov
2020-02-06 17:04 ` Pavel Begunkov
2020-02-06 17:16   ` Pavel Begunkov
2020-02-06 19:56     ` Jens Axboe
2020-02-06 20:00       ` Pavel Begunkov
2020-02-06 20:16         ` Jens Axboe
2020-02-06 20:39           ` Pavel Begunkov
2020-02-06 20:58             ` Jens Axboe
2020-02-06 21:03               ` Pavel Begunkov
2020-02-06 21:00           ` Pavel Begunkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).