Netdev Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH net-next,v2] net/tls: fix race condition causing kernel panic
@ 2020-05-22 20:10 Vinay Kumar Yadav
  2020-05-25 20:25 ` Jakub Kicinski
  2020-05-26  0:42 ` David Miller
  0 siblings, 2 replies; 3+ messages in thread
From: Vinay Kumar Yadav @ 2020-05-22 20:10 UTC (permalink / raw)
  To: netdev, davem; +Cc: kuba, secdev, Vinay Kumar Yadav

tls_sw_recvmsg() and tls_decrypt_done() can be run concurrently.
// tls_sw_recvmsg()
	if (atomic_read(&ctx->decrypt_pending))
		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
	else
		reinit_completion(&ctx->async_wait.completion);

//tls_decrypt_done()
  	pending = atomic_dec_return(&ctx->decrypt_pending);

  	if (!pending && READ_ONCE(ctx->async_notify))
  		complete(&ctx->async_wait.completion);

Consider the scenario tls_decrypt_done() is about to run complete()

	if (!pending && READ_ONCE(ctx->async_notify))

and tls_sw_recvmsg() reads decrypt_pending == 0, does reinit_completion(),
then tls_decrypt_done() runs complete(). This sequence of execution
results in wrong completion. Consequently, for next decrypt request,
it will not wait for completion, eventually on connection close, crypto
resources freed, there is no way to handle pending decrypt response.

This race condition can be avoided by having atomic_read() mutually
exclusive with atomic_dec_return(),complete().Intoduced spin lock to
ensure the mutual exclution.

Addressed similar problem in tx direction.

v1->v2:
- More readable commit message.
- Corrected the lock to fix new race scenario.
- Removed barrier which is not needed now.

Signed-off-by: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
---
 include/net/tls.h |  4 ++++
 net/tls/tls_sw.c  | 33 +++++++++++++++++++++++++++------
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index bf9eb4823933..18cd4f418464 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -135,6 +135,8 @@ struct tls_sw_context_tx {
 	struct tls_rec *open_rec;
 	struct list_head tx_list;
 	atomic_t encrypt_pending;
+	/* protect crypto_wait with encrypt_pending */
+	spinlock_t encrypt_compl_lock;
 	int async_notify;
 	u8 async_capable:1;
 
@@ -155,6 +157,8 @@ struct tls_sw_context_rx {
 	u8 async_capable:1;
 	u8 decrypted:1;
 	atomic_t decrypt_pending;
+	/* protect crypto_wait with decrypt_pending*/
+	spinlock_t decrypt_compl_lock;
 	bool async_notify;
 };
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index c98e602a1a2d..f29ea717590c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -206,10 +206,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 
 	kfree(aead_req);
 
+	spin_lock_bh(&ctx->decrypt_compl_lock);
 	pending = atomic_dec_return(&ctx->decrypt_pending);
 
-	if (!pending && READ_ONCE(ctx->async_notify))
+	if (!pending && ctx->async_notify)
 		complete(&ctx->async_wait.completion);
+	spin_unlock_bh(&ctx->decrypt_compl_lock);
 }
 
 static int tls_do_decryption(struct sock *sk,
@@ -467,10 +469,12 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 			ready = true;
 	}
 
+	spin_lock_bh(&ctx->encrypt_compl_lock);
 	pending = atomic_dec_return(&ctx->encrypt_pending);
 
-	if (!pending && READ_ONCE(ctx->async_notify))
+	if (!pending && ctx->async_notify)
 		complete(&ctx->async_wait.completion);
+	spin_unlock_bh(&ctx->encrypt_compl_lock);
 
 	if (!ready)
 		return;
@@ -924,6 +928,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	int num_zc = 0;
 	int orig_size;
 	int ret = 0;
+	int pending;
 
 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
 		return -EOPNOTSUPP;
@@ -1090,13 +1095,19 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 		goto send_end;
 	} else if (num_zc) {
 		/* Wait for pending encryptions to get completed */
-		smp_store_mb(ctx->async_notify, true);
+		spin_lock_bh(&ctx->encrypt_compl_lock);
+		ctx->async_notify = true;
 
-		if (atomic_read(&ctx->encrypt_pending))
+		pending = atomic_read(&ctx->encrypt_pending);
+		spin_unlock_bh(&ctx->encrypt_compl_lock);
+		if (pending)
 			crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
 		else
 			reinit_completion(&ctx->async_wait.completion);
 
+		/* There can be no concurrent accesses, since we have no
+		 * pending encrypt operations
+		 */
 		WRITE_ONCE(ctx->async_notify, false);
 
 		if (ctx->async_wait.err) {
@@ -1727,6 +1738,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	bool is_peek = flags & MSG_PEEK;
 	int num_async = 0;
+	int pending;
 
 	flags |= nonblock;
 
@@ -1889,8 +1901,11 @@ int tls_sw_recvmsg(struct sock *sk,
 recv_end:
 	if (num_async) {
 		/* Wait for all previously submitted records to be decrypted */
-		smp_store_mb(ctx->async_notify, true);
-		if (atomic_read(&ctx->decrypt_pending)) {
+		spin_lock_bh(&ctx->decrypt_compl_lock);
+		ctx->async_notify = true;
+		pending = atomic_read(&ctx->decrypt_pending);
+		spin_unlock_bh(&ctx->decrypt_compl_lock);
+		if (pending) {
 			err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
 			if (err) {
 				/* one of async decrypt failed */
@@ -1902,6 +1917,10 @@ int tls_sw_recvmsg(struct sock *sk,
 		} else {
 			reinit_completion(&ctx->async_wait.completion);
 		}
+
+		/* There can be no concurrent accesses, since we have no
+		 * pending decrypt operations
+		 */
 		WRITE_ONCE(ctx->async_notify, false);
 
 		/* Drain records from the rx_list & copy if required */
@@ -2287,6 +2306,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 
 	if (tx) {
 		crypto_init_wait(&sw_ctx_tx->async_wait);
+		spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
 		crypto_info = &ctx->crypto_send.info;
 		cctx = &ctx->tx;
 		aead = &sw_ctx_tx->aead_send;
@@ -2295,6 +2315,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		sw_ctx_tx->tx_work.sk = sk;
 	} else {
 		crypto_init_wait(&sw_ctx_rx->async_wait);
+		spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
 		crypto_info = &ctx->crypto_recv.info;
 		cctx = &ctx->rx;
 		skb_queue_head_init(&sw_ctx_rx->rx_list);
-- 
2.18.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next,v2] net/tls: fix race condition causing kernel panic
  2020-05-22 20:10 [PATCH net-next,v2] net/tls: fix race condition causing kernel panic Vinay Kumar Yadav
@ 2020-05-25 20:25 ` Jakub Kicinski
  2020-05-26  0:42 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: Jakub Kicinski @ 2020-05-25 20:25 UTC (permalink / raw)
  To: Vinay Kumar Yadav; +Cc: netdev, davem, secdev

On Sat, 23 May 2020 01:40:31 +0530 Vinay Kumar Yadav wrote:
> tls_sw_recvmsg() and tls_decrypt_done() can be run concurrently.
> // tls_sw_recvmsg()
> 	if (atomic_read(&ctx->decrypt_pending))
> 		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
> 	else
> 		reinit_completion(&ctx->async_wait.completion);
> 
> //tls_decrypt_done()
>   	pending = atomic_dec_return(&ctx->decrypt_pending);
> 
>   	if (!pending && READ_ONCE(ctx->async_notify))
>   		complete(&ctx->async_wait.completion);
> 
> Consider the scenario tls_decrypt_done() is about to run complete()
> 
> 	if (!pending && READ_ONCE(ctx->async_notify))
> 
> and tls_sw_recvmsg() reads decrypt_pending == 0, does reinit_completion(),
> then tls_decrypt_done() runs complete(). This sequence of execution
> results in wrong completion. Consequently, for next decrypt request,
> it will not wait for completion, eventually on connection close, crypto
> resources freed, there is no way to handle pending decrypt response.
> 
> This race condition can be avoided by having atomic_read() mutually
> exclusive with atomic_dec_return(),complete().Intoduced spin lock to
> ensure the mutual exclution.
> 
> Addressed similar problem in tx direction.
> 
> v1->v2:
> - More readable commit message.
> - Corrected the lock to fix new race scenario.
> - Removed barrier which is not needed now.
> 
> Signed-off-by: Vinay Kumar Yadav <vinay.yadav@chelsio.com>

The tree should have been net, since this is a fix for:

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")

but admittedly it's not very urgent.

Reviewed-by: Jakub Kicinski <kuba@kernel.org>

We can try the trick with recording async_notify as a large bias 
on crypt_pending if the spin lock slows things down.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next,v2] net/tls: fix race condition causing kernel panic
  2020-05-22 20:10 [PATCH net-next,v2] net/tls: fix race condition causing kernel panic Vinay Kumar Yadav
  2020-05-25 20:25 ` Jakub Kicinski
@ 2020-05-26  0:42 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: David Miller @ 2020-05-26  0:42 UTC (permalink / raw)
  To: vinay.yadav; +Cc: netdev, kuba, secdev

From: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
Date: Sat, 23 May 2020 01:40:31 +0530

> tls_sw_recvmsg() and tls_decrypt_done() can be run concurrently.
> // tls_sw_recvmsg()
> 	if (atomic_read(&ctx->decrypt_pending))
> 		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
> 	else
> 		reinit_completion(&ctx->async_wait.completion);
> 
> //tls_decrypt_done()
>   	pending = atomic_dec_return(&ctx->decrypt_pending);
> 
>   	if (!pending && READ_ONCE(ctx->async_notify))
>   		complete(&ctx->async_wait.completion);
> 
> Consider the scenario tls_decrypt_done() is about to run complete()
> 
> 	if (!pending && READ_ONCE(ctx->async_notify))
> 
> and tls_sw_recvmsg() reads decrypt_pending == 0, does reinit_completion(),
> then tls_decrypt_done() runs complete(). This sequence of execution
> results in wrong completion. Consequently, for next decrypt request,
> it will not wait for completion, eventually on connection close, crypto
> resources freed, there is no way to handle pending decrypt response.
> 
> This race condition can be avoided by having atomic_read() mutually
> exclusive with atomic_dec_return(),complete().Intoduced spin lock to
> ensure the mutual exclution.
> 
> Addressed similar problem in tx direction.
> 
> v1->v2:
> - More readable commit message.
> - Corrected the lock to fix new race scenario.
> - Removed barrier which is not needed now.
> 
> Signed-off-by: Vinay Kumar Yadav <vinay.yadav@chelsio.com>

Applied to 'net' as this is a bug fix, with Fixes tag from Jakub added,
and queued up for -stable.

Thanks.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, back to index

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-22 20:10 [PATCH net-next,v2] net/tls: fix race condition causing kernel panic Vinay Kumar Yadav
2020-05-25 20:25 ` Jakub Kicinski
2020-05-26  0:42 ` David Miller

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org
	public-inbox-index netdev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git