All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] Bluetooth: fix inconsistent lock state in sco
@ 2021-07-13 16:28 ` Desmond Cheong Zhi Xi
  0 siblings, 0 replies; 7+ messages in thread
From: Desmond Cheong Zhi Xi @ 2021-07-13 16:28 UTC (permalink / raw)
  To: marcel, johan.hedberg, luiz.dentz, davem, kuba, stefan
  Cc: Desmond Cheong Zhi Xi, linux-bluetooth, netdev, linux-kernel,
	skhan, gregkh, linux-kernel-mentees, syzbot+2f6d7c28bb4bf7e82060

Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
usage in sco_conn_del and sco_sock_timeout that could lead to
deadlocks.

This inconsistent lock state can also happen in sco_conn_ready,
rfcomm_connect_ind, and bt_accept_enqueue.

The issue is that these functions take a spin lock on the socket with
interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
context. This could lead to deadlocks:

       CPU0
       ----
  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
  <Interrupt>
    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

 *** DEADLOCK ***

We fix this by ensuring that local bh is disabled before calling
bh_lock_sock.

After doing this, we additionally need to protect sco_conn_lock by
disabling local bh.

This is necessary because sco_conn_del makes a call to sco_chan_del
while holding on to the sock lock, and sco_chan_del itself makes a
call to sco_conn_lock. If sco_conn_lock is held elsewhere with
interrupts enabled, there could still be a
slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
follows:

        CPU0                    CPU1
        ----                    ----
   lock(&conn->lock#2);
                                local_irq_disable();
                                lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
                                lock(&conn->lock#2);
   <Interrupt>
     lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

  *** DEADLOCK ***

As sco_conn_del now disables local bh before calling sco_chan_del,
instead of disabling local bh for the calls to sco_conn_lock in
sco_chan_del, we instead wrap other calls to sco_chan_del with
local_bh_disable/enable.

Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
---

Hi,

The previous version of this patch was a bit of a mess, so I made the
following changes.

v1 -> v2:
- Instead of pulling out the clean-up code out from sco_chan_del and
using it directly in sco_conn_del, disable local irqs for relevant
sections.
- Disable local irqs more thoroughly for instances of
bh_lock_sock/bh_lock_sock_nested in the bluetooth subsystem.
Specifically, the calls in af_bluetooth.c and rfcomm/sock.c are now made
with local irqs disabled as well.

Best wishes,
Desmond

 net/bluetooth/rfcomm/sock.c |  2 ++
 net/bluetooth/sco.c         | 26 +++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ae6f80730561..d8734abb2df4 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -974,6 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 	if (!parent)
 		return 0;
 
+	local_bh_disable();
 	bh_lock_sock(parent);
 
 	/* Check for backlog size */
@@ -1002,6 +1003,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 
 done:
 	bh_unlock_sock(parent);
+	local_bh_enable();
 
 	if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
 		parent->sk_state_change(parent);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3bd41563f118..2548b8f81473 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -167,16 +167,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
 	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
 
 	/* Kill socket */
+	local_bh_disable();
 	sco_conn_lock(conn);
 	sk = conn->sk;
 	sco_conn_unlock(conn);
+	local_bh_enable();
 
 	if (sk) {
 		sock_hold(sk);
+
+		local_bh_disable();
 		bh_lock_sock(sk);
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
 		bh_unlock_sock(sk);
+		local_bh_enable();
+
 		sco_sock_kill(sk);
 		sock_put(sk);
 	}
@@ -202,6 +208,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
 {
 	int err = 0;
 
+	local_bh_disable();
 	sco_conn_lock(conn);
 	if (conn->sk)
 		err = -EBUSY;
@@ -209,6 +216,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
 		__sco_chan_add(conn, sk, parent);
 
 	sco_conn_unlock(conn);
+	local_bh_enable();
 	return err;
 }
 
@@ -303,9 +311,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
 {
 	struct sock *sk;
 
+	local_bh_disable();
 	sco_conn_lock(conn);
 	sk = conn->sk;
 	sco_conn_unlock(conn);
+	local_bh_enable();
 
 	if (!sk)
 		goto drop;
@@ -420,18 +430,25 @@ static void __sco_sock_close(struct sock *sk)
 		if (sco_pi(sk)->conn->hcon) {
 			sk->sk_state = BT_DISCONN;
 			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
+			local_bh_disable();
 			sco_conn_lock(sco_pi(sk)->conn);
 			hci_conn_drop(sco_pi(sk)->conn->hcon);
 			sco_pi(sk)->conn->hcon = NULL;
 			sco_conn_unlock(sco_pi(sk)->conn);
-		} else
+			local_bh_enable();
+		} else {
+			local_bh_disable();
 			sco_chan_del(sk, ECONNRESET);
+			local_bh_enable();
+		}
 		break;
 
 	case BT_CONNECT2:
 	case BT_CONNECT:
 	case BT_DISCONN:
+		local_bh_disable();
 		sco_chan_del(sk, ECONNRESET);
+		local_bh_enable();
 		break;
 
 	default:
@@ -1084,21 +1101,26 @@ static void sco_conn_ready(struct sco_conn *conn)
 
 	if (sk) {
 		sco_sock_clear_timer(sk);
+		local_bh_disable();
 		bh_lock_sock(sk);
 		sk->sk_state = BT_CONNECTED;
 		sk->sk_state_change(sk);
 		bh_unlock_sock(sk);
+		local_bh_enable();
 	} else {
+		local_bh_disable();
 		sco_conn_lock(conn);
 
 		if (!conn->hcon) {
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
 		parent = sco_get_sock_listen(&conn->hcon->src);
 		if (!parent) {
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
@@ -1109,6 +1131,7 @@ static void sco_conn_ready(struct sco_conn *conn)
 		if (!sk) {
 			bh_unlock_sock(parent);
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
@@ -1131,6 +1154,7 @@ static void sco_conn_ready(struct sco_conn *conn)
 		bh_unlock_sock(parent);
 
 		sco_conn_unlock(conn);
+		local_bh_enable();
 	}
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2] Bluetooth: fix inconsistent lock state in sco
@ 2021-07-13 16:28 ` Desmond Cheong Zhi Xi
  0 siblings, 0 replies; 7+ messages in thread
From: Desmond Cheong Zhi Xi @ 2021-07-13 16:28 UTC (permalink / raw)
  To: marcel, johan.hedberg, luiz.dentz, davem, kuba, stefan
  Cc: syzbot+2f6d7c28bb4bf7e82060, linux-kernel, linux-bluetooth,
	netdev, Desmond Cheong Zhi Xi, linux-kernel-mentees

Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
usage in sco_conn_del and sco_sock_timeout that could lead to
deadlocks.

This inconsistent lock state can also happen in sco_conn_ready,
rfcomm_connect_ind, and bt_accept_enqueue.

The issue is that these functions take a spin lock on the socket with
interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
context. This could lead to deadlocks:

       CPU0
       ----
  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
  <Interrupt>
    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

 *** DEADLOCK ***

We fix this by ensuring that local bh is disabled before calling
bh_lock_sock.

After doing this, we additionally need to protect sco_conn_lock by
disabling local bh.

This is necessary because sco_conn_del makes a call to sco_chan_del
while holding on to the sock lock, and sco_chan_del itself makes a
call to sco_conn_lock. If sco_conn_lock is held elsewhere with
interrupts enabled, there could still be a
slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
follows:

        CPU0                    CPU1
        ----                    ----
   lock(&conn->lock#2);
                                local_irq_disable();
                                lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
                                lock(&conn->lock#2);
   <Interrupt>
     lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

  *** DEADLOCK ***

As sco_conn_del now disables local bh before calling sco_chan_del,
instead of disabling local bh for the calls to sco_conn_lock in
sco_chan_del, we instead wrap other calls to sco_chan_del with
local_bh_disable/enable.

Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
---

Hi,

The previous version of this patch was a bit of a mess, so I made the
following changes.

v1 -> v2:
- Instead of pulling out the clean-up code out from sco_chan_del and
using it directly in sco_conn_del, disable local irqs for relevant
sections.
- Disable local irqs more thoroughly for instances of
bh_lock_sock/bh_lock_sock_nested in the bluetooth subsystem.
Specifically, the calls in af_bluetooth.c and rfcomm/sock.c are now made
with local irqs disabled as well.

Best wishes,
Desmond

 net/bluetooth/rfcomm/sock.c |  2 ++
 net/bluetooth/sco.c         | 26 +++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ae6f80730561..d8734abb2df4 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -974,6 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 	if (!parent)
 		return 0;
 
+	local_bh_disable();
 	bh_lock_sock(parent);
 
 	/* Check for backlog size */
@@ -1002,6 +1003,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 
 done:
 	bh_unlock_sock(parent);
+	local_bh_enable();
 
 	if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
 		parent->sk_state_change(parent);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3bd41563f118..2548b8f81473 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -167,16 +167,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
 	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
 
 	/* Kill socket */
+	local_bh_disable();
 	sco_conn_lock(conn);
 	sk = conn->sk;
 	sco_conn_unlock(conn);
+	local_bh_enable();
 
 	if (sk) {
 		sock_hold(sk);
+
+		local_bh_disable();
 		bh_lock_sock(sk);
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
 		bh_unlock_sock(sk);
+		local_bh_enable();
+
 		sco_sock_kill(sk);
 		sock_put(sk);
 	}
@@ -202,6 +208,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
 {
 	int err = 0;
 
+	local_bh_disable();
 	sco_conn_lock(conn);
 	if (conn->sk)
 		err = -EBUSY;
@@ -209,6 +216,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
 		__sco_chan_add(conn, sk, parent);
 
 	sco_conn_unlock(conn);
+	local_bh_enable();
 	return err;
 }
 
@@ -303,9 +311,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
 {
 	struct sock *sk;
 
+	local_bh_disable();
 	sco_conn_lock(conn);
 	sk = conn->sk;
 	sco_conn_unlock(conn);
+	local_bh_enable();
 
 	if (!sk)
 		goto drop;
@@ -420,18 +430,25 @@ static void __sco_sock_close(struct sock *sk)
 		if (sco_pi(sk)->conn->hcon) {
 			sk->sk_state = BT_DISCONN;
 			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
+			local_bh_disable();
 			sco_conn_lock(sco_pi(sk)->conn);
 			hci_conn_drop(sco_pi(sk)->conn->hcon);
 			sco_pi(sk)->conn->hcon = NULL;
 			sco_conn_unlock(sco_pi(sk)->conn);
-		} else
+			local_bh_enable();
+		} else {
+			local_bh_disable();
 			sco_chan_del(sk, ECONNRESET);
+			local_bh_enable();
+		}
 		break;
 
 	case BT_CONNECT2:
 	case BT_CONNECT:
 	case BT_DISCONN:
+		local_bh_disable();
 		sco_chan_del(sk, ECONNRESET);
+		local_bh_enable();
 		break;
 
 	default:
@@ -1084,21 +1101,26 @@ static void sco_conn_ready(struct sco_conn *conn)
 
 	if (sk) {
 		sco_sock_clear_timer(sk);
+		local_bh_disable();
 		bh_lock_sock(sk);
 		sk->sk_state = BT_CONNECTED;
 		sk->sk_state_change(sk);
 		bh_unlock_sock(sk);
+		local_bh_enable();
 	} else {
+		local_bh_disable();
 		sco_conn_lock(conn);
 
 		if (!conn->hcon) {
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
 		parent = sco_get_sock_listen(&conn->hcon->src);
 		if (!parent) {
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
@@ -1109,6 +1131,7 @@ static void sco_conn_ready(struct sco_conn *conn)
 		if (!sk) {
 			bh_unlock_sock(parent);
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
@@ -1131,6 +1154,7 @@ static void sco_conn_ready(struct sco_conn *conn)
 		bh_unlock_sock(parent);
 
 		sco_conn_unlock(conn);
+		local_bh_enable();
 	}
 }
 
-- 
2.25.1

_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: [v2] Bluetooth: fix inconsistent lock state in sco
  2021-07-13 16:28 ` Desmond Cheong Zhi Xi
  (?)
@ 2021-07-13 17:25 ` bluez.test.bot
  -1 siblings, 0 replies; 7+ messages in thread
From: bluez.test.bot @ 2021-07-13 17:25 UTC (permalink / raw)
  To: linux-bluetooth, desmondcheongzx

[-- Attachment #1: Type: text/plain, Size: 2727 bytes --]

This is automated email and please do not reply to this email!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
This is a CI test results with your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=514869

---Test result---

Test Summary:
CheckPatch                    PASS      0.60 seconds
GitLint                       PASS      0.10 seconds
BuildKernel                   PASS      501.68 seconds
TestRunner: Setup             PASS      336.91 seconds
TestRunner: l2cap-tester      PASS      2.57 seconds
TestRunner: bnep-tester       PASS      1.93 seconds
TestRunner: mgmt-tester       PASS      29.64 seconds
TestRunner: rfcomm-tester     PASS      2.08 seconds
TestRunner: sco-tester        PASS      2.04 seconds
TestRunner: smp-tester        FAIL      2.12 seconds
TestRunner: userchan-tester   PASS      1.94 seconds

Details
##############################
Test: CheckPatch - PASS - 0.60 seconds
Run checkpatch.pl script with rule in .checkpatch.conf


##############################
Test: GitLint - PASS - 0.10 seconds
Run gitlint with rule in .gitlint


##############################
Test: BuildKernel - PASS - 501.68 seconds
Build Kernel with minimal configuration supports Bluetooth


##############################
Test: TestRunner: Setup - PASS - 336.91 seconds
Setup environment for running Test Runner


##############################
Test: TestRunner: l2cap-tester - PASS - 2.57 seconds
Run test-runner with l2cap-tester
Total: 40, Passed: 40 (100.0%), Failed: 0, Not Run: 0

##############################
Test: TestRunner: bnep-tester - PASS - 1.93 seconds
Run test-runner with bnep-tester
Total: 1, Passed: 1 (100.0%), Failed: 0, Not Run: 0

##############################
Test: TestRunner: mgmt-tester - PASS - 29.64 seconds
Run test-runner with mgmt-tester
Total: 446, Passed: 443 (99.3%), Failed: 0, Not Run: 3

##############################
Test: TestRunner: rfcomm-tester - PASS - 2.08 seconds
Run test-runner with rfcomm-tester
Total: 9, Passed: 9 (100.0%), Failed: 0, Not Run: 0

##############################
Test: TestRunner: sco-tester - PASS - 2.04 seconds
Run test-runner with sco-tester
Total: 8, Passed: 8 (100.0%), Failed: 0, Not Run: 0

##############################
Test: TestRunner: smp-tester - FAIL - 2.12 seconds
Run test-runner with smp-tester
Total: 8, Passed: 7 (87.5%), Failed: 1, Not Run: 0

Failed Test Cases
SMP Client - SC Request 2                            Failed       0.027 seconds

##############################
Test: TestRunner: userchan-tester - PASS - 1.94 seconds
Run test-runner with userchan-tester
Total: 3, Passed: 3 (100.0%), Failed: 0, Not Run: 0



---
Regards,
Linux Bluetooth


[-- Attachment #2: l2cap-tester.log --]
[-- Type: application/octet-stream, Size: 44350 bytes --]

[-- Attachment #3: bnep-tester.log --]
[-- Type: application/octet-stream, Size: 3555 bytes --]

[-- Attachment #4: mgmt-tester.log --]
[-- Type: application/octet-stream, Size: 614404 bytes --]

[-- Attachment #5: rfcomm-tester.log --]
[-- Type: application/octet-stream, Size: 11677 bytes --]

[-- Attachment #6: sco-tester.log --]
[-- Type: application/octet-stream, Size: 9912 bytes --]

[-- Attachment #7: smp-tester.log --]
[-- Type: application/octet-stream, Size: 11705 bytes --]

[-- Attachment #8: userchan-tester.log --]
[-- Type: application/octet-stream, Size: 5454 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] Bluetooth: fix inconsistent lock state in sco
  2021-07-13 16:28 ` Desmond Cheong Zhi Xi
@ 2021-07-14 19:12   ` Luiz Augusto von Dentz
  -1 siblings, 0 replies; 7+ messages in thread
From: Luiz Augusto von Dentz @ 2021-07-14 19:12 UTC (permalink / raw)
  To: Desmond Cheong Zhi Xi
  Cc: Marcel Holtmann, Johan Hedberg, David Miller, Jakub Kicinski,
	stefan, linux-bluetooth, open list:NETWORKING [GENERAL],
	Linux Kernel Mailing List, skhan, Greg Kroah-Hartman,
	linux-kernel-mentees, syzbot+2f6d7c28bb4bf7e82060

Hi Desmond,

On Tue, Jul 13, 2021 at 9:29 AM Desmond Cheong Zhi Xi
<desmondcheongzx@gmail.com> wrote:
>
> Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
> usage in sco_conn_del and sco_sock_timeout that could lead to
> deadlocks.
>
> This inconsistent lock state can also happen in sco_conn_ready,
> rfcomm_connect_ind, and bt_accept_enqueue.
>
> The issue is that these functions take a spin lock on the socket with
> interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
> context. This could lead to deadlocks:
>
>        CPU0
>        ----
>   lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>   <Interrupt>
>     lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>
>  *** DEADLOCK ***
>
> We fix this by ensuring that local bh is disabled before calling
> bh_lock_sock.
>
> After doing this, we additionally need to protect sco_conn_lock by
> disabling local bh.
>
> This is necessary because sco_conn_del makes a call to sco_chan_del
> while holding on to the sock lock, and sco_chan_del itself makes a
> call to sco_conn_lock. If sco_conn_lock is held elsewhere with
> interrupts enabled, there could still be a
> slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
> follows:
>
>         CPU0                    CPU1
>         ----                    ----
>    lock(&conn->lock#2);
>                                 local_irq_disable();
>                                 lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>                                 lock(&conn->lock#2);
>    <Interrupt>
>      lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>
>   *** DEADLOCK ***
>
> As sco_conn_del now disables local bh before calling sco_chan_del,
> instead of disabling local bh for the calls to sco_conn_lock in
> sco_chan_del, we instead wrap other calls to sco_chan_del with
> local_bh_disable/enable.
>
> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
> ---
>
> Hi,
>
> The previous version of this patch was a bit of a mess, so I made the
> following changes.
>
> v1 -> v2:
> - Instead of pulling out the clean-up code out from sco_chan_del and
> using it directly in sco_conn_del, disable local irqs for relevant
> sections.
> - Disable local irqs more thoroughly for instances of
> bh_lock_sock/bh_lock_sock_nested in the bluetooth subsystem.
> Specifically, the calls in af_bluetooth.c and rfcomm/sock.c are now made
> with local irqs disabled as well.
>
> Best wishes,
> Desmond
>
>  net/bluetooth/rfcomm/sock.c |  2 ++
>  net/bluetooth/sco.c         | 26 +++++++++++++++++++++++++-
>  2 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
> index ae6f80730561..d8734abb2df4 100644
> --- a/net/bluetooth/rfcomm/sock.c
> +++ b/net/bluetooth/rfcomm/sock.c
> @@ -974,6 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>         if (!parent)
>                 return 0;
>
> +       local_bh_disable();
>         bh_lock_sock(parent);
>
>         /* Check for backlog size */
> @@ -1002,6 +1003,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>
>  done:
>         bh_unlock_sock(parent);
> +       local_bh_enable();

Looks like you are touching RFCOMM as well, perhaps you should have it
split, also how about other sockets like L2CAP and HCI are they
affected? There seems to be a lot of problem with the likes of
bh_lock_sock I wonder if going with local_bh_disable is overall a
better way to handle.

>         if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
>                 parent->sk_state_change(parent);
> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
> index 3bd41563f118..2548b8f81473 100644
> --- a/net/bluetooth/sco.c
> +++ b/net/bluetooth/sco.c
> @@ -167,16 +167,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
>         BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
>
>         /* Kill socket */
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         sk = conn->sk;
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>
>         if (sk) {
>                 sock_hold(sk);
> +
> +               local_bh_disable();
>                 bh_lock_sock(sk);
>                 sco_sock_clear_timer(sk);
>                 sco_chan_del(sk, err);
>                 bh_unlock_sock(sk);
> +               local_bh_enable();
> +
>                 sco_sock_kill(sk);
>                 sock_put(sk);
>         }
> @@ -202,6 +208,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>  {
>         int err = 0;
>
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         if (conn->sk)
>                 err = -EBUSY;
> @@ -209,6 +216,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>                 __sco_chan_add(conn, sk, parent);
>
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>         return err;
>  }
>
> @@ -303,9 +311,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
>  {
>         struct sock *sk;
>
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         sk = conn->sk;
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>
>         if (!sk)
>                 goto drop;
> @@ -420,18 +430,25 @@ static void __sco_sock_close(struct sock *sk)
>                 if (sco_pi(sk)->conn->hcon) {
>                         sk->sk_state = BT_DISCONN;
>                         sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
> +                       local_bh_disable();
>                         sco_conn_lock(sco_pi(sk)->conn);
>                         hci_conn_drop(sco_pi(sk)->conn->hcon);
>                         sco_pi(sk)->conn->hcon = NULL;
>                         sco_conn_unlock(sco_pi(sk)->conn);
> -               } else
> +                       local_bh_enable();
> +               } else {
> +                       local_bh_disable();
>                         sco_chan_del(sk, ECONNRESET);
> +                       local_bh_enable();
> +               }
>                 break;
>
>         case BT_CONNECT2:
>         case BT_CONNECT:
>         case BT_DISCONN:
> +               local_bh_disable();
>                 sco_chan_del(sk, ECONNRESET);
> +               local_bh_enable();
>                 break;
>
>         default:
> @@ -1084,21 +1101,26 @@ static void sco_conn_ready(struct sco_conn *conn)
>
>         if (sk) {
>                 sco_sock_clear_timer(sk);
> +               local_bh_disable();
>                 bh_lock_sock(sk);
>                 sk->sk_state = BT_CONNECTED;
>                 sk->sk_state_change(sk);
>                 bh_unlock_sock(sk);
> +               local_bh_enable();
>         } else {
> +               local_bh_disable();
>                 sco_conn_lock(conn);
>
>                 if (!conn->hcon) {
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
>                 parent = sco_get_sock_listen(&conn->hcon->src);
>                 if (!parent) {
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
> @@ -1109,6 +1131,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>                 if (!sk) {
>                         bh_unlock_sock(parent);
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
> @@ -1131,6 +1154,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>                 bh_unlock_sock(parent);
>
>                 sco_conn_unlock(conn);
> +               local_bh_enable();
>         }
>  }
>
> --
> 2.25.1
>


-- 
Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] Bluetooth: fix inconsistent lock state in sco
@ 2021-07-14 19:12   ` Luiz Augusto von Dentz
  0 siblings, 0 replies; 7+ messages in thread
From: Luiz Augusto von Dentz @ 2021-07-14 19:12 UTC (permalink / raw)
  To: Desmond Cheong Zhi Xi
  Cc: Johan Hedberg, open list:NETWORKING [GENERAL],
	Marcel Holtmann, Linux Kernel Mailing List, linux-bluetooth,
	syzbot+2f6d7c28bb4bf7e82060, Jakub Kicinski,
	linux-kernel-mentees, David Miller, stefan

Hi Desmond,

On Tue, Jul 13, 2021 at 9:29 AM Desmond Cheong Zhi Xi
<desmondcheongzx@gmail.com> wrote:
>
> Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
> usage in sco_conn_del and sco_sock_timeout that could lead to
> deadlocks.
>
> This inconsistent lock state can also happen in sco_conn_ready,
> rfcomm_connect_ind, and bt_accept_enqueue.
>
> The issue is that these functions take a spin lock on the socket with
> interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
> context. This could lead to deadlocks:
>
>        CPU0
>        ----
>   lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>   <Interrupt>
>     lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>
>  *** DEADLOCK ***
>
> We fix this by ensuring that local bh is disabled before calling
> bh_lock_sock.
>
> After doing this, we additionally need to protect sco_conn_lock by
> disabling local bh.
>
> This is necessary because sco_conn_del makes a call to sco_chan_del
> while holding on to the sock lock, and sco_chan_del itself makes a
> call to sco_conn_lock. If sco_conn_lock is held elsewhere with
> interrupts enabled, there could still be a
> slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
> follows:
>
>         CPU0                    CPU1
>         ----                    ----
>    lock(&conn->lock#2);
>                                 local_irq_disable();
>                                 lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>                                 lock(&conn->lock#2);
>    <Interrupt>
>      lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>
>   *** DEADLOCK ***
>
> As sco_conn_del now disables local bh before calling sco_chan_del,
> instead of disabling local bh for the calls to sco_conn_lock in
> sco_chan_del, we instead wrap other calls to sco_chan_del with
> local_bh_disable/enable.
>
> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
> ---
>
> Hi,
>
> The previous version of this patch was a bit of a mess, so I made the
> following changes.
>
> v1 -> v2:
> - Instead of pulling out the clean-up code out from sco_chan_del and
> using it directly in sco_conn_del, disable local irqs for relevant
> sections.
> - Disable local irqs more thoroughly for instances of
> bh_lock_sock/bh_lock_sock_nested in the bluetooth subsystem.
> Specifically, the calls in af_bluetooth.c and rfcomm/sock.c are now made
> with local irqs disabled as well.
>
> Best wishes,
> Desmond
>
>  net/bluetooth/rfcomm/sock.c |  2 ++
>  net/bluetooth/sco.c         | 26 +++++++++++++++++++++++++-
>  2 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
> index ae6f80730561..d8734abb2df4 100644
> --- a/net/bluetooth/rfcomm/sock.c
> +++ b/net/bluetooth/rfcomm/sock.c
> @@ -974,6 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>         if (!parent)
>                 return 0;
>
> +       local_bh_disable();
>         bh_lock_sock(parent);
>
>         /* Check for backlog size */
> @@ -1002,6 +1003,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>
>  done:
>         bh_unlock_sock(parent);
> +       local_bh_enable();

Looks like you are touching RFCOMM as well, perhaps you should have it
split, also how about other sockets like L2CAP and HCI are they
affected? There seems to be a lot of problem with the likes of
bh_lock_sock I wonder if going with local_bh_disable is overall a
better way to handle.

>         if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
>                 parent->sk_state_change(parent);
> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
> index 3bd41563f118..2548b8f81473 100644
> --- a/net/bluetooth/sco.c
> +++ b/net/bluetooth/sco.c
> @@ -167,16 +167,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
>         BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
>
>         /* Kill socket */
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         sk = conn->sk;
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>
>         if (sk) {
>                 sock_hold(sk);
> +
> +               local_bh_disable();
>                 bh_lock_sock(sk);
>                 sco_sock_clear_timer(sk);
>                 sco_chan_del(sk, err);
>                 bh_unlock_sock(sk);
> +               local_bh_enable();
> +
>                 sco_sock_kill(sk);
>                 sock_put(sk);
>         }
> @@ -202,6 +208,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>  {
>         int err = 0;
>
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         if (conn->sk)
>                 err = -EBUSY;
> @@ -209,6 +216,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>                 __sco_chan_add(conn, sk, parent);
>
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>         return err;
>  }
>
> @@ -303,9 +311,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
>  {
>         struct sock *sk;
>
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         sk = conn->sk;
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>
>         if (!sk)
>                 goto drop;
> @@ -420,18 +430,25 @@ static void __sco_sock_close(struct sock *sk)
>                 if (sco_pi(sk)->conn->hcon) {
>                         sk->sk_state = BT_DISCONN;
>                         sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
> +                       local_bh_disable();
>                         sco_conn_lock(sco_pi(sk)->conn);
>                         hci_conn_drop(sco_pi(sk)->conn->hcon);
>                         sco_pi(sk)->conn->hcon = NULL;
>                         sco_conn_unlock(sco_pi(sk)->conn);
> -               } else
> +                       local_bh_enable();
> +               } else {
> +                       local_bh_disable();
>                         sco_chan_del(sk, ECONNRESET);
> +                       local_bh_enable();
> +               }
>                 break;
>
>         case BT_CONNECT2:
>         case BT_CONNECT:
>         case BT_DISCONN:
> +               local_bh_disable();
>                 sco_chan_del(sk, ECONNRESET);
> +               local_bh_enable();
>                 break;
>
>         default:
> @@ -1084,21 +1101,26 @@ static void sco_conn_ready(struct sco_conn *conn)
>
>         if (sk) {
>                 sco_sock_clear_timer(sk);
> +               local_bh_disable();
>                 bh_lock_sock(sk);
>                 sk->sk_state = BT_CONNECTED;
>                 sk->sk_state_change(sk);
>                 bh_unlock_sock(sk);
> +               local_bh_enable();
>         } else {
> +               local_bh_disable();
>                 sco_conn_lock(conn);
>
>                 if (!conn->hcon) {
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
>                 parent = sco_get_sock_listen(&conn->hcon->src);
>                 if (!parent) {
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
> @@ -1109,6 +1131,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>                 if (!sk) {
>                         bh_unlock_sock(parent);
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
> @@ -1131,6 +1154,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>                 bh_unlock_sock(parent);
>
>                 sco_conn_unlock(conn);
> +               local_bh_enable();
>         }
>  }
>
> --
> 2.25.1
>


-- 
Luiz Augusto von Dentz
_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] Bluetooth: fix inconsistent lock state in sco
  2021-07-14 19:12   ` Luiz Augusto von Dentz
@ 2021-07-15  2:21     ` Desmond Cheong Zhi Xi
  -1 siblings, 0 replies; 7+ messages in thread
From: Desmond Cheong Zhi Xi @ 2021-07-15  2:21 UTC (permalink / raw)
  To: Luiz Augusto von Dentz
  Cc: Marcel Holtmann, Johan Hedberg, David Miller, Jakub Kicinski,
	stefan, linux-bluetooth, open list:NETWORKING [GENERAL],
	Linux Kernel Mailing List, skhan, Greg Kroah-Hartman,
	linux-kernel-mentees, syzbot+2f6d7c28bb4bf7e82060

On 15/7/21 3:12 am, Luiz Augusto von Dentz wrote:
> Hi Desmond,
> 
> On Tue, Jul 13, 2021 at 9:29 AM Desmond Cheong Zhi Xi
> <desmondcheongzx@gmail.com> wrote:
>>
>> Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
>> usage in sco_conn_del and sco_sock_timeout that could lead to
>> deadlocks.
>>
>> This inconsistent lock state can also happen in sco_conn_ready,
>> rfcomm_connect_ind, and bt_accept_enqueue.
>>
>> The issue is that these functions take a spin lock on the socket with
>> interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
>> context. This could lead to deadlocks:
>>
>>         CPU0
>>         ----
>>    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>    <Interrupt>
>>      lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>
>>   *** DEADLOCK ***
>>
>> We fix this by ensuring that local bh is disabled before calling
>> bh_lock_sock.
>>
>> After doing this, we additionally need to protect sco_conn_lock by
>> disabling local bh.
>>
>> This is necessary because sco_conn_del makes a call to sco_chan_del
>> while holding on to the sock lock, and sco_chan_del itself makes a
>> call to sco_conn_lock. If sco_conn_lock is held elsewhere with
>> interrupts enabled, there could still be a
>> slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
>> follows:
>>
>>          CPU0                    CPU1
>>          ----                    ----
>>     lock(&conn->lock#2);
>>                                  local_irq_disable();
>>                                  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>                                  lock(&conn->lock#2);
>>     <Interrupt>
>>       lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>
>>    *** DEADLOCK ***
>>
>> As sco_conn_del now disables local bh before calling sco_chan_del,
>> instead of disabling local bh for the calls to sco_conn_lock in
>> sco_chan_del, we instead wrap other calls to sco_chan_del with
>> local_bh_disable/enable.
>>
>> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
>> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
>> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
>> ---
>>
>> Hi,
>>
>> The previous version of this patch was a bit of a mess, so I made the
>> following changes.
>>
>> v1 -> v2:
>> - Instead of pulling out the clean-up code out from sco_chan_del and
>> using it directly in sco_conn_del, disable local irqs for relevant
>> sections.
>> - Disable local irqs more thoroughly for instances of
>> bh_lock_sock/bh_lock_sock_nested in the bluetooth subsystem.
>> Specifically, the calls in af_bluetooth.c and rfcomm/sock.c are now made
>> with local irqs disabled as well.
>>
>> Best wishes,
>> Desmond
>>
>>   net/bluetooth/rfcomm/sock.c |  2 ++
>>   net/bluetooth/sco.c         | 26 +++++++++++++++++++++++++-
>>   2 files changed, 27 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
>> index ae6f80730561..d8734abb2df4 100644
>> --- a/net/bluetooth/rfcomm/sock.c
>> +++ b/net/bluetooth/rfcomm/sock.c
>> @@ -974,6 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>>          if (!parent)
>>                  return 0;
>>
>> +       local_bh_disable();
>>          bh_lock_sock(parent);
>>
>>          /* Check for backlog size */
>> @@ -1002,6 +1003,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>>
>>   done:
>>          bh_unlock_sock(parent);
>> +       local_bh_enable();
> 
> Looks like you are touching RFCOMM as well, perhaps you should have it
> split, also how about other sockets like L2CAP and HCI are they
> affected? There seems to be a lot of problem with the likes of
> bh_lock_sock I wonder if going with local_bh_disable is overall a
> better way to handle.
> 

Thanks for the feedback, Luiz. I'll separate the SCO and RFCOMM code 
changes.

I believe other sockets should be fine. From what I see, they use 
lock_sock, which acquires the spin lock via spin_lock_bh under the hood. 
So only code that uses bh_lock_sock/bh_lock_sock_nested are affected.

Also I'm not sure what you meant by going with local_bh_disable? I'm 
probably missing context about Bluetooth protocols, but I think the spin 
locks still have their place to protect concurrent accesses and to make 
it clear about what's being protected.

>>          if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
>>                  parent->sk_state_change(parent);
>> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
>> index 3bd41563f118..2548b8f81473 100644
>> --- a/net/bluetooth/sco.c
>> +++ b/net/bluetooth/sco.c
>> @@ -167,16 +167,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
>>          BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
>>
>>          /* Kill socket */
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          sk = conn->sk;
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>
>>          if (sk) {
>>                  sock_hold(sk);
>> +
>> +               local_bh_disable();
>>                  bh_lock_sock(sk);
>>                  sco_sock_clear_timer(sk);
>>                  sco_chan_del(sk, err);
>>                  bh_unlock_sock(sk);
>> +               local_bh_enable();
>> +
>>                  sco_sock_kill(sk);
>>                  sock_put(sk);
>>          }
>> @@ -202,6 +208,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>>   {
>>          int err = 0;
>>
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          if (conn->sk)
>>                  err = -EBUSY;
>> @@ -209,6 +216,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>>                  __sco_chan_add(conn, sk, parent);
>>
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>          return err;
>>   }
>>
>> @@ -303,9 +311,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
>>   {
>>          struct sock *sk;
>>
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          sk = conn->sk;
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>
>>          if (!sk)
>>                  goto drop;
>> @@ -420,18 +430,25 @@ static void __sco_sock_close(struct sock *sk)
>>                  if (sco_pi(sk)->conn->hcon) {
>>                          sk->sk_state = BT_DISCONN;
>>                          sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
>> +                       local_bh_disable();
>>                          sco_conn_lock(sco_pi(sk)->conn);
>>                          hci_conn_drop(sco_pi(sk)->conn->hcon);
>>                          sco_pi(sk)->conn->hcon = NULL;
>>                          sco_conn_unlock(sco_pi(sk)->conn);
>> -               } else
>> +                       local_bh_enable();
>> +               } else {
>> +                       local_bh_disable();
>>                          sco_chan_del(sk, ECONNRESET);
>> +                       local_bh_enable();
>> +               }
>>                  break;
>>
>>          case BT_CONNECT2:
>>          case BT_CONNECT:
>>          case BT_DISCONN:
>> +               local_bh_disable();
>>                  sco_chan_del(sk, ECONNRESET);
>> +               local_bh_enable();
>>                  break;
>>
>>          default:
>> @@ -1084,21 +1101,26 @@ static void sco_conn_ready(struct sco_conn *conn)
>>
>>          if (sk) {
>>                  sco_sock_clear_timer(sk);
>> +               local_bh_disable();
>>                  bh_lock_sock(sk);
>>                  sk->sk_state = BT_CONNECTED;
>>                  sk->sk_state_change(sk);
>>                  bh_unlock_sock(sk);
>> +               local_bh_enable();
>>          } else {
>> +               local_bh_disable();
>>                  sco_conn_lock(conn);
>>
>>                  if (!conn->hcon) {
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>>                  parent = sco_get_sock_listen(&conn->hcon->src);
>>                  if (!parent) {
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>> @@ -1109,6 +1131,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>>                  if (!sk) {
>>                          bh_unlock_sock(parent);
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>> @@ -1131,6 +1154,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>>                  bh_unlock_sock(parent);
>>
>>                  sco_conn_unlock(conn);
>> +               local_bh_enable();
>>          }
>>   }
>>
>> --
>> 2.25.1
>>
> 
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] Bluetooth: fix inconsistent lock state in sco
@ 2021-07-15  2:21     ` Desmond Cheong Zhi Xi
  0 siblings, 0 replies; 7+ messages in thread
From: Desmond Cheong Zhi Xi @ 2021-07-15  2:21 UTC (permalink / raw)
  To: Luiz Augusto von Dentz
  Cc: Johan Hedberg, open list:NETWORKING [GENERAL],
	Marcel Holtmann, Linux Kernel Mailing List, linux-bluetooth,
	syzbot+2f6d7c28bb4bf7e82060, Jakub Kicinski,
	linux-kernel-mentees, David Miller, stefan

On 15/7/21 3:12 am, Luiz Augusto von Dentz wrote:
> Hi Desmond,
> 
> On Tue, Jul 13, 2021 at 9:29 AM Desmond Cheong Zhi Xi
> <desmondcheongzx@gmail.com> wrote:
>>
>> Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
>> usage in sco_conn_del and sco_sock_timeout that could lead to
>> deadlocks.
>>
>> This inconsistent lock state can also happen in sco_conn_ready,
>> rfcomm_connect_ind, and bt_accept_enqueue.
>>
>> The issue is that these functions take a spin lock on the socket with
>> interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
>> context. This could lead to deadlocks:
>>
>>         CPU0
>>         ----
>>    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>    <Interrupt>
>>      lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>
>>   *** DEADLOCK ***
>>
>> We fix this by ensuring that local bh is disabled before calling
>> bh_lock_sock.
>>
>> After doing this, we additionally need to protect sco_conn_lock by
>> disabling local bh.
>>
>> This is necessary because sco_conn_del makes a call to sco_chan_del
>> while holding on to the sock lock, and sco_chan_del itself makes a
>> call to sco_conn_lock. If sco_conn_lock is held elsewhere with
>> interrupts enabled, there could still be a
>> slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
>> follows:
>>
>>          CPU0                    CPU1
>>          ----                    ----
>>     lock(&conn->lock#2);
>>                                  local_irq_disable();
>>                                  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>                                  lock(&conn->lock#2);
>>     <Interrupt>
>>       lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>
>>    *** DEADLOCK ***
>>
>> As sco_conn_del now disables local bh before calling sco_chan_del,
>> instead of disabling local bh for the calls to sco_conn_lock in
>> sco_chan_del, we instead wrap other calls to sco_chan_del with
>> local_bh_disable/enable.
>>
>> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
>> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
>> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
>> ---
>>
>> Hi,
>>
>> The previous version of this patch was a bit of a mess, so I made the
>> following changes.
>>
>> v1 -> v2:
>> - Instead of pulling out the clean-up code out from sco_chan_del and
>> using it directly in sco_conn_del, disable local irqs for relevant
>> sections.
>> - Disable local irqs more thoroughly for instances of
>> bh_lock_sock/bh_lock_sock_nested in the bluetooth subsystem.
>> Specifically, the calls in af_bluetooth.c and rfcomm/sock.c are now made
>> with local irqs disabled as well.
>>
>> Best wishes,
>> Desmond
>>
>>   net/bluetooth/rfcomm/sock.c |  2 ++
>>   net/bluetooth/sco.c         | 26 +++++++++++++++++++++++++-
>>   2 files changed, 27 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
>> index ae6f80730561..d8734abb2df4 100644
>> --- a/net/bluetooth/rfcomm/sock.c
>> +++ b/net/bluetooth/rfcomm/sock.c
>> @@ -974,6 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>>          if (!parent)
>>                  return 0;
>>
>> +       local_bh_disable();
>>          bh_lock_sock(parent);
>>
>>          /* Check for backlog size */
>> @@ -1002,6 +1003,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
>>
>>   done:
>>          bh_unlock_sock(parent);
>> +       local_bh_enable();
> 
> Looks like you are touching RFCOMM as well, perhaps you should have it
> split, also how about other sockets like L2CAP and HCI are they
> affected? There seems to be a lot of problem with the likes of
> bh_lock_sock I wonder if going with local_bh_disable is overall a
> better way to handle.
> 

Thanks for the feedback, Luiz. I'll separate the SCO and RFCOMM code 
changes.

I believe other sockets should be fine. From what I see, they use 
lock_sock, which acquires the spin lock via spin_lock_bh under the hood. 
So only code that uses bh_lock_sock/bh_lock_sock_nested are affected.

Also I'm not sure what you meant by going with local_bh_disable? I'm 
probably missing context about Bluetooth protocols, but I think the spin 
locks still have their place to protect concurrent accesses and to make 
it clear about what's being protected.

>>          if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
>>                  parent->sk_state_change(parent);
>> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
>> index 3bd41563f118..2548b8f81473 100644
>> --- a/net/bluetooth/sco.c
>> +++ b/net/bluetooth/sco.c
>> @@ -167,16 +167,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
>>          BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
>>
>>          /* Kill socket */
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          sk = conn->sk;
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>
>>          if (sk) {
>>                  sock_hold(sk);
>> +
>> +               local_bh_disable();
>>                  bh_lock_sock(sk);
>>                  sco_sock_clear_timer(sk);
>>                  sco_chan_del(sk, err);
>>                  bh_unlock_sock(sk);
>> +               local_bh_enable();
>> +
>>                  sco_sock_kill(sk);
>>                  sock_put(sk);
>>          }
>> @@ -202,6 +208,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>>   {
>>          int err = 0;
>>
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          if (conn->sk)
>>                  err = -EBUSY;
>> @@ -209,6 +216,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>>                  __sco_chan_add(conn, sk, parent);
>>
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>          return err;
>>   }
>>
>> @@ -303,9 +311,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
>>   {
>>          struct sock *sk;
>>
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          sk = conn->sk;
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>
>>          if (!sk)
>>                  goto drop;
>> @@ -420,18 +430,25 @@ static void __sco_sock_close(struct sock *sk)
>>                  if (sco_pi(sk)->conn->hcon) {
>>                          sk->sk_state = BT_DISCONN;
>>                          sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
>> +                       local_bh_disable();
>>                          sco_conn_lock(sco_pi(sk)->conn);
>>                          hci_conn_drop(sco_pi(sk)->conn->hcon);
>>                          sco_pi(sk)->conn->hcon = NULL;
>>                          sco_conn_unlock(sco_pi(sk)->conn);
>> -               } else
>> +                       local_bh_enable();
>> +               } else {
>> +                       local_bh_disable();
>>                          sco_chan_del(sk, ECONNRESET);
>> +                       local_bh_enable();
>> +               }
>>                  break;
>>
>>          case BT_CONNECT2:
>>          case BT_CONNECT:
>>          case BT_DISCONN:
>> +               local_bh_disable();
>>                  sco_chan_del(sk, ECONNRESET);
>> +               local_bh_enable();
>>                  break;
>>
>>          default:
>> @@ -1084,21 +1101,26 @@ static void sco_conn_ready(struct sco_conn *conn)
>>
>>          if (sk) {
>>                  sco_sock_clear_timer(sk);
>> +               local_bh_disable();
>>                  bh_lock_sock(sk);
>>                  sk->sk_state = BT_CONNECTED;
>>                  sk->sk_state_change(sk);
>>                  bh_unlock_sock(sk);
>> +               local_bh_enable();
>>          } else {
>> +               local_bh_disable();
>>                  sco_conn_lock(conn);
>>
>>                  if (!conn->hcon) {
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>>                  parent = sco_get_sock_listen(&conn->hcon->src);
>>                  if (!parent) {
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>> @@ -1109,6 +1131,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>>                  if (!sk) {
>>                          bh_unlock_sock(parent);
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>> @@ -1131,6 +1154,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>>                  bh_unlock_sock(parent);
>>
>>                  sco_conn_unlock(conn);
>> +               local_bh_enable();
>>          }
>>   }
>>
>> --
>> 2.25.1
>>
> 
> 

_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-07-15  2:21 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13 16:28 [PATCH v2] Bluetooth: fix inconsistent lock state in sco Desmond Cheong Zhi Xi
2021-07-13 16:28 ` Desmond Cheong Zhi Xi
2021-07-13 17:25 ` [v2] " bluez.test.bot
2021-07-14 19:12 ` [PATCH v2] " Luiz Augusto von Dentz
2021-07-14 19:12   ` Luiz Augusto von Dentz
2021-07-15  2:21   ` Desmond Cheong Zhi Xi
2021-07-15  2:21     ` Desmond Cheong Zhi Xi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.