All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ziqi Zhao <astrajoan@yahoo.com>
To: astrajoan@yahoo.com, davem@davemloft.net, edumazet@google.com,
	ivan.orlov0322@gmail.com, kernel@pengutronix.de, kuba@kernel.org,
	linux@rempel-privat.de, linux-can@vger.kernel.org,
	mkl@pengutronix.de, pabeni@redhat.com, robin@protonic.nl,
	skhan@linuxfoundation.org, socketcan@hartkopp.net
Cc: arnd@arndb.de, bridge@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, mudongliangabcd@gmail.com,
	netdev@vger.kernel.org, nikolay@nvidia.com, roopa@nvidia.com,
	syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com,
	syzkaller-bugs@googlegroups.com,
	syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com
Subject: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
Date: Fri, 21 Jul 2023 09:22:26 -0700	[thread overview]
Message-ID: <20230721162226.8639-1-astrajoan@yahoo.com> (raw)
In-Reply-To: <20230704064710.3189-1-astrajoan@yahoo.com>

The following 3 locks would race against each other, causing the
deadlock situation in the Syzbot bug report:

- j1939_socks_lock
- active_session_list_lock
- sk_session_queue_lock

A reasonable fix is to change j1939_socks_lock to an rwlock, since in
the rare situations where a write lock is required for the linked list
that j1939_socks_lock is protecting, the code does not attempt to
acquire any more locks. This would break the circular lock dependency,
where, for example, the current thread already locks j1939_socks_lock
and attempts to acquire sk_session_queue_lock, and at the same time,
another thread attempts to acquire j1939_socks_lock while holding
sk_session_queue_lock.

NOTE: This patch along does not fix the unregister_netdevice bug
reported by Syzbot; instead, it solves a deadlock situation to prepare
for one or more further patches to actually fix the Syzbot bug, which
appears to be a reference counting problem within the j1939 codebase.

Reported-by: syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com
Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>
---
 net/can/j1939/j1939-priv.h |  2 +-
 net/can/j1939/main.c       |  2 +-
 net/can/j1939/socket.c     | 25 +++++++++++++------------
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 16af1a7f80f6..74f15592d170 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -86,7 +86,7 @@ struct j1939_priv {
 	unsigned int tp_max_packet_size;
 
 	/* lock for j1939_socks list */
-	spinlock_t j1939_socks_lock;
+	rwlock_t j1939_socks_lock;
 	struct list_head j1939_socks;
 
 	struct kref rx_kref;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index ecff1c947d68..a6fb89fa6278 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
 		return ERR_PTR(-ENOMEM);
 
 	j1939_tp_init(priv);
-	spin_lock_init(&priv->j1939_socks_lock);
+	rwlock_init(&priv->j1939_socks_lock);
 	INIT_LIST_HEAD(&priv->j1939_socks);
 
 	mutex_lock(&j1939_netdev_lock);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index feaec4ad6d16..a8b981dc2065 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
 	jsk->state |= J1939_SOCK_BOUND;
 	j1939_priv_get(priv);
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	write_lock_bh(&priv->j1939_socks_lock);
 	list_add_tail(&jsk->list, &priv->j1939_socks);
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	write_unlock_bh(&priv->j1939_socks_lock);
 }
 
 static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
 {
-	spin_lock_bh(&priv->j1939_socks_lock);
+	write_lock_bh(&priv->j1939_socks_lock);
 	list_del_init(&jsk->list);
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	write_unlock_bh(&priv->j1939_socks_lock);
 
 	j1939_priv_put(priv);
 	jsk->state &= ~J1939_SOCK_BOUND;
@@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
 	struct j1939_sock *jsk;
 	bool match = false;
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		match = j1939_sk_recv_match_one(jsk, skcb);
 		if (match)
 			break;
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 
 	return match;
 }
@@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
 {
 	struct j1939_sock *jsk;
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		j1939_sk_recv_one(jsk, skb);
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 }
 
 static void j1939_sk_sock_destruct(struct sock *sk)
@@ -484,6 +484,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
 		priv = j1939_netdev_start(ndev);
 		dev_put(ndev);
+
 		if (IS_ERR(priv)) {
 			ret = PTR_ERR(priv);
 			goto out_release_sock;
@@ -1078,12 +1079,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
 	}
 
 	/* spread RX notifications to all sockets subscribed to this session */
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		if (j1939_sk_recv_match_one(jsk, &session->skcb))
 			__j1939_sk_errqueue(session, &jsk->sk, type);
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 };
 
 void j1939_sk_send_loop_abort(struct sock *sk, int err)
@@ -1271,7 +1272,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
 	struct j1939_sock *jsk;
 	int error_code = ENETDOWN;
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		jsk->sk.sk_err = error_code;
 		if (!sock_flag(&jsk->sk, SOCK_DEAD))
@@ -1279,7 +1280,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
 
 		j1939_sk_queue_drop_all(priv, jsk, error_code);
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 }
 
 static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
-- 
2.34.1


WARNING: multiple messages have this Message-ID (diff)
From: Ziqi Zhao <astrajoan@yahoo.com>
To: astrajoan@yahoo.com, davem@davemloft.net, edumazet@google.com,
	ivan.orlov0322@gmail.com, kernel@pengutronix.de, kuba@kernel.org,
	linux@rempel-privat.de, linux-can@vger.kernel.org,
	mkl@pengutronix.de, pabeni@redhat.com, robin@protonic.nl,
	skhan@linuxfoundation.org, socketcan@hartkopp.net
Cc: arnd@arndb.de, netdev@vger.kernel.org,
	bridge@lists.linux-foundation.org,
	syzkaller-bugs@googlegroups.com, linux-kernel@vger.kernel.org,
	mudongliangabcd@gmail.com, nikolay@nvidia.com,
	syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com,
	roopa@nvidia.com,
	syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com
Subject: [Bridge] [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
Date: Fri, 21 Jul 2023 09:22:26 -0700	[thread overview]
Message-ID: <20230721162226.8639-1-astrajoan@yahoo.com> (raw)
In-Reply-To: <20230704064710.3189-1-astrajoan@yahoo.com>

The following 3 locks would race against each other, causing the
deadlock situation in the Syzbot bug report:

- j1939_socks_lock
- active_session_list_lock
- sk_session_queue_lock

A reasonable fix is to change j1939_socks_lock to an rwlock, since in
the rare situations where a write lock is required for the linked list
that j1939_socks_lock is protecting, the code does not attempt to
acquire any more locks. This would break the circular lock dependency,
where, for example, the current thread already locks j1939_socks_lock
and attempts to acquire sk_session_queue_lock, and at the same time,
another thread attempts to acquire j1939_socks_lock while holding
sk_session_queue_lock.

NOTE: This patch along does not fix the unregister_netdevice bug
reported by Syzbot; instead, it solves a deadlock situation to prepare
for one or more further patches to actually fix the Syzbot bug, which
appears to be a reference counting problem within the j1939 codebase.

Reported-by: syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com
Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>
---
 net/can/j1939/j1939-priv.h |  2 +-
 net/can/j1939/main.c       |  2 +-
 net/can/j1939/socket.c     | 25 +++++++++++++------------
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 16af1a7f80f6..74f15592d170 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -86,7 +86,7 @@ struct j1939_priv {
 	unsigned int tp_max_packet_size;
 
 	/* lock for j1939_socks list */
-	spinlock_t j1939_socks_lock;
+	rwlock_t j1939_socks_lock;
 	struct list_head j1939_socks;
 
 	struct kref rx_kref;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index ecff1c947d68..a6fb89fa6278 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
 		return ERR_PTR(-ENOMEM);
 
 	j1939_tp_init(priv);
-	spin_lock_init(&priv->j1939_socks_lock);
+	rwlock_init(&priv->j1939_socks_lock);
 	INIT_LIST_HEAD(&priv->j1939_socks);
 
 	mutex_lock(&j1939_netdev_lock);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index feaec4ad6d16..a8b981dc2065 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
 	jsk->state |= J1939_SOCK_BOUND;
 	j1939_priv_get(priv);
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	write_lock_bh(&priv->j1939_socks_lock);
 	list_add_tail(&jsk->list, &priv->j1939_socks);
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	write_unlock_bh(&priv->j1939_socks_lock);
 }
 
 static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
 {
-	spin_lock_bh(&priv->j1939_socks_lock);
+	write_lock_bh(&priv->j1939_socks_lock);
 	list_del_init(&jsk->list);
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	write_unlock_bh(&priv->j1939_socks_lock);
 
 	j1939_priv_put(priv);
 	jsk->state &= ~J1939_SOCK_BOUND;
@@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
 	struct j1939_sock *jsk;
 	bool match = false;
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		match = j1939_sk_recv_match_one(jsk, skcb);
 		if (match)
 			break;
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 
 	return match;
 }
@@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
 {
 	struct j1939_sock *jsk;
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		j1939_sk_recv_one(jsk, skb);
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 }
 
 static void j1939_sk_sock_destruct(struct sock *sk)
@@ -484,6 +484,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
 		priv = j1939_netdev_start(ndev);
 		dev_put(ndev);
+
 		if (IS_ERR(priv)) {
 			ret = PTR_ERR(priv);
 			goto out_release_sock;
@@ -1078,12 +1079,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
 	}
 
 	/* spread RX notifications to all sockets subscribed to this session */
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		if (j1939_sk_recv_match_one(jsk, &session->skcb))
 			__j1939_sk_errqueue(session, &jsk->sk, type);
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 };
 
 void j1939_sk_send_loop_abort(struct sock *sk, int err)
@@ -1271,7 +1272,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
 	struct j1939_sock *jsk;
 	int error_code = ENETDOWN;
 
-	spin_lock_bh(&priv->j1939_socks_lock);
+	read_lock_bh(&priv->j1939_socks_lock);
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		jsk->sk.sk_err = error_code;
 		if (!sock_flag(&jsk->sk, SOCK_DEAD))
@@ -1279,7 +1280,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
 
 		j1939_sk_queue_drop_all(priv, jsk, error_code);
 	}
-	spin_unlock_bh(&priv->j1939_socks_lock);
+	read_unlock_bh(&priv->j1939_socks_lock);
 }
 
 static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
-- 
2.34.1


  parent reply	other threads:[~2023-07-21 16:44 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-04  6:19 [syzbot] [can?] possible deadlock in j1939_sk_errqueue (2) syzbot
2023-07-04  6:47 ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock Ziqi Zhao
2023-07-04  6:47   ` syzbot
2023-07-04  7:37     ` Oleksij Rempel
2023-07-21 16:22   ` Ziqi Zhao [this message]
2023-07-21 16:22     ` [Bridge] " Ziqi Zhao
2023-07-23 15:41     ` Oleksij Rempel
2023-07-23 15:41       ` [Bridge] " Oleksij Rempel
2023-08-07  4:46     ` Oleksij Rempel
2023-08-07  4:46       ` [Bridge] " Oleksij Rempel
2023-11-17  8:10       ` Oleksij Rempel
2023-07-10 17:53 ` [syzbot] [can?] possible deadlock in j1939_sk_errqueue (2) syzbot
2023-07-12  0:47   ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock Ziqi Zhao
2023-07-12  1:16     ` [syzbot] [can?] possible deadlock in j1939_sk_errqueue (2) syzbot
2023-07-13 22:23     ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock Stephen Hemminger
2023-11-15  3:54 ` [syzbot] [can?] possible deadlock in j1939_sk_errqueue (2) syzbot
     [not found] <C29A816B-6E28-4A36-AE59-F446180C910B.ref@yahoo.com>
2023-07-14  5:58 ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock Astra Joan
     [not found] <8A7EE4B2-4DCE-40FF-B971-F67F402872BB.ref@yahoo.com>
2023-07-07  4:39 ` Astra Joan
     [not found] <F17EC83C-9D70-463A-9C46-FBCC53A1F13C.ref@yahoo.com>
2023-07-04 17:55 ` Astra Joan
2023-07-05  4:39   ` Oleksij Rempel
2023-07-05  4:50     ` Dmitry Vyukov
  -- strict thread matches above, loose matches on Subject: below --
2023-06-21  8:46 [syzbot] [net?] unregister_netdevice: waiting for DEV to become free (8) Dongliang Mu
2023-06-26  5:50 ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock Ziqi Zhao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230721162226.8639-1-astrajoan@yahoo.com \
    --to=astrajoan@yahoo.com \
    --cc=arnd@arndb.de \
    --cc=bridge@lists.linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=ivan.orlov0322@gmail.com \
    --cc=kernel@pengutronix.de \
    --cc=kuba@kernel.org \
    --cc=linux-can@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@rempel-privat.de \
    --cc=mkl@pengutronix.de \
    --cc=mudongliangabcd@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=nikolay@nvidia.com \
    --cc=pabeni@redhat.com \
    --cc=robin@protonic.nl \
    --cc=roopa@nvidia.com \
    --cc=skhan@linuxfoundation.org \
    --cc=socketcan@hartkopp.net \
    --cc=syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com \
    --cc=syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com \
    --cc=syzkaller-bugs@googlegroups.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.