All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
@ 2010-10-22  2:26 Haijun Liu
  2010-10-22  2:26 ` [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect Haijun Liu
  2010-10-22 17:18 ` [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer() Gustavo F. Padovan
  0 siblings, 2 replies; 16+ messages in thread
From: Haijun Liu @ 2010-10-22  2:26 UTC (permalink / raw)
  To: linux-bluetooth; +Cc: padovan, Haijun Liu

During test session with another vendor's bt stack, found that in
l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
be called after the sock was freed, so it raised a system crash.
So I just replaced del_timer() with del_timer_sync() to solve it.

Signed-off-by: Haijun Liu <haijun.liu@atheros.com>
---
 net/bluetooth/l2cap.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6f931cc..879f386 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -283,9 +283,9 @@ static void l2cap_chan_del(struct sock *sk, int err)
 	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
 		struct srej_list *l, *tmp;
 
-		del_timer(&l2cap_pi(sk)->retrans_timer);
-		del_timer(&l2cap_pi(sk)->monitor_timer);
-		del_timer(&l2cap_pi(sk)->ack_timer);
+		del_timer_sync(&l2cap_pi(sk)->retrans_timer);
+		del_timer_sync(&l2cap_pi(sk)->monitor_timer);
+		del_timer_sync(&l2cap_pi(sk)->ack_timer);
 
 		skb_queue_purge(SREJ_QUEUE(sk));
 		skb_queue_purge(BUSY_QUEUE(sk));
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect
  2010-10-22  2:26 [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer() Haijun Liu
@ 2010-10-22  2:26 ` Haijun Liu
  2010-10-22 17:34   ` Gustavo F. Padovan
  2010-10-22 17:18 ` [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer() Gustavo F. Padovan
  1 sibling, 1 reply; 16+ messages in thread
From: Haijun Liu @ 2010-10-22  2:26 UTC (permalink / raw)
  To: linux-bluetooth; +Cc: padovan, Haijun Liu

During test session with another vendor's bt stack, found that
without lock protect for TX_QUEUE(sk) will cause system crash while
data transfer over AMP controller. So I just add lock protect for
TX_QUEUE(sk).

Signed-off-by: Haijun Liu <haijun.liu@atheros.com>
---
 include/net/bluetooth/l2cap.h |    1 +
 net/bluetooth/l2cap.c         |   25 +++++++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index c819c8b..276c7ea 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -304,6 +304,7 @@ struct sock_del_list {
 #define SREJ_QUEUE(sk) (&l2cap_pi(sk)->srej_queue)
 #define BUSY_QUEUE(sk) (&l2cap_pi(sk)->busy_queue)
 #define SREJ_LIST(sk) (&l2cap_pi(sk)->srej_l.list)
+#define TX_QUEUE_LOCK(sk) (&l2cap_pi(sk)->tx_queue.lock)
 
 struct srej_list {
 	__u8	tx_seq;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 879f386..153614f 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1432,16 +1432,18 @@ static void l2cap_drop_acked_frames(struct sock *sk)
 {
 	struct sk_buff *skb;
 
+	spin_lock_bh(TX_QUEUE_LOCK(sk));
 	while ((skb = skb_peek(TX_QUEUE(sk))) &&
 			l2cap_pi(sk)->unacked_frames) {
 		if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq)
 			break;
 
-		skb = skb_dequeue(TX_QUEUE(sk));
+		skb = __skb_dequeue(TX_QUEUE(sk));
 		kfree_skb(skb);
 
 		l2cap_pi(sk)->unacked_frames--;
 	}
+	spin_unlock_bh(TX_QUEUE_LOCK(sk));
 
 	if (!l2cap_pi(sk)->unacked_frames)
 		del_timer(&l2cap_pi(sk)->retrans_timer);
@@ -1484,18 +1486,24 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 	struct sk_buff *skb, *tx_skb;
 	u16 control, fcs;
 
+	spin_lock_bh(TX_QUEUE_LOCK(sk));
 	skb = skb_peek(TX_QUEUE(sk));
-	if (!skb)
+	if (!skb) {
+		spin_unlock_bh(TX_QUEUE_LOCK(sk));
 		return;
+	}
 
 	do {
 		if (bt_cb(skb)->tx_seq == tx_seq)
 			break;
 
-		if (skb_queue_is_last(TX_QUEUE(sk), skb))
+		if (skb_queue_is_last(TX_QUEUE(sk), skb)) {
+			spin_unlock_bh(TX_QUEUE_LOCK(sk));
 			return;
+		}
 
 	} while ((skb = skb_queue_next(TX_QUEUE(sk), skb)));
+	spin_unlock_bh(TX_QUEUE_LOCK(sk));
 
 	if (pi->remote_max_tx &&
 			bt_cb(skb)->retries == pi->remote_max_tx) {
@@ -1535,12 +1543,14 @@ static int l2cap_ertm_send(struct sock *sk)
 	if (sk->sk_state != BT_CONNECTED)
 		return -ENOTCONN;
 
+	spin_lock_bh(TX_QUEUE_LOCK(sk));
 	while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) {
 
 		if (pi->remote_max_tx &&
 				bt_cb(skb)->retries == pi->remote_max_tx) {
+			spin_unlock_bh(TX_QUEUE_LOCK(sk));
 			l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
-			break;
+			return nsent;
 		}
 
 		tx_skb = skb_clone(skb, GFP_ATOMIC);
@@ -1581,6 +1591,7 @@ static int l2cap_ertm_send(struct sock *sk)
 
 		nsent++;
 	}
+	spin_unlock_bh(TX_QUEUE_LOCK(sk));
 
 	return nsent;
 }
@@ -1590,8 +1601,10 @@ static int l2cap_retransmit_frames(struct sock *sk)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	int ret;
 
+	spin_lock_bh(TX_QUEUE_LOCK(sk));
 	if (!skb_queue_empty(TX_QUEUE(sk)))
 		sk->sk_send_head = TX_QUEUE(sk)->next;
+	spin_unlock_bh(TX_QUEUE_LOCK(sk));
 
 	pi->next_tx_seq = pi->expected_ack_seq;
 	ret = l2cap_ertm_send(sk);
@@ -1806,9 +1819,11 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 		len -= buflen;
 		size += buflen;
 	}
+	spin_lock_bh(TX_QUEUE_LOCK(sk));
 	skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
 	if (sk->sk_send_head == NULL)
 		sk->sk_send_head = sar_queue.next;
+	spin_unlock_bh(TX_QUEUE_LOCK(sk));
 
 	return size;
 }
@@ -1878,10 +1893,12 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				err = PTR_ERR(skb);
 				goto done;
 			}
+			spin_lock_bh(TX_QUEUE_LOCK(sk));
 			__skb_queue_tail(TX_QUEUE(sk), skb);
 
 			if (sk->sk_send_head == NULL)
 				sk->sk_send_head = skb;
+			spin_unlock_bh(TX_QUEUE_LOCK(sk));
 
 		} else {
 		/* Segment SDU into multiples PDUs */
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-10-22  2:26 [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer() Haijun Liu
  2010-10-22  2:26 ` [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect Haijun Liu
@ 2010-10-22 17:18 ` Gustavo F. Padovan
  2010-10-25  1:35   ` haijun liu
  1 sibling, 1 reply; 16+ messages in thread
From: Gustavo F. Padovan @ 2010-10-22 17:18 UTC (permalink / raw)
  To: Haijun Liu; +Cc: linux-bluetooth

* Haijun Liu <haijun.liu@atheros.com> [2010-10-22 10:26:58 +0800]:

> During test session with another vendor's bt stack, found that in
> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
> be called after the sock was freed, so it raised a system crash.
> So I just replaced del_timer() with del_timer_sync() to solve it.

NAK on this. If you read the del_timer_sync() documentation you can
see that you can't call del_timer_sync() on interrupt context. The
possible solution here is to check in the beginning of
l2cap_monitor_timeout() if your sock is still valid.

-- 
Gustavo F. Padovan
ProFUSION embedded systems - http://profusion.mobi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect
  2010-10-22  2:26 ` [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect Haijun Liu
@ 2010-10-22 17:34   ` Gustavo F. Padovan
  2010-10-25  2:15     ` haijun liu
  0 siblings, 1 reply; 16+ messages in thread
From: Gustavo F. Padovan @ 2010-10-22 17:34 UTC (permalink / raw)
  To: Haijun Liu; +Cc: linux-bluetooth

* Haijun Liu <haijun.liu@atheros.com> [2010-10-22 10:26:59 +0800]:

> During test session with another vendor's bt stack, found that
> without lock protect for TX_QUEUE(sk) will cause system crash while
> data transfer over AMP controller. So I just add lock protect for
> TX_QUEUE(sk).

We already use the default socket lock protection. Is it not working for
you? Why? Could you show a crash case that requires your patch to fix
it?

-- 
Gustavo F. Padovan
ProFUSION embedded systems - http://profusion.mobi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-10-22 17:18 ` [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer() Gustavo F. Padovan
@ 2010-10-25  1:35   ` haijun liu
  2010-10-25  2:21     ` haijun liu
  2010-10-25 11:01     ` Gustavo F. Padovan
  0 siblings, 2 replies; 16+ messages in thread
From: haijun liu @ 2010-10-25  1:35 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: Haijun Liu, linux-bluetooth

Hi Gustavo,

>> During test session with another vendor's bt stack, found that in
>> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
>> be called after the sock was freed, so it raised a system crash.
>> So I just replaced del_timer() with del_timer_sync() to solve it.
>
> NAK on this. If you read the del_timer_sync() documentation you can
> see that you can't call del_timer_sync() on interrupt context. The
> possible solution here is to check in the beginning of
> l2cap_monitor_timeout() if your sock is still valid.
>

You are right, I only considered close() interface, so missed the interrupt
context.

It's very difficult to check sock valid or not in timeout procedure, since it's
an interrupt context, and only can get context from parameter pre-stored,
except global variables.

Let's think about it and come up a good solution for this situation.

-- 
Haijun Liu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect
  2010-10-22 17:34   ` Gustavo F. Padovan
@ 2010-10-25  2:15     ` haijun liu
  2010-10-25 11:09       ` Gustavo F. Padovan
  0 siblings, 1 reply; 16+ messages in thread
From: haijun liu @ 2010-10-25  2:15 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: Haijun Liu, linux-bluetooth

Hi Gustavo,

>> During test session with another vendor's bt stack, found that
>> without lock protect for TX_QUEUE(sk) will cause system crash while
>> data transfer over AMP controller. So I just add lock protect for
>> TX_QUEUE(sk).
>
> We already use the default socket lock protection. Is it not working for
> you? Why? Could you show a crash case that requires your patch to fix
> it?
>

Yes,  there is socket lock protection, but only for sk_buff, for the related
variable we need protect them as well, such as 'sk->sk_send_head',
because later in different context we will use it as sk_buff directly, but at
that time maybe it has been freed and that buffer be occupied by another
sk_buff.

Below is the crash case we met:

[  265.544145] l2cap_sock_sendmsg: sock e7f4e380, sk e015fc00, msg
e01f5ea4, len 1668
[  265.544149] l2cap_sock_sendmsg: sk->scid 42, sk->dcid 5d, sk->mode 3
[  265.544157] block_sendmsg_condition:
[  265.544160] l2cap_tx_window_full:
[  265.544163] block_sendmsg_condition: tx_window full: 0, or
wait_f/remote busy.
[  265.544168] l2cap_sar_segment_sdu: sk e015fc00 len 5736
[  265.544172] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
4000  sdulen 5736
[  265.544175] l2cap_loglink_validate:
[  265.544179] l2cap_skbuff_fromiovec:
[  265.544183] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
c000  sdulen 0
[  265.544187] l2cap_loglink_validate:
[  265.544191] l2cap_skbuff_fromiovec:
[  265.544195] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
c000  sdulen 0
[  265.544200] l2cap_loglink_validate:
[  265.544203] l2cap_skbuff_fromiovec:
[  265.544207] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
c000  sdulen 0
[  265.544211] l2cap_loglink_validate:
[  265.544214] l2cap_skbuff_fromiovec:
[  265.544218] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
c000  sdulen 0
[  265.544221] l2cap_loglink_validate:
[  265.544225] l2cap_skbuff_fromiovec:
[  265.544229] l2cap_create_iframe_pdu: sk e015fc00 len 681 control
8000  sdulen 0
[  265.544252] l2cap_loglink_validate:
[  265.544255] l2cap_skbuff_fromiovec:
[  265.544483] l2cap_recv_acldata:
[  265.544488] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.544492] l2cap_recv_frame: conn f461bcc0, skb ee91ccc0, cid 42, len 4
[  265.544496] l2cap_recv_frame: len 4, cid 0x0042
[  265.544498] l2cap_data_channel:
[  265.544501] l2cap_get_chan_by_scid:
[  265.544504] __l2cap_get_chan_by_scid:
[  265.544508] l2cap_data_channel: sk e015fc00, len 4
[  265.544511] l2cap_ertm_data_rcv:
[  265.544514] l2cap_check_fcs:
[  265.544517] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2209 len 0
[  265.544521] l2cap_data_channel_rnrframe: sk e015fc00, req_seq 34 ctrl 0x2209
[  265.544525] l2cap_drop_acked_frames:
[  265.544636] l2cap_recv_acldata:
[  265.544641] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.544645] l2cap_recv_frame: conn f461bcc0, skb ee91c6c0, cid 42, len 4
[  265.544649] l2cap_recv_frame: len 4, cid 0x0042
[  265.544652] l2cap_data_channel:
[  265.544655] l2cap_get_chan_by_scid:
[  265.544657] __l2cap_get_chan_by_scid:
[  265.570492] l2cap_recv_acldata:
[  265.570503] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.570507] l2cap_recv_frame: conn f461bcc0, skb ee91c0c0, cid 42, len 4
[  265.570513] l2cap_recv_frame: len 4, cid 0x0042
[  265.570517] l2cap_data_channel:
[  265.570520] l2cap_get_chan_by_scid:
[  265.570524] __l2cap_get_chan_by_scid:
[  265.570529] l2cap_data_channel: sk e015fc00, len 4
[  265.570533] l2cap_ertm_data_rcv:
[  265.570536] l2cap_check_fcs:
[  265.570542] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2709 len 0
[  265.570547] l2cap_data_channel_rnrframe: sk e015fc00, req_seq 39 ctrl 0x2709
[  265.570550] l2cap_drop_acked_frames:
[  265.570658] l2cap_recv_acldata:
[  265.570663] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.570668] l2cap_recv_frame: conn f461bcc0, skb ee91ca80, cid 42, len 4
[  265.570673] l2cap_recv_frame: len 4, cid 0x0042
[  265.570677] l2cap_data_channel:
[  265.570680] l2cap_get_chan_by_scid:
[  265.570683] __l2cap_get_chan_by_scid:
[  265.570687] l2cap_data_channel: sk e015fc00, len 4
[  265.570691] l2cap_ertm_data_rcv:
[  265.570694] l2cap_check_fcs:
[  265.570698] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2809 len 0
[  265.570702] l2cap_data_channel_rnrframe: sk e015fc00, req_seq 40 ctrl 0x2809
[  265.570706] l2cap_drop_acked_frames:
[  265.570858] l2cap_recv_acldata:
[  265.572903] l2cap_recv_acldata:
[  265.572910] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.572915] l2cap_recv_frame: conn f461bcc0, skb f469fa80, cid 42, len 4
[  265.572919] l2cap_recv_frame: len 4, cid 0x0042
[  265.572921] l2cap_data_channel:
[  265.572925] l2cap_get_chan_by_scid:
[  265.572928] __l2cap_get_chan_by_scid:
[  265.572933] l2cap_data_channel: sk e015fc00, len 4
[  265.572936] l2cap_ertm_data_rcv:
[  265.572938] l2cap_check_fcs:
[  265.572943] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2b09 len 0
[  265.573348] l2cap_recv_acldata:

[  265.609993] l2cap_recv_acldata:
[  265.610005] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.610009] l2cap_recv_frame: conn f461bcc0, skb ee91c540, cid 42, len 4
[  265.610013] l2cap_recv_frame: len 4, cid 0x0042
[  265.610016] l2cap_data_channel:
[  265.610019] l2cap_get_chan_by_scid:
[  265.610022] __l2cap_get_chan_by_scid:
[  265.610025] l2cap_data_channel: sk e015fc00, len 4
[  265.610029] l2cap_ertm_data_rcv:
[  265.610032] l2cap_check_fcs:
[  265.610036] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x3801 len 0
[  265.610041] l2cap_data_channel_rrframe: sk e015fc00, req_seq 56 ctrl 0x3801
[  265.610044] l2cap_drop_acked_frames:
[  265.610060] l2cap_ertm_send: sk e015fc00, sk->scid 42, sk->dcid 5d
[  265.610064] l2cap_tx_window_full:
[  265.610071] l2cap_ertm_send: pi->next_tx_seq: 13, pi->buffer_seq: 2
[  265.610075] l2cap_do_send: sk e015fc00, cid 66 skb e0147840 len 1019
[  265.610078] l2cap_loglink_validate:
[  265.610081] l2cap_do_send: send I frame over AMP controller
[  265.610085] l2cap_tx_window_full:
[  265.610093] l2cap_ertm_send: pi->next_tx_seq: 14, pi->buffer_seq: 2
[  265.610096] l2cap_do_send: sk e015fc00, cid 66 skb f4801cc0 len 1019
[  265.610099] l2cap_loglink_validate:
[  265.610102] l2cap_do_send: send I frame over AMP controller
[  265.610105] l2cap_tx_window_full:
[  265.610112] l2cap_ertm_send: pi->next_tx_seq: 15, pi->buffer_seq: 2
[  265.610115] l2cap_do_send: sk e015fc00, cid 66 skb f4801600 len 1019
[  265.610118] l2cap_loglink_validate:
[  265.610121] l2cap_do_send: send I frame over AMP controller
[  265.610124] l2cap_tx_window_full:
[  265.610130] l2cap_ertm_send: pi->next_tx_seq: 16, pi->buffer_seq: 2
[  265.610133] l2cap_do_send: sk e015fc00, cid 66 skb f4801c00 len 689
[  265.610137] l2cap_loglink_validate:
[  265.610140] l2cap_do_send: send I frame over AMP controller
[  265.610143] l2cap_tx_window_full:
[  265.610153] l2cap_ertm_send: pi->next_tx_seq: 17, pi->buffer_seq: 2
[  265.610215] l2cap_ertm_send: pi->next_tx_seq: 20, pi->buffer_seq: 2
[  265.610219] l2cap_do_send: sk e015fc00, cid 66 skb f47f03c0 len 1019
[  265.610222] l2cap_loglink_validate:
[  265.619937] l2cap_recv_acldata:
[  265.619948] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.619952] l2cap_recv_frame: conn f461bcc0, skb ee91c300, cid 42, len 4
[  265.619956] l2cap_recv_frame: len 4, cid 0x0042
[  265.620154] l2cap_ertm_send: pi->next_tx_seq: 29, pi->buffer_seq: 2
[  265.629111] l2cap_recv_acldata:
[  265.629123] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3

[  265.639371] l2cap_recv_acldata:
[  265.639384] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.639388] l2cap_recv_frame: conn f461bcc0, skb ee91ccc0, cid 42, len 4
[  265.639392] l2cap_recv_frame: len 4, cid 0x0042
[  265.639395] l2cap_data_channel:
[  265.639398] l2cap_get_chan_by_scid:
[  265.639401] __l2cap_get_chan_by_scid:
[  265.639405] l2cap_data_channel: sk e015fc00, len 4
[  265.639407] l2cap_ertm_data_rcv:
[  265.639570] l2cap_do_send: send I frame over AMP controller
[  265.646669] l2cap_recv_acldata:
[  265.646681] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.646685] l2cap_recv_frame: conn f461bcc0, skb ee91c6c0, cid 42, len 4
[  265.646822] l2cap_loglink_validate:
[  265.646825] l2cap_skbuff_fromiovec:
[  265.647800] l2cap_recv_acldata:
[  265.647808] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.649645] l2cap_recv_acldata:
[  265.649655] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.649659] l2cap_recv_frame: conn f461bcc0, skb ee91c180, cid 42, len 4
[  265.649663] l2cap_recv_frame: len 4, cid 0x0042
[  265.651518] l2cap_recv_acldata:
[  265.651527] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.651532] l2cap_recv_frame: conn f461bcc0, skb ee91c0c0, cid 42, len 4
[  265.655539] l2cap_recv_acldata:
[  265.655547] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.655550] l2cap_recv_frame: conn f461bcc0, skb e035bc00, cid 42, len 4
[  265.655554] l2cap_recv_frame: len 4, cid 0x0042
[  265.655556] l2cap_data_channel:
[  265.655559] l2cap_get_chan_by_scid:
[  265.655562] __l2cap_get_chan_by_scid:
[  265.663270] l2cap_recv_acldata:
[  265.663276] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.667987] l2cap_recv_acldata:
[  265.673206] l2cap_recv_acldata:
[  265.673217] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.673221] l2cap_recv_frame: conn f461bcc0, skb ee91c780, cid 42, len 4
[  265.673225] l2cap_recv_frame: len 4, cid 0x0042
[  265.673227] l2cap_data_channel:
[  265.673230] l2cap_get_chan_by_scid:
[  265.673233] __l2cap_get_chan_by_scid:
[  265.673236] l2cap_data_channel: sk e015fc00, len 4
[  265.673240] l2cap_ertm_data_rcv:
[  265.673243] l2cap_check_fcs:
[  265.673247] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x3109 len 0
[  265.675265] l2cap_recv_acldata:
[  265.675273] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.691337] l2cap_recv_acldata:
[  265.691348] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
[  265.691352] l2cap_recv_frame: conn f461bcc0, skb ee91c000, cid 42, len 4
[  265.691356] l2cap_recv_frame: len 4, cid 0x0042
[  265.691359] l2cap_data_channel:
[  265.691362] l2cap_get_chan_by_scid:
[  265.691366] __l2cap_get_chan_by_scid:
[  265.691369] l2cap_data_channel: sk e015fc00, len 4
[  265.691372] l2cap_ertm_data_rcv:
[  265.691375] l2cap_check_fcs:
[  265.691379] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x3511 len 0
[  265.691383] l2cap_data_channel_rrframe: sk e015fc00, req_seq 53 ctrl 0x3511
[  265.691386] l2cap_drop_acked_frames:
[  265.691389] l2cap_send_i_or_rr_or_rnr:
[  265.691392] l2cap_ertm_send: sk e015fc00, sk->scid 42, sk->dcid 5d
[  265.691396] l2cap_tx_window_full:
[  265.691400] l2cap_ertm_send: pi->next_tx_seq: 53, pi->buffer_seq: 2
[  265.691404] l2cap_do_send: sk e015fc00, cid 66 skb e0204000 len 101
[  265.691407] l2cap_loglink_validate:
[  265.691410] l2cap_do_send: send I frame over AMP controller
[  265.691415] skb_under_panic: text:f8224a57 len:105 put:4
head:e002ba00 data:e002b9fe tail:0xe002ba2c end:0xe002ba80 dev:瘗"?
[  265.691443] ------------[ cut here ]------------
[  265.691446] kernel BUG at net/core/skbuff.c:147!
[  265.691450] invalid opcode: 0000 [#1] SMP
[  265.691456] last sysfs file:
/sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/device:00/PNP0C09:00/PNP0C0A:00/power_supply/BAT0/energy_full
[  265.691460] Modules linked in:[  265.692743]  [<f822573e>]
hci_rx_task+0x2de/0x380 [bluetooth]
[  265.692748]  [<c0132921>] ? __dequeue_entity+0x21/0x40
[  265.696007] NOHZ: local_softirq_pending 0e

>From above context, we know we expect a skb with length 1019(1011+8),
but dump context tell us, the len is 105.

-- 
Haijun Liu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-10-25  1:35   ` haijun liu
@ 2010-10-25  2:21     ` haijun liu
  2010-10-25 11:01     ` Gustavo F. Padovan
  1 sibling, 0 replies; 16+ messages in thread
From: haijun liu @ 2010-10-25  2:21 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: Haijun Liu, linux-bluetooth

Here is a dump context:

[ 2544.321834] l2cap_do_send: sk e0325800, cid 3 skb f4839840 len 50
[ 2546.320108] l2cap_ack_timeout:
[ 2546.320122] l2cap_send_ack:
[ 2546.320129] l2cap_ertm_send: sk e0325800, sk->scid 3, sk->dcid 3
[ 2546.320138] l2cap_send_sframe:
[ 2546.320144] l2cap_send_sframe: pi e0325800, control 0x300
[ 2546.320152] l2cap_retrans_timeout: sk e0325800
[ 2546.320157] l2cap_send_rr_or_rnr:
[ 2546.320162] l2cap_send_sframe:
[ 2546.320166] l2cap_send_sframe: pi e0325800, control 0x310
[ 2548.204103] l2cap_disconn_ind: hcon f0443e00
[ 2548.273408] l2cap_disconn_cfm: hcon f0443e00 reason 22
[ 2548.273415] l2cap_conn_del:
[ 2548.273421] l2cap_conn_del: hcon f0443e00 conn f4839b40, err 103
[ 2548.273428] l2cap_free_sock_a2mp_internal: conn f4839b40	a2mp_sock e0325800
[ 2548.273438] l2cap_sock_close: sk e0325800, conn f4839b40
[ 2548.273444] l2cap_sock_clear_timer: sock e0325800 state 1
[ 2548.273450] l2cap_sock_clear_extimer: sock e0325800 state 1
[ 2548.273456] l2cap_sock_close: sk e0325800, conn f4839b40	a2mp_sock e0325800
[ 2548.273462] amp_a2mp_channel_exit: l2cap_conn f4839b40
[ 2548.273468] amp_a2mp_conn_unlink:
[ 2548.273473] amp_a2mp_channel_exit: exit
[ 2558.320031] l2cap_monitor_timeout: sk e0325800
[ 2558.320045] l2cap_send_disconn_req:
[ 2558.320051] l2cap_get_ident:
[ 2558.352291] BUG: unable to handle kernel NULL pointer dereference at 00000072
[ 2558.352325] IP: [<c0223b19>] dnotify_flush+0x19/0x100
[ 2558.352344] *pde = 00000000
[ 2558.352354] Oops: 0000 [#1] SMP
[ 2558.352364] last sysfs file:
/sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/device:00/PNP0C09:00/PNP0C0A:00/power_supply/BAT0/voltage_now
[ 2558.352376] Modules linked in: netconsole ar6000 binfmt_misc rfcomm
sco bridge stp ppdev bnep sha256_generic l2cap arc4
snd_hda_codec_conexant snd_hda_intel snd_hda_codec snd_hwdep
snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_oss
snd_seq_midi joydev snd_rawmidi pcmcia iwlagn snd_seq_midi_event
snd_seq mmc_block yenta_socket iwlcore rsrc_nonstatic btusb sdhci_pci
snd_timer pcmcia_core sdhci snd_seq_device thinkpad_acpi tpm_tis
led_class tpm snd mac80211 psmouse bluetooth tpm_bios uvcvideo
soundcore snd_page_alloc videodev v4l1_compat nvram cfg80211 configfs
serio_raw iptable_filter lp ip_tables x_tables parport i915 fbcon
tileblit font bitblit softcursor radeon ttm drm_kms_helper drm usbhid
ohci1394 ieee1394 intel_agp e1000e i2c_algo_bit agpgart video output
[ 2558.352675]
[ 2558.352683] Pid: 1161, comm: Xorg Not tainted 2.6.34-rc7-300 #1
278225C/278225C
[ 2558.352691] EIP: 0060:[<c0223b19>] EFLAGS: 00013282 CPU: 1
[ 2558.352697] EIP is at dnotify_flush+0x19/0x100
[ 2558.352703] EAX: cccccccc EBX: eaf51b00 ECX: 00000000 EDX: eaf51b00
[ 2558.352712] ESI: e032e600 EDI: 00000000 EBP: f487df7c ESP: f487df68
[ 2558.352717]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[ 2558.352727] Process Xorg (pid: 1161, ti=f487c000 task=f671bfc0
task.ti=f487c000)
[ 2558.352732] Stack:
[ 2558.352737]  f487dfac c047a6f0 e032e600 eaf51b00 00000000 f487df94
c01f4027 fffffff7
[ 2558.352761] <0> eaf51b00 e032e600 00000012 f487dfac c01f40d3
eaf51b40 00000012 0c9ff878
[ 2558.352795] <0> 0c91c8c0 f487c000 c0102fa3 00000012 ffffffc8
081e5ff4 0c9ff878 0c91c8c0
[ 2558.352824] Call Trace:
[ 2558.352840]  [<c047a6f0>] ? sys_socketcall+0x140/0x2a0
[ 2558.352853]  [<c01f4027>] ? filp_close+0x37/0x70
[ 2558.352860]  [<c01f40d3>] ? sys_close+0x73/0xb0
[ 2558.352868]  [<c0102fa3>] ? sysenter_do_call+0x12/0x28
[ 2558.352882]  [<c0550000>] ? __down_interruptible+0x60/0xb0
[ 2558.352888] Code: f7 ff ff eb b2 8d b6 00 00 00 00 8d bc 27 00 00
00 00 55 89 e5 83 ec 14 89 5d f4 89 d3 89 75 f8 89 c6 89 7d fc 8b 40
0c 8b 78 10 <0f> b7 47 72 25 00 f0 00 00 3d 00 40 00 00 74 0f 8b 5d f4
8b 75
[ 2558.353070] EIP: [<c0223b19>] dnotify_flush+0x19/0x100 SS:ESP 0068:f487df68
[ 2558.353083] CR2: 0000000000000072
[ 2558.353307] ---[ end trace 577d994b8fcc4773 ]---
[ 2558.362500] BUG: unable to handle kernel NULL pointer dereference at 00000010
[ 2558.362531] IP: [<c01c515c>] set_page_dirty+0x1c/0x60
[ 2558.362554] *pde = 00000000
[ 2558.362563] Oops: 0000 [#2] SMP
[ 2558.362576] last sysfs file:
/sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/device:00/PNP0C09:00/PNP0C0A:00/power_supply/BAT0/voltage_now
[ 2558.362586] Modules linked in: netconsole ar6000 binfmt_misc rfcomm
sco bridge stp ppdev bnep sha256_generic l2cap arc4
snd_hda_codec_conexant snd_hda_intel snd_hda_codec snd_hwdep
snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_oss
snd_seq_midi joydev snd_rawmidi pcmcia iwlagn snd_seq_midi_event
snd_seq mmc_block yenta_socket iwlcore rsrc_nonstatic btusb sdhci_pci
snd_timer pcmcia_core sdhci snd_seq_device thinkpad_acpi tpm_tis
led_class tpm snd mac80211 psmouse bluetooth tpm_bios uvcvideo
soundcore snd_page_alloc videodev v4l1_compat nvram cfg80211 configfs
serio_raw iptable_filter lp ip_tables x_tables parport i915 fbcon
tileblit font bitblit softcursor radeon ttm drm_kms_helper drm usbhid
ohci1394 ieee1394 intel_agp e1000e i2c_algo_bit agpgart video output
[ 2558.362892]
[ 2558.362901] Pid: 1161, comm: Xorg Tainted: G      D
2.6.34-rc7-300 #1 278225C/278225C
[ 2558.362909] EIP: 0060:[<c01c515c>] EFLAGS: 00013282 CPU: 1
[ 2558.362920] EIP is at set_page_dirty+0x1c/0x60
[ 2558.362930] EAX: c13630c0 EBX: b69d1000 ECX: 4010007c EDX: 00000000
[ 2558.362936] ESI: 00030cd2 EDI: f4873744 EBP: f487dce0 ESP: f487dce0
[ 2558.362942]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[ 2558.362949] Process Xorg (pid: 1161, ti=f487c000 task=f671bfc0
task.ti=f487c000)
[ 2558.362954] Stack:
[ 2558.362958]  f487dd64 c01d5117 f487dd00 c0109f56 00000000 f652e600
f671bfc0 00000001
[ 2558.362985] <0> 00000000 c07f7440 00000000 c1903450 f487dd7c
b69d1fff eaa58b68 b69d2000
[ 2558.363017] <0> 1b106067 f671bfc0 f04394d0 00000000 b69d0000
c1691e6c c1903440 b69d2000
[ 2558.363049] Call Trace:
[ 2558.363061]  [<c01d5117>] ? unmap_vmas+0x587/0x770
[ 2558.363072]  [<c0109f56>] ? __switch_to_xtra+0xb6/0x140
[ 2558.363081]  [<c01db1f0>] ? exit_mmap+0x90/0x150
[ 2558.363092]  [<c014350e>] ? mmput+0x2e/0xb0
[ 2558.363100]  [<c0147c90>] ? exit_mm+0xe0/0x100
[ 2558.363107]  [<c0147f6c>] ? do_exit+0x10c/0x740
[ 2558.363118]  [<c0146999>] ? kmsg_dump+0x119/0x130
[ 2558.363128]  [<c0552430>] ? oops_end+0x90/0xd0
[ 2558.363138]  [<c012960e>] ? no_context+0xbe/0x150
[ 2558.363147]  [<c0204768>] ? set_fd_set+0x38/0x50
[ 2558.363155]  [<c01296d7>] ? __bad_area_nosemaphore+0x37/0x160
[ 2558.363163]  [<c012985a>] ? __bad_area+0x3a/0x50
[ 2558.363171]  [<c0129882>] ? bad_area+0x12/0x20
[ 2558.363181]  [<c05545b6>] ? do_page_fault+0x406/0x410
[ 2558.363191]  [<c0164ad2>] ? __hrtimer_start_range_ns+0x162/0x410
[ 2558.363199]  [<c05541b0>] ? do_page_fault+0x0/0x410
[ 2558.363207]  [<c0551913>] ? error_code+0x73/0x80
[ 2558.363215]  [<c0223b19>] ? dnotify_flush+0x19/0x100
[ 2558.363226]  [<c047a6f0>] ? sys_socketcall+0x140/0x2a0
[ 2558.363237]  [<c01f4027>] ? filp_close+0x37/0x70
[ 2558.363244]  [<c01f40d3>] ? sys_close+0x73/0xb0
[ 2558.363252]  [<c0102fa3>] ? sysenter_do_call+0x12/0x28
[ 2558.363263]  [<c0550000>] ? __down_interruptible+0x60/0xb0
[ 2558.363268] Code: da eb 9b 8d b6 00 00 00 00 8d bf 00 00 00 00 55
8b 08 89 e5 8b 50 10 f7 c1 00 00 01 00 75 3f f6 c2 01 75 22 85 d2 74
1e 8b 52 38 <8b> 52 10 85 d2 74 0d ff d2 89 c2 89 d0 5d c3 90 8d 74 26
00 ba
[ 2558.363453] EIP: [<c01c515c>] set_page_dirty+0x1c/0x60 SS:ESP 0068:f487dce0
[ 2558.363470] CR2: 0000000000000010
[ 2558.363481] ---[ end trace 577d994b8fcc4774 ]---
[ 2558.363489] Fixing recursive fault but reboot is needed!
[ 2558.363497] BUG: scheduling while atomic: Xorg/1161/0x00000001
[ 2558.363502] Modules linked in: netconsole ar6000 binfmt_misc rfcomm
sco bridge stp ppdev bnep sha256_generic l2cap arc4
snd_hda_codec_conexant snd_hda_intel snd_hda_codec snd_hwdep
snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_oss
snd_seq_midi joydev snd_rawmidi pcmcia iwlagn snd_seq_midi_event
snd_seq mmc_block yenta_socket iwlcore rsrc_nonstatic btusb sdhci_pci
snd_timer pcmcia_core sdhci snd_seq_device thinkpad_acpi tpm_tis
led_class tpm snd mac80211 psmouse bluetooth tpm_bios uvcvideo
soundcore snd_page_alloc videodev v4l1_compat nvram cfg80211 configfs
serio_raw iptable_filter lp ip_tables x_tables parport i915 fbcon
tileblit font bitblit softcursor radeon ttm drm_kms_helper drm usbhid
ohci1394 ieee1394 intel_agp e1000e i2c_algo_bit agpgart video output
[ 2558.364031] Pid: 1161, comm: Xorg Tainted: G      D    2.6.34-rc7-300 #1
[ 2558.364037] Call Trace:
[ 2558.364054]  [<c0134d9d>] __schedule_bug+0x5d/0x70
[ 2558.364064]  [<c054ed07>] schedule+0x647/0x7e0
[ 2558.364074]  [<c0148518>] do_exit+0x6b8/0x740
[ 2558.364083]  [<c0146999>] ? kmsg_dump+0x119/0x130
[ 2558.364090]  [<c054e548>] ? printk+0x18/0x20
[ 2558.364100]  [<c0552430>] oops_end+0x90/0xd0
[ 2558.364108]  [<c012960e>] no_context+0xbe/0x150
[ 2558.364116]  [<c01296d7>] __bad_area_nosemaphore+0x37/0x160
[ 2558.364124]  [<c0129812>] bad_area_nosemaphore+0x12/0x20
[ 2558.364132]  [<c0554518>] do_page_fault+0x368/0x410
[ 2558.364141]  [<c01c6860>] ? release_pages+0x190/0x1c0
[ 2558.364149]  [<c05541b0>] ? do_page_fault+0x0/0x410
[ 2558.364156]  [<c0551913>] error_code+0x73/0x80
[ 2558.364164]  [<c012007b>] ? mask_IO_APIC_setup+0x9b/0xa0
[ 2558.364171]  [<c01c515c>] ? set_page_dirty+0x1c/0x60
[ 2558.364183]  [<c01d5117>] unmap_vmas+0x587/0x770
[ 2558.364194]  [<c0109f56>] ? __switch_to_xtra+0xb6/0x140
[ 2558.364203]  [<c01db1f0>] exit_mmap+0x90/0x150
[ 2558.364211]  [<c014350e>] mmput+0x2e/0xb0
[ 2558.364217]  [<c0147c90>] exit_mm+0xe0/0x100
[ 2558.364229]  [<c0147f6c>] do_exit+0x10c/0x740
[ 2558.364237]  [<c0146999>] ? kmsg_dump+0x119/0x130
[ 2558.364244]  [<c0552430>] oops_end+0x90/0xd0
[ 2558.364252]  [<c012960e>] no_context+0xbe/0x150
[ 2558.364261]  [<c0204768>] ? set_fd_set+0x38/0x50
[ 2558.364268]  [<c01296d7>] __bad_area_nosemaphore+0x37/0x160
[ 2558.364276]  [<c012985a>] __bad_area+0x3a/0x50
[ 2558.364282]  [<c0129882>] bad_area+0x12/0x20
[ 2558.364290]  [<c05545b6>] do_page_fault+0x406/0x410
[ 2558.364300]  [<c0164ad2>] ? __hrtimer_start_range_ns+0x162/0x410
[ 2558.364308]  [<c05541b0>] ? do_page_fault+0x0/0x410
[ 2558.364315]  [<c0551913>] error_code+0x73/0x80
[ 2558.364323]  [<c0223b19>] ? dnotify_flush+0x19/0x100
[ 2558.364333]  [<c047a6f0>] ? sys_socketcall+0x140/0x2a0
[ 2558.364344]  [<c01f4027>] filp_close+0x37/0x70
[ 2558.364352]  [<c01f40d3>] sys_close+0x73/0xb0
[ 2558.364359]  [<c0102fa3>] sysenter_do_call+0x12/0x28
[ 2558.364370]  [<c0550000>] ? __down_interruptible+0x60/0xb0
[ 2558.365152] init[1]: segfault at 0 ip (null) sp bfb4ba94 error 4 in
libnss_files-2.10.1.so[b74db000+a000]

-- 
Haijun Liu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-10-25  1:35   ` haijun liu
  2010-10-25  2:21     ` haijun liu
@ 2010-10-25 11:01     ` Gustavo F. Padovan
  2010-10-26  1:32       ` haijun liu
  1 sibling, 1 reply; 16+ messages in thread
From: Gustavo F. Padovan @ 2010-10-25 11:01 UTC (permalink / raw)
  To: haijun liu; +Cc: Haijun Liu, linux-bluetooth

Hi Haijun,

* haijun liu <liuhaijun.er@gmail.com> [2010-10-25 09:35:33 +0800]:

> Hi Gustavo,
> 
> >> During test session with another vendor's bt stack, found that in
> >> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
> >> be called after the sock was freed, so it raised a system crash.
> >> So I just replaced del_timer() with del_timer_sync() to solve it.
> >
> > NAK on this. If you read the del_timer_sync() documentation you can
> > see that you can't call del_timer_sync() on interrupt context. The
> > possible solution here is to check in the beginning of
> > l2cap_monitor_timeout() if your sock is still valid.
> >
> 
> You are right, I only considered close() interface, so missed the interrupt
> context.
> 
> It's very difficult to check sock valid or not in timeout procedure, since it's
> an interrupt context, and only can get context from parameter pre-stored,
> except global variables.

I think you can check for sk == null there.

-- 
Gustavo F. Padovan
ProFUSION embedded systems - http://profusion.mobi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect
  2010-10-25  2:15     ` haijun liu
@ 2010-10-25 11:09       ` Gustavo F. Padovan
  2010-10-26 11:50         ` haijun liu
  0 siblings, 1 reply; 16+ messages in thread
From: Gustavo F. Padovan @ 2010-10-25 11:09 UTC (permalink / raw)
  To: haijun liu; +Cc: Haijun Liu, linux-bluetooth

Hi Haijun,

* haijun liu <liuhaijun.er@gmail.com> [2010-10-25 10:15:48 +0800]:

> Hi Gustavo,
> 
> >> During test session with another vendor's bt stack, found that
> >> without lock protect for TX_QUEUE(sk) will cause system crash while
> >> data transfer over AMP controller. So I just add lock protect for
> >> TX_QUEUE(sk).
> >
> > We already use the default socket lock protection. Is it not working for
> > you? Why? Could you show a crash case that requires your patch to fix
> > it?
> >
> 
> Yes,  there is socket lock protection, but only for sk_buff, for the related
> variable we need protect them as well, such as 'sk->sk_send_head',
> because later in different context we will use it as sk_buff directly, but at
> that time maybe it has been freed and that buffer be occupied by another
> sk_buff.

sk->sk_send_head is also protected by the socket lock.

> 
> Below is the crash case we met:
> 
> [  265.544145] l2cap_sock_sendmsg: sock e7f4e380, sk e015fc00, msg
> e01f5ea4, len 1668
> [  265.544149] l2cap_sock_sendmsg: sk->scid 42, sk->dcid 5d, sk->mode 3
> [  265.544157] block_sendmsg_condition:
> [  265.544160] l2cap_tx_window_full:
> [  265.544163] block_sendmsg_condition: tx_window full: 0, or
> wait_f/remote busy.
> [  265.544168] l2cap_sar_segment_sdu: sk e015fc00 len 5736
> [  265.544172] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
> 4000  sdulen 5736
> [  265.544175] l2cap_loglink_validate:
> [  265.544179] l2cap_skbuff_fromiovec:
> [  265.544183] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
> c000  sdulen 0
> [  265.544187] l2cap_loglink_validate:
> [  265.544191] l2cap_skbuff_fromiovec:
> [  265.544195] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
> c000  sdulen 0
> [  265.544200] l2cap_loglink_validate:
> [  265.544203] l2cap_skbuff_fromiovec:
> [  265.544207] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
> c000  sdulen 0
> [  265.544211] l2cap_loglink_validate:
> [  265.544214] l2cap_skbuff_fromiovec:
> [  265.544218] l2cap_create_iframe_pdu: sk e015fc00 len 1011 control
> c000  sdulen 0
> [  265.544221] l2cap_loglink_validate:
> [  265.544225] l2cap_skbuff_fromiovec:
> [  265.544229] l2cap_create_iframe_pdu: sk e015fc00 len 681 control
> 8000  sdulen 0
> [  265.544252] l2cap_loglink_validate:
> [  265.544255] l2cap_skbuff_fromiovec:
> [  265.544483] l2cap_recv_acldata:
> [  265.544488] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.544492] l2cap_recv_frame: conn f461bcc0, skb ee91ccc0, cid 42, len 4
> [  265.544496] l2cap_recv_frame: len 4, cid 0x0042
> [  265.544498] l2cap_data_channel:
> [  265.544501] l2cap_get_chan_by_scid:
> [  265.544504] __l2cap_get_chan_by_scid:
> [  265.544508] l2cap_data_channel: sk e015fc00, len 4
> [  265.544511] l2cap_ertm_data_rcv:
> [  265.544514] l2cap_check_fcs:
> [  265.544517] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2209 len 0
> [  265.544521] l2cap_data_channel_rnrframe: sk e015fc00, req_seq 34 ctrl 0x2209
> [  265.544525] l2cap_drop_acked_frames:
> [  265.544636] l2cap_recv_acldata:
> [  265.544641] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.544645] l2cap_recv_frame: conn f461bcc0, skb ee91c6c0, cid 42, len 4
> [  265.544649] l2cap_recv_frame: len 4, cid 0x0042
> [  265.544652] l2cap_data_channel:
> [  265.544655] l2cap_get_chan_by_scid:
> [  265.544657] __l2cap_get_chan_by_scid:
> [  265.570492] l2cap_recv_acldata:
> [  265.570503] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.570507] l2cap_recv_frame: conn f461bcc0, skb ee91c0c0, cid 42, len 4
> [  265.570513] l2cap_recv_frame: len 4, cid 0x0042
> [  265.570517] l2cap_data_channel:
> [  265.570520] l2cap_get_chan_by_scid:
> [  265.570524] __l2cap_get_chan_by_scid:
> [  265.570529] l2cap_data_channel: sk e015fc00, len 4
> [  265.570533] l2cap_ertm_data_rcv:
> [  265.570536] l2cap_check_fcs:
> [  265.570542] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2709 len 0
> [  265.570547] l2cap_data_channel_rnrframe: sk e015fc00, req_seq 39 ctrl 0x2709
> [  265.570550] l2cap_drop_acked_frames:
> [  265.570658] l2cap_recv_acldata:
> [  265.570663] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.570668] l2cap_recv_frame: conn f461bcc0, skb ee91ca80, cid 42, len 4
> [  265.570673] l2cap_recv_frame: len 4, cid 0x0042
> [  265.570677] l2cap_data_channel:
> [  265.570680] l2cap_get_chan_by_scid:
> [  265.570683] __l2cap_get_chan_by_scid:
> [  265.570687] l2cap_data_channel: sk e015fc00, len 4
> [  265.570691] l2cap_ertm_data_rcv:
> [  265.570694] l2cap_check_fcs:
> [  265.570698] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2809 len 0
> [  265.570702] l2cap_data_channel_rnrframe: sk e015fc00, req_seq 40 ctrl 0x2809
> [  265.570706] l2cap_drop_acked_frames:
> [  265.570858] l2cap_recv_acldata:
> [  265.572903] l2cap_recv_acldata:
> [  265.572910] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.572915] l2cap_recv_frame: conn f461bcc0, skb f469fa80, cid 42, len 4
> [  265.572919] l2cap_recv_frame: len 4, cid 0x0042
> [  265.572921] l2cap_data_channel:
> [  265.572925] l2cap_get_chan_by_scid:
> [  265.572928] __l2cap_get_chan_by_scid:
> [  265.572933] l2cap_data_channel: sk e015fc00, len 4
> [  265.572936] l2cap_ertm_data_rcv:
> [  265.572938] l2cap_check_fcs:
> [  265.572943] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x2b09 len 0
> [  265.573348] l2cap_recv_acldata:
> 
> [  265.609993] l2cap_recv_acldata:
> [  265.610005] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.610009] l2cap_recv_frame: conn f461bcc0, skb ee91c540, cid 42, len 4
> [  265.610013] l2cap_recv_frame: len 4, cid 0x0042
> [  265.610016] l2cap_data_channel:
> [  265.610019] l2cap_get_chan_by_scid:
> [  265.610022] __l2cap_get_chan_by_scid:
> [  265.610025] l2cap_data_channel: sk e015fc00, len 4
> [  265.610029] l2cap_ertm_data_rcv:
> [  265.610032] l2cap_check_fcs:
> [  265.610036] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x3801 len 0
> [  265.610041] l2cap_data_channel_rrframe: sk e015fc00, req_seq 56 ctrl 0x3801
> [  265.610044] l2cap_drop_acked_frames:
> [  265.610060] l2cap_ertm_send: sk e015fc00, sk->scid 42, sk->dcid 5d
> [  265.610064] l2cap_tx_window_full:
> [  265.610071] l2cap_ertm_send: pi->next_tx_seq: 13, pi->buffer_seq: 2
> [  265.610075] l2cap_do_send: sk e015fc00, cid 66 skb e0147840 len 1019
> [  265.610078] l2cap_loglink_validate:
> [  265.610081] l2cap_do_send: send I frame over AMP controller
> [  265.610085] l2cap_tx_window_full:
> [  265.610093] l2cap_ertm_send: pi->next_tx_seq: 14, pi->buffer_seq: 2
> [  265.610096] l2cap_do_send: sk e015fc00, cid 66 skb f4801cc0 len 1019
> [  265.610099] l2cap_loglink_validate:
> [  265.610102] l2cap_do_send: send I frame over AMP controller
> [  265.610105] l2cap_tx_window_full:
> [  265.610112] l2cap_ertm_send: pi->next_tx_seq: 15, pi->buffer_seq: 2
> [  265.610115] l2cap_do_send: sk e015fc00, cid 66 skb f4801600 len 1019
> [  265.610118] l2cap_loglink_validate:
> [  265.610121] l2cap_do_send: send I frame over AMP controller
> [  265.610124] l2cap_tx_window_full:
> [  265.610130] l2cap_ertm_send: pi->next_tx_seq: 16, pi->buffer_seq: 2
> [  265.610133] l2cap_do_send: sk e015fc00, cid 66 skb f4801c00 len 689
> [  265.610137] l2cap_loglink_validate:
> [  265.610140] l2cap_do_send: send I frame over AMP controller
> [  265.610143] l2cap_tx_window_full:
> [  265.610153] l2cap_ertm_send: pi->next_tx_seq: 17, pi->buffer_seq: 2
> [  265.610215] l2cap_ertm_send: pi->next_tx_seq: 20, pi->buffer_seq: 2
> [  265.610219] l2cap_do_send: sk e015fc00, cid 66 skb f47f03c0 len 1019
> [  265.610222] l2cap_loglink_validate:
> [  265.619937] l2cap_recv_acldata:
> [  265.619948] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.619952] l2cap_recv_frame: conn f461bcc0, skb ee91c300, cid 42, len 4
> [  265.619956] l2cap_recv_frame: len 4, cid 0x0042
> [  265.620154] l2cap_ertm_send: pi->next_tx_seq: 29, pi->buffer_seq: 2
> [  265.629111] l2cap_recv_acldata:
> [  265.629123] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> 
> [  265.639371] l2cap_recv_acldata:
> [  265.639384] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.639388] l2cap_recv_frame: conn f461bcc0, skb ee91ccc0, cid 42, len 4
> [  265.639392] l2cap_recv_frame: len 4, cid 0x0042
> [  265.639395] l2cap_data_channel:
> [  265.639398] l2cap_get_chan_by_scid:
> [  265.639401] __l2cap_get_chan_by_scid:
> [  265.639405] l2cap_data_channel: sk e015fc00, len 4
> [  265.639407] l2cap_ertm_data_rcv:
> [  265.639570] l2cap_do_send: send I frame over AMP controller
> [  265.646669] l2cap_recv_acldata:
> [  265.646681] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.646685] l2cap_recv_frame: conn f461bcc0, skb ee91c6c0, cid 42, len 4
> [  265.646822] l2cap_loglink_validate:
> [  265.646825] l2cap_skbuff_fromiovec:
> [  265.647800] l2cap_recv_acldata:
> [  265.647808] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.649645] l2cap_recv_acldata:
> [  265.649655] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.649659] l2cap_recv_frame: conn f461bcc0, skb ee91c180, cid 42, len 4
> [  265.649663] l2cap_recv_frame: len 4, cid 0x0042
> [  265.651518] l2cap_recv_acldata:
> [  265.651527] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.651532] l2cap_recv_frame: conn f461bcc0, skb ee91c0c0, cid 42, len 4
> [  265.655539] l2cap_recv_acldata:
> [  265.655547] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.655550] l2cap_recv_frame: conn f461bcc0, skb e035bc00, cid 42, len 4
> [  265.655554] l2cap_recv_frame: len 4, cid 0x0042
> [  265.655556] l2cap_data_channel:
> [  265.655559] l2cap_get_chan_by_scid:
> [  265.655562] __l2cap_get_chan_by_scid:
> [  265.663270] l2cap_recv_acldata:
> [  265.663276] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.667987] l2cap_recv_acldata:
> [  265.673206] l2cap_recv_acldata:
> [  265.673217] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.673221] l2cap_recv_frame: conn f461bcc0, skb ee91c780, cid 42, len 4
> [  265.673225] l2cap_recv_frame: len 4, cid 0x0042
> [  265.673227] l2cap_data_channel:
> [  265.673230] l2cap_get_chan_by_scid:
> [  265.673233] __l2cap_get_chan_by_scid:
> [  265.673236] l2cap_data_channel: sk e015fc00, len 4
> [  265.673240] l2cap_ertm_data_rcv:
> [  265.673243] l2cap_check_fcs:
> [  265.673247] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x3109 len 0
> [  265.675265] l2cap_recv_acldata:
> [  265.675273] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.691337] l2cap_recv_acldata:
> [  265.691348] l2cap_recv_acldata: conn f461bcc0 len 8 flags 0x3
> [  265.691352] l2cap_recv_frame: conn f461bcc0, skb ee91c000, cid 42, len 4
> [  265.691356] l2cap_recv_frame: len 4, cid 0x0042
> [  265.691359] l2cap_data_channel:
> [  265.691362] l2cap_get_chan_by_scid:
> [  265.691366] __l2cap_get_chan_by_scid:
> [  265.691369] l2cap_data_channel: sk e015fc00, len 4
> [  265.691372] l2cap_ertm_data_rcv:
> [  265.691375] l2cap_check_fcs:
> [  265.691379] l2cap_data_channel_sframe: sk e015fc00 rx_control 0x3511 len 0
> [  265.691383] l2cap_data_channel_rrframe: sk e015fc00, req_seq 53 ctrl 0x3511
> [  265.691386] l2cap_drop_acked_frames:
> [  265.691389] l2cap_send_i_or_rr_or_rnr:
> [  265.691392] l2cap_ertm_send: sk e015fc00, sk->scid 42, sk->dcid 5d
> [  265.691396] l2cap_tx_window_full:
> [  265.691400] l2cap_ertm_send: pi->next_tx_seq: 53, pi->buffer_seq: 2
> [  265.691404] l2cap_do_send: sk e015fc00, cid 66 skb e0204000 len 101
> [  265.691407] l2cap_loglink_validate:
> [  265.691410] l2cap_do_send: send I frame over AMP controller

This dump shows that the crash happens for a code that is not mainline
yet. I can't take a patch that fix a bug for code not in mainline. You
have to show the bug using mainline code.

-- 
Gustavo F. Padovan
ProFUSION embedded systems - http://profusion.mobi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-10-25 11:01     ` Gustavo F. Padovan
@ 2010-10-26  1:32       ` haijun liu
       [not found]         ` <AANLkTin+dNkjySQBvCSLK9f5aRF9445UqjhXaNvKWSz_@mail.gmail.com>
  2010-10-28  8:49         ` Gustavo F. Padovan
  0 siblings, 2 replies; 16+ messages in thread
From: haijun liu @ 2010-10-26  1:32 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: Haijun Liu, linux-bluetooth

Hi Gustavo,

>> >> During test session with another vendor's bt stack, found that in
>> >> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
>> >> be called after the sock was freed, so it raised a system crash.
>> >> So I just replaced del_timer() with del_timer_sync() to solve it.
>> >
>> > NAK on this. If you read the del_timer_sync() documentation you can
>> > see that you can't call del_timer_sync() on interrupt context. The
>> > possible solution here is to check in the beginning of
>> > l2cap_monitor_timeout() if your sock is still valid.
>> >
>>
>> You are right, I only considered close() interface, so missed the interrupt
>> context.
>>
>> It's very difficult to check sock valid or not in timeout procedure, since it's
>> an interrupt context, and only can get context from parameter pre-stored,
>> except global variables.
>
> I think you can check for sk == null there.
>

It's a pre-stored parameter, it will not change by itself.

-- 
Haijun Liu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
       [not found]         ` <AANLkTin+dNkjySQBvCSLK9f5aRF9445UqjhXaNvKWSz_@mail.gmail.com>
@ 2010-10-26  7:35           ` haijun liu
  0 siblings, 0 replies; 16+ messages in thread
From: haijun liu @ 2010-10-26  7:35 UTC (permalink / raw)
  To: shy; +Cc: Gustavo F. Padovan, Haijun Liu, linux-bluetooth

Hi Shy,

> You can check the sk in list l2cap_sk_list.

If we use global lock, in case of mp env., we have to keep it locked
before exiting l2cap_monitor_timeout().

I don't know whether it can be accepted.


-- 
Haijun Liu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect
  2010-10-25 11:09       ` Gustavo F. Padovan
@ 2010-10-26 11:50         ` haijun liu
  0 siblings, 0 replies; 16+ messages in thread
From: haijun liu @ 2010-10-26 11:50 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: Haijun Liu, linux-bluetooth

Hi Gustavo,

> sk->sk_send_head is also protected by the socket lock.
>

I thought you mean sk->tx_queue.lock, actually it's sk->sk_lock.slock.

But there could be some paths have not been covered.

> This dump shows that the crash happens for a code that is not mainline
> yet. I can't take a patch that fix a bug for code not in mainline. You
> have to show the bug using mainline code.
>

Yes, it is not mainline code, but this issue could be common issue.

OK, let me try use sk->sk_lock.slock to solve this bug.

-- 
Haijun Liu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-10-26  1:32       ` haijun liu
       [not found]         ` <AANLkTin+dNkjySQBvCSLK9f5aRF9445UqjhXaNvKWSz_@mail.gmail.com>
@ 2010-10-28  8:49         ` Gustavo F. Padovan
  2010-11-01  1:22           ` haijun liu
  1 sibling, 1 reply; 16+ messages in thread
From: Gustavo F. Padovan @ 2010-10-28  8:49 UTC (permalink / raw)
  To: haijun liu; +Cc: Haijun Liu, linux-bluetooth

Hi Haijun,

* haijun liu <liuhaijun.er@gmail.com> [2010-10-26 09:32:19 +0800]:

> Hi Gustavo,
> 
> >> >> During test session with another vendor's bt stack, found that in
> >> >> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
> >> >> be called after the sock was freed, so it raised a system crash.
> >> >> So I just replaced del_timer() with del_timer_sync() to solve it.
> >> >
> >> > NAK on this. If you read the del_timer_sync() documentation you can
> >> > see that you can't call del_timer_sync() on interrupt context. The
> >> > possible solution here is to check in the beginning of
> >> > l2cap_monitor_timeout() if your sock is still valid.
> >> >
> >>
> >> You are right, I only considered close() interface, so missed the interrupt
> >> context.
> >>
> >> It's very difficult to check sock valid or not in timeout procedure, since it's
> >> an interrupt context, and only can get context from parameter pre-stored,
> >> except global variables.
> >
> > I think you can check for sk == null there.
> >
> 
> It's a pre-stored parameter, it will not change by itself.

I looked a bit into this and a good solution seems to be to hold a
reference to the sock when we call a mod_timer() and then put the
reference when we call del_timer() and the timer is inactive or when
l2cap_monitor_timeout(). Look net/sctp/ for examples.

-- 
Gustavo F. Padovan
ProFUSION embedded systems - http://profusion.mobi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-10-28  8:49         ` Gustavo F. Padovan
@ 2010-11-01  1:22           ` haijun liu
  2010-11-03 17:56             ` Gustavo F. Padovan
  0 siblings, 1 reply; 16+ messages in thread
From: haijun liu @ 2010-11-01  1:22 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: Haijun Liu, linux-bluetooth

Hi Gustavo,

>> >> >> During test session with another vendor's bt stack, found that in
>> >> >> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
>> >> >> be called after the sock was freed, so it raised a system crash.
>> >> >> So I just replaced del_timer() with del_timer_sync() to solve it.
>> >> >
>> >> > NAK on this. If you read the del_timer_sync() documentation you can
>> >> > see that you can't call del_timer_sync() on interrupt context. The
>> >> > possible solution here is to check in the beginning of
>> >> > l2cap_monitor_timeout() if your sock is still valid.
>> >> >
>> >>
>> >> You are right, I only considered close() interface, so missed the interrupt
>> >> context.
>> >>
>> >> It's very difficult to check sock valid or not in timeout procedure, since it's
>> >> an interrupt context, and only can get context from parameter pre-stored,
>> >> except global variables.
>> >
>> > I think you can check for sk == null there.
>> >
>>
>> It's a pre-stored parameter, it will not change by itself.
>
> I looked a bit into this and a good solution seems to be to hold a
> reference to the sock when we call a mod_timer() and then put the
> reference when we call del_timer() and the timer is inactive or when
> l2cap_monitor_timeout(). Look net/sctp/ for examples.
>

Same situation still is there, in this case, l2cap_monitor_timeout()
how to know the reference has been released by del_timer()?

If we refer to net/sctp, use timer_pending() to detect it, unless we can
ensure del_timer() always be called in interrupt context and same
level compare to timer interrupt, otherwise it's not an atomic operation
for this case.

-- 
Haijun Liu

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-11-01  1:22           ` haijun liu
@ 2010-11-03 17:56             ` Gustavo F. Padovan
  2010-11-03 21:12               ` Mat Martineau
  0 siblings, 1 reply; 16+ messages in thread
From: Gustavo F. Padovan @ 2010-11-03 17:56 UTC (permalink / raw)
  To: haijun liu; +Cc: Haijun Liu, linux-bluetooth

Hi Haijun,

* haijun liu <liuhaijun.er@gmail.com> [2010-11-01 09:22:14 +0800]:

> Hi Gustavo,
> 
> >> >> >> During test session with another vendor's bt stack, found that in
> >> >> >> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
> >> >> >> be called after the sock was freed, so it raised a system crash.
> >> >> >> So I just replaced del_timer() with del_timer_sync() to solve it.
> >> >> >
> >> >> > NAK on this. If you read the del_timer_sync() documentation you can
> >> >> > see that you can't call del_timer_sync() on interrupt context. The
> >> >> > possible solution here is to check in the beginning of
> >> >> > l2cap_monitor_timeout() if your sock is still valid.
> >> >> >
> >> >>
> >> >> You are right, I only considered close() interface, so missed the interrupt
> >> >> context.
> >> >>
> >> >> It's very difficult to check sock valid or not in timeout procedure, since it's
> >> >> an interrupt context, and only can get context from parameter pre-stored,
> >> >> except global variables.
> >> >
> >> > I think you can check for sk == null there.
> >> >
> >>
> >> It's a pre-stored parameter, it will not change by itself.
> >
> > I looked a bit into this and a good solution seems to be to hold a
> > reference to the sock when we call a mod_timer() and then put the
> > reference when we call del_timer() and the timer is inactive or when
> > l2cap_monitor_timeout(). Look net/sctp/ for examples.
> >
> 
> Same situation still is there, in this case, l2cap_monitor_timeout()
> how to know the reference has been released by del_timer()?
> 
> If we refer to net/sctp, use timer_pending() to detect it, unless we can
> ensure del_timer() always be called in interrupt context and same
> level compare to timer interrupt, otherwise it's not an atomic operation
> for this case.

No, the socket lock take care of that, so there will be no race
condition here. One related thing that we should fix is a locking
problem in l2cap_monitor_timeout() and l2cap_retrans_timeout() already
reported by Mat Martineau. (I just can't find his e-mail about that).
So I think you should go ahead and this problem using ref counting and
then we can also fix the problem reported by Mat.

-- 
Gustavo F. Padovan
ProFUSION embedded systems - http://profusion.mobi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer()
  2010-11-03 17:56             ` Gustavo F. Padovan
@ 2010-11-03 21:12               ` Mat Martineau
  0 siblings, 0 replies; 16+ messages in thread
From: Mat Martineau @ 2010-11-03 21:12 UTC (permalink / raw)
  To: Gustavo F. Padovan; +Cc: haijun liu, Haijun Liu, linux-bluetooth


Gustavo and Haijun -

On Wed, 3 Nov 2010, Gustavo F. Padovan wrote:

> Hi Haijun,
>
> * haijun liu <liuhaijun.er@gmail.com> [2010-11-01 09:22:14 +0800]:
>
>> Hi Gustavo,
>>
>>>>>>>> During test session with another vendor's bt stack, found that in
>>>>>>>> l2cap_chan_del() using del_timer() caused l2cap_monitor_timeout()
>>>>>>>> be called after the sock was freed, so it raised a system crash.
>>>>>>>> So I just replaced del_timer() with del_timer_sync() to solve it.
>>>>>>>
>>>>>>> NAK on this. If you read the del_timer_sync() documentation you can
>>>>>>> see that you can't call del_timer_sync() on interrupt context. The
>>>>>>> possible solution here is to check in the beginning of
>>>>>>> l2cap_monitor_timeout() if your sock is still valid.
>>>>>>>
>>>>>>
>>>>>> You are right, I only considered close() interface, so missed the interrupt
>>>>>> context.
>>>>>>
>>>>>> It's very difficult to check sock valid or not in timeout procedure, since it's
>>>>>> an interrupt context, and only can get context from parameter pre-stored,
>>>>>> except global variables.
>>>>>
>>>>> I think you can check for sk == null there.
>>>>>
>>>>
>>>> It's a pre-stored parameter, it will not change by itself.
>>>
>>> I looked a bit into this and a good solution seems to be to hold a
>>> reference to the sock when we call a mod_timer() and then put the
>>> reference when we call del_timer() and the timer is inactive or when
>>> l2cap_monitor_timeout(). Look net/sctp/ for examples.
>>>
>>
>> Same situation still is there, in this case, l2cap_monitor_timeout()
>> how to know the reference has been released by del_timer()?
>>
>> If we refer to net/sctp, use timer_pending() to detect it, unless we can
>> ensure del_timer() always be called in interrupt context and same
>> level compare to timer interrupt, otherwise it's not an atomic operation
>> for this case.
>
> No, the socket lock take care of that, so there will be no race
> condition here. One related thing that we should fix is a locking
> problem in l2cap_monitor_timeout() and l2cap_retrans_timeout() already
> reported by Mat Martineau. (I just can't find his e-mail about that).
> So I think you should go ahead and this problem using ref counting and
> then we can also fix the problem reported by Mat.

Here's the message you're referring to:

http://www.spinics.net/lists/linux-bluetooth/msg06734.html

I do have patches to use a workqueue instead of timers that I'll 
upstream soon, but it looks like some kind of reference counting is 
still necessary.

del_timer_sync() or cancel_delayed_work_sync() could be called by a 
workqueue function.  We could sock_hold(), queue the cancellation 
work, then let the workqueue function call sock_put() after the 
timeouts (monitor, retrans, and ack) are all synchronously cancelled.

--
Mat Martineau
Employee of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2010-11-03 21:12 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-22  2:26 [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer() Haijun Liu
2010-10-22  2:26 ` [PATCH 2/2 v2] Bluetooth: Fix system crash bug of no send queue protect Haijun Liu
2010-10-22 17:34   ` Gustavo F. Padovan
2010-10-25  2:15     ` haijun liu
2010-10-25 11:09       ` Gustavo F. Padovan
2010-10-26 11:50         ` haijun liu
2010-10-22 17:18 ` [PATCH 1/2 v2] Bluetooth: Fix system crash caused by del_timer() Gustavo F. Padovan
2010-10-25  1:35   ` haijun liu
2010-10-25  2:21     ` haijun liu
2010-10-25 11:01     ` Gustavo F. Padovan
2010-10-26  1:32       ` haijun liu
     [not found]         ` <AANLkTin+dNkjySQBvCSLK9f5aRF9445UqjhXaNvKWSz_@mail.gmail.com>
2010-10-26  7:35           ` haijun liu
2010-10-28  8:49         ` Gustavo F. Padovan
2010-11-01  1:22           ` haijun liu
2010-11-03 17:56             ` Gustavo F. Padovan
2010-11-03 21:12               ` Mat Martineau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.