linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access
@ 2018-07-20 13:28 Tomas Bortoli
  2018-07-23  2:24 ` jiangyiwen
  2018-07-23  3:02 ` Dominique Martinet
  0 siblings, 2 replies; 4+ messages in thread
From: Tomas Bortoli @ 2018-07-20 13:28 UTC (permalink / raw)
  To: ericvh, rminnich, lucho
  Cc: jiangyiwen, davem, v9fs-developer, netdev, linux-kernel,
	syzkaller, Tomas Bortoli

This patch uses list_del_init() instead of list_del() to remove requests from "req_list". This prevents a second list_del() call on the same entry from triggering a GPF. Furthermore, this patch fixes an access to "req_list" that was made without holding the relevant lock.

Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com
---

 net/9p/trans_fd.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index a64b01c56e30..131bb1f059e6 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -223,7 +223,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
 		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
-		list_del(&req->req_list);
+		spin_lock_irqsave(&m->client->lock, flags);
+		list_del_init(&req->req_list);
+		spin_unlock_irqrestore(&m->client->lock, flags);
 		if (!req->t_err)
 			req->t_err = err;
 		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
@@ -369,7 +371,7 @@ static void p9_read_work(struct work_struct *work)
 		spin_lock(&m->client->lock);
 		if (m->req->status != REQ_STATUS_ERROR)
 			status = REQ_STATUS_RCVD;
-		list_del(&m->req->req_list);
+		list_del_init(&m->req->req_list);
 		spin_unlock(&m->client->lock);
 		p9_client_cb(m->client, m->req, status);
 		m->rc.sdata = NULL;
@@ -684,7 +686,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	spin_lock(&client->lock);
 
 	if (req->status == REQ_STATUS_UNSENT) {
-		list_del(&req->req_list);
+		list_del_init(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
 		ret = 0;
 	}
@@ -701,7 +703,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
 	 * remove it from the list.
 	 */
 	spin_lock(&client->lock);
-	list_del(&req->req_list);
+	list_del_init(&req->req_list);
 	spin_unlock(&client->lock);
 
 	return 0;
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access
  2018-07-20 13:28 [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access Tomas Bortoli
@ 2018-07-23  2:24 ` jiangyiwen
  2018-07-23  3:02 ` Dominique Martinet
  1 sibling, 0 replies; 4+ messages in thread
From: jiangyiwen @ 2018-07-23  2:24 UTC (permalink / raw)
  To: Tomas Bortoli, ericvh, rminnich, lucho
  Cc: davem, v9fs-developer, netdev, linux-kernel, syzkaller

On 2018/7/20 21:28, Tomas Bortoli wrote:
> This patch uses list_del_init() instead of list_del() to eliminate "req_list". This to prevent double list_del()'s calls to the same list from provoking a GPF. Furthermore, this patch fixes an access to "req_list" that was made without getting the relative lock.
> 

I suggest you apply a 72-character line limit to your
commit messages. Everything else looks good to me.

Thanks,
Yiwen.

> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
> Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com
> ---
> 
>  net/9p/trans_fd.c | 10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index a64b01c56e30..131bb1f059e6 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -223,7 +223,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
>  
>  	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
>  		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
> -		list_del(&req->req_list);
> +		spin_lock_irqsave(&m->client->lock, flags);
> +		list_del_init(&req->req_list);
> +		spin_unlock_irqrestore(&m->client->lock, flags);
>  		if (!req->t_err)
>  			req->t_err = err;
>  		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
> @@ -369,7 +371,7 @@ static void p9_read_work(struct work_struct *work)
>  		spin_lock(&m->client->lock);
>  		if (m->req->status != REQ_STATUS_ERROR)
>  			status = REQ_STATUS_RCVD;
> -		list_del(&m->req->req_list);
> +		list_del_init(&m->req->req_list);
>  		spin_unlock(&m->client->lock);
>  		p9_client_cb(m->client, m->req, status);
>  		m->rc.sdata = NULL;
> @@ -684,7 +686,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
>  	spin_lock(&client->lock);
>  
>  	if (req->status == REQ_STATUS_UNSENT) {
> -		list_del(&req->req_list);
> +		list_del_init(&req->req_list);
>  		req->status = REQ_STATUS_FLSHD;
>  		ret = 0;
>  	}
> @@ -701,7 +703,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
>  	 * remove it from the list.
>  	 */
>  	spin_lock(&client->lock);
> -	list_del(&req->req_list);
> +	list_del_init(&req->req_list);
>  	spin_unlock(&client->lock);
>  
>  	return 0;
> 



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access
  2018-07-20 13:28 [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access Tomas Bortoli
  2018-07-23  2:24 ` jiangyiwen
@ 2018-07-23  3:02 ` Dominique Martinet
  2018-07-23 11:46   ` Tomas Bortoli
  1 sibling, 1 reply; 4+ messages in thread
From: Dominique Martinet @ 2018-07-23  3:02 UTC (permalink / raw)
  To: Tomas Bortoli
  Cc: ericvh, rminnich, lucho, jiangyiwen, davem, v9fs-developer,
	netdev, linux-kernel, syzkaller

Tomas Bortoli wrote on Fri, Jul 20, 2018:
> This patch uses list_del_init() instead of list_del() to eliminate
> "req_list". This to prevent double list_del()'s calls to the same list
> from provoking a GPF. Furthermore, this patch fixes an access to
> "req_list" that was made without getting the relative lock.

Please see comment about locking.

As for changing list_del to list_del_init, it feels a little wrong to me,
but I don't have a better idea so let's go with that.
Do you know what happened to trigger this? One thread running
p9_conn_cancel, then the other thread doing p9_fd_cancel?

> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
> Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com
> ---
> 
>  net/9p/trans_fd.c | 10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index a64b01c56e30..131bb1f059e6 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -223,7 +223,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
>  
>  	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
>  		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
> -		list_del(&req->req_list);
> +		spin_lock_irqsave(&m->client->lock, flags);
> +		list_del_init(&req->req_list);
> +		spin_unlock_irqrestore(&m->client->lock, flags);

Just locking around one item if you're afraid it might change won't be
enough - list_for_each_entry_safe is only "safe" from removing the
current element from the list yourself, not from other threads messing
with it, so you'd need to lock around the whole loop if that's what
you're protecting against.

(Also, since I've taken the other patches to change spin locks on
client->lock to spin_lock instead of spin_lock_irqsave, please use that
function for new locking of that variable - in general just basing your
patches off linux-next's master branch is a good idea.)

>  		if (!req->t_err)
>  			req->t_err = err;
>  		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
> @@ -369,7 +371,7 @@ static void p9_read_work(struct work_struct *work)
>  		spin_lock(&m->client->lock);
>  		if (m->req->status != REQ_STATUS_ERROR)
>  			status = REQ_STATUS_RCVD;
> -		list_del(&m->req->req_list);
> +		list_del_init(&m->req->req_list);
>  		spin_unlock(&m->client->lock);
>  		p9_client_cb(m->client, m->req, status);
>  		m->rc.sdata = NULL;
> @@ -684,7 +686,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
>  	spin_lock(&client->lock);
>  
>  	if (req->status == REQ_STATUS_UNSENT) {
> -		list_del(&req->req_list);
> +		list_del_init(&req->req_list);
>  		req->status = REQ_STATUS_FLSHD;
>  		ret = 0;
>  	}
> @@ -701,7 +703,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
>  	 * remove it from the list.
>  	 */
>  	spin_lock(&client->lock);
> -	list_del(&req->req_list);
> +	list_del_init(&req->req_list);
>  	spin_unlock(&client->lock);
>  
>  	return 0;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access
  2018-07-23  3:02 ` Dominique Martinet
@ 2018-07-23 11:46   ` Tomas Bortoli
  0 siblings, 0 replies; 4+ messages in thread
From: Tomas Bortoli @ 2018-07-23 11:46 UTC (permalink / raw)
  To: Dominique Martinet
  Cc: ericvh, rminnich, lucho, jiangyiwen, davem, v9fs-developer,
	netdev, linux-kernel, syzkaller

On 07/23/2018 05:02 AM, Dominique Martinet wrote:
> Tomas Bortoli wrote on Fri, Jul 20, 2018:
>> This patch uses list_del_init() instead of list_del() to eliminate
>> "req_list". This to prevent double list_del()'s calls to the same list
>> from provoking a GPF. Furthermore, this patch fixes an access to
>> "req_list" that was made without getting the relative lock.
> 
> Please see comment about locking.
> 
> As for list_del to list_del_init, it feels a little wrong to me, but I
> don't have a better idea so let's go with that.

Yes, it's not the best solution.

> Do you know what happened to trigger this? one thread running
> p9_conn_cancel then the other thread doing p9_fd_cancel ?
> 

I don't see how the races are supposed to be prevented. The bug is
triggered in p9_fd_cancel(), and in this case it is due to the status of
the request being REQ_STATUS_UNSENT. However, list_del(&req->req_list) is
used 4 times in trans_fd.c:

- p9_read_work()
with the lock, but updating the status afterwards (leads to a race)
- p9_conn_cancel()
without the lock, and updating the status afterwards (leads to a race)
- p9_fd_cancelled()
.. ?
- p9_fd_cancel()
with the lock, run conditionally on the status
BOOM

So maybe we can try to see whether the problem is syncing the status
between different threads or whether there is more to it, but I don't know.

>> Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
>> Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com
>> ---
>>
>>  net/9p/trans_fd.c | 10 ++++++----
>>  1 file changed, 6 insertions(+), 4 deletions(-)
>>
>> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
>> index a64b01c56e30..131bb1f059e6 100644
>> --- a/net/9p/trans_fd.c
>> +++ b/net/9p/trans_fd.c
>> @@ -223,7 +223,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
>>  
>>  	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
>>  		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
>> -		list_del(&req->req_list);
>> +		spin_lock_irqsave(&m->client->lock, flags);
>> +		list_del_init(&req->req_list);
>> +		spin_unlock_irqrestore(&m->client->lock, flags);
> 
> Just locking around one item if you're afraid it might change won't be
> enough - list_for_each_entry_safe is only "safe" from removing the
> current element from the list yourself, not from other threads messing
> with it, so you'd need to lock around the whole loop if that's what
> you're protecting against.
> 

Right, I thought I had to unlock before p9_client_cb() as here:

https://github.com/torvalds/linux/blob/master/net/9p/trans_fd.c#L375

However, holding the client spinlock (client->lock) for the whole loop
doesn't seem to cause problems either. See the patch below.

> (Also, since I've taken the other patchs to change spin locks on
> client->lock to spin_lock instead of spin_lock_irqsave, please use that
> function for new locking of that variable - in general just basing your
> patchs off linux-next's master branch is a good idea.)
> 
>>  		if (!req->t_err)
>>  			req->t_err = err;
>>  		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
>> @@ -369,7 +371,7 @@ static void p9_read_work(struct work_struct *work)
>>  		spin_lock(&m->client->lock);
>>  		if (m->req->status != REQ_STATUS_ERROR)
>>  			status = REQ_STATUS_RCVD;
>> -		list_del(&m->req->req_list);
>> +		list_del_init(&m->req->req_list);
>>  		spin_unlock(&m->client->lock);
>>  		p9_client_cb(m->client, m->req, status);
>>  		m->rc.sdata = NULL;
>> @@ -684,7 +686,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
>>  	spin_lock(&client->lock);
>>  
>>  	if (req->status == REQ_STATUS_UNSENT) {
>> -		list_del(&req->req_list);
>> +		list_del_init(&req->req_list);
>>  		req->status = REQ_STATUS_FLSHD;
>>  		ret = 0;
>>  	}
>> @@ -701,7 +703,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
>>  	 * remove it from the list.
>>  	 */
>>  	spin_lock(&client->lock);
>> -	list_del(&req->req_list);
>> +	list_del_init(&req->req_list);
>>  	spin_unlock(&client->lock);
>>  
>>  	return 0;



diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index a64b01c56e30..2ae5f03d872f 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 static void p9_conn_cancel(struct p9_conn *m, int err)
 {
 	struct p9_req_t *req, *rtmp;
-	unsigned long flags;
 	LIST_HEAD(cancel_list);

 	p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);

-	spin_lock_irqsave(&m->client->lock, flags);
+	spin_lock(&m->client->lock);

 	if (m->err) {
-		spin_unlock_irqrestore(&m->client->lock, flags);
+		spin_unlock(&m->client->lock);
 		return;
 	}

@@ -223,11 +222,12 @@ static void p9_conn_cancel(struct p9_conn *m, int err)

 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
 		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
-		list_del(&req->req_list);
+		list_del_init(&req->req_list);
 		if (!req->t_err)
 			req->t_err = err;
 		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
 	}
+	spin_unlock(&m->client->lock);
 }

 static __poll_t
@@ -369,7 +369,7 @@ static void p9_read_work(struct work_struct *work)
 		spin_lock(&m->client->lock);
 		if (m->req->status != REQ_STATUS_ERROR)
 			status = REQ_STATUS_RCVD;
-		list_del(&m->req->req_list);
+		list_del_init(&m->req->req_list);
 		spin_unlock(&m->client->lock);
 		p9_client_cb(m->client, m->req, status);
 		m->rc.sdata = NULL;
@@ -684,7 +684,7 @@ static int p9_fd_cancel(struct p9_client *client,
struct p9_req_t *req)
 	spin_lock(&client->lock);

 	if (req->status == REQ_STATUS_UNSENT) {
-		list_del(&req->req_list);
+		list_del_init(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
 		ret = 0;
 	}
@@ -701,7 +701,7 @@ static int p9_fd_cancelled(struct p9_client *client,
struct p9_req_t *req)
 	 * remove it from the list.
 	 */
 	spin_lock(&client->lock);
-	list_del(&req->req_list);
+	list_del_init(&req->req_list);
 	spin_unlock(&client->lock);

 	return 0;


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-07-23 11:46 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-20 13:28 [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access Tomas Bortoli
2018-07-23  2:24 ` jiangyiwen
2018-07-23  3:02 ` Dominique Martinet
2018-07-23 11:46   ` Tomas Bortoli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).