All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] vhost_net: fix high cpu load when sendmsg fails
@ 2020-12-09 11:48 wangyunjian
  2020-12-09 12:49   ` Michael S. Tsirkin
  0 siblings, 1 reply; 11+ messages in thread
From: wangyunjian @ 2020-12-09 11:48 UTC (permalink / raw)
  To: mst, jasowang
  Cc: virtualization, netdev, jerry.lilijun, chenchanghu, xudingke,
	Yunjian Wang

From: Yunjian Wang <wangyunjian@huawei.com>

Currently we break out of the loop and wake up the vhost_worker when
sendmsg fails. When the worker wakes up again, it hits the same
error, which causes high CPU load. To fix this issue, we can skip
the offending descriptor by ignoring the error.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
---
 drivers/vhost/net.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 531a00d703cd..ac950b1120f5 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(sock, &msg, len);
-		if (unlikely(err < 0)) {
-			vhost_discard_vq_desc(vq, 1);
-			vhost_net_enable_vq(net, vq);
-			break;
-		}
-		if (err != len)
-			pr_debug("Truncated TX packet: len %d != %zd\n",
-				 err, len);
+		if (unlikely(err < 0 || err != len))
+			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err, len);
 done:
 		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
 		vq->heads[nvq->done_idx].len = 0;
@@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(sock, &msg, len);
-		if (unlikely(err < 0)) {
-			if (zcopy_used) {
+		if (unlikely(err < 0 || err != len)) {
+			if (zcopy_used && err < 0)
 				vhost_net_ubuf_put(ubufs);
-				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-					% UIO_MAXIOV;
-			}
-			vhost_discard_vq_desc(vq, 1);
-			vhost_net_enable_vq(net, vq);
-			break;
+			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err, len);
 		}
-		if (err != len)
-			pr_debug("Truncated TX packet: "
-				 " len %d != %zd\n", err, len);
 		if (!zcopy_used)
 			vhost_add_used_and_signal(&net->dev, vq, head, 0);
 		else
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
  2020-12-09 11:48 [PATCH net] vhost_net: fix high cpu load when sendmsg fails wangyunjian
@ 2020-12-09 12:49   ` Michael S. Tsirkin
  0 siblings, 0 replies; 11+ messages in thread
From: Michael S. Tsirkin @ 2020-12-09 12:49 UTC (permalink / raw)
  To: wangyunjian
  Cc: jasowang, virtualization, netdev, jerry.lilijun, chenchanghu, xudingke

On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
> From: Yunjian Wang <wangyunjian@huawei.com>
> 
> Currently we break the loop and wake up the vhost_worker when
> sendmsg fails. When the worker wakes up again, we'll meet the
> same error. This will cause high CPU load. To fix this issue,
> we can skip this description by ignoring the error.
> 
> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
> ---
>  drivers/vhost/net.c | 24 +++++-------------------
>  1 file changed, 5 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 531a00d703cd..ac950b1120f5 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>  
>  		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>  		err = sock->ops->sendmsg(sock, &msg, len);
> -		if (unlikely(err < 0)) {
> -			vhost_discard_vq_desc(vq, 1);
> -			vhost_net_enable_vq(net, vq);
> -			break;
> -		}
> -		if (err != len)
> -			pr_debug("Truncated TX packet: len %d != %zd\n",
> -				 err, len);
> +		if (unlikely(err < 0 || err != len))
> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err, len);
>  done:
>  		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>  		vq->heads[nvq->done_idx].len = 0;

One of the reasons sendmsg can fail is ENOBUFS.
In that case we certainly don't want to drop the packet.
There could be other transient errors as well.
Which error did you encounter, specifically?

> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
>  
>  		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>  		err = sock->ops->sendmsg(sock, &msg, len);
> -		if (unlikely(err < 0)) {
> -			if (zcopy_used) {
> +		if (unlikely(err < 0 || err != len)) {
> +			if (zcopy_used && err < 0)
>  				vhost_net_ubuf_put(ubufs);
> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
> -					% UIO_MAXIOV;
> -			}
> -			vhost_discard_vq_desc(vq, 1);
> -			vhost_net_enable_vq(net, vq);
> -			break;
> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err, len);
>  		}
> -		if (err != len)
> -			pr_debug("Truncated TX packet: "
> -				 " len %d != %zd\n", err, len);
>  		if (!zcopy_used)
>  			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>  		else
> -- 
> 2.23.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
@ 2020-12-09 12:49   ` Michael S. Tsirkin
  0 siblings, 0 replies; 11+ messages in thread
From: Michael S. Tsirkin @ 2020-12-09 12:49 UTC (permalink / raw)
  To: wangyunjian; +Cc: netdev, jerry.lilijun, virtualization, chenchanghu, xudingke

On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
> From: Yunjian Wang <wangyunjian@huawei.com>
> 
> Currently we break the loop and wake up the vhost_worker when
> sendmsg fails. When the worker wakes up again, we'll meet the
> same error. This will cause high CPU load. To fix this issue,
> we can skip this description by ignoring the error.
> 
> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
> ---
>  drivers/vhost/net.c | 24 +++++-------------------
>  1 file changed, 5 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 531a00d703cd..ac950b1120f5 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>  
>  		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>  		err = sock->ops->sendmsg(sock, &msg, len);
> -		if (unlikely(err < 0)) {
> -			vhost_discard_vq_desc(vq, 1);
> -			vhost_net_enable_vq(net, vq);
> -			break;
> -		}
> -		if (err != len)
> -			pr_debug("Truncated TX packet: len %d != %zd\n",
> -				 err, len);
> +		if (unlikely(err < 0 || err != len))
> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err, len);
>  done:
>  		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>  		vq->heads[nvq->done_idx].len = 0;

One of the reasons sendmsg can fail is ENOBUFS.
In that case we certainly don't want to drop the packet.
There could be other transient errors as well.
Which error did you encounter, specifically?

> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
>  
>  		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>  		err = sock->ops->sendmsg(sock, &msg, len);
> -		if (unlikely(err < 0)) {
> -			if (zcopy_used) {
> +		if (unlikely(err < 0 || err != len)) {
> +			if (zcopy_used && err < 0)
>  				vhost_net_ubuf_put(ubufs);
> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
> -					% UIO_MAXIOV;
> -			}
> -			vhost_discard_vq_desc(vq, 1);
> -			vhost_net_enable_vq(net, vq);
> -			break;
> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err, len);
>  		}
> -		if (err != len)
> -			pr_debug("Truncated TX packet: "
> -				 " len %d != %zd\n", err, len);
>  		if (!zcopy_used)
>  			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>  		else
> -- 
> 2.23.0

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
  2020-12-09 12:49   ` Michael S. Tsirkin
  (?)
@ 2020-12-09 13:27   ` wangyunjian
  2020-12-11  2:52       ` Jason Wang
  -1 siblings, 1 reply; 11+ messages in thread
From: wangyunjian @ 2020-12-09 13:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: jasowang, virtualization, netdev, Lilijun (Jerry), chenchanghu, xudingke

> -----Original Message-----
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Wednesday, December 9, 2020 8:50 PM
> To: wangyunjian <wangyunjian@huawei.com>
> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
> chenchanghu <chenchanghu@huawei.com>; xudingke <xudingke@huawei.com>
> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
> 
> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
> > From: Yunjian Wang <wangyunjian@huawei.com>
> >
> > Currently we break the loop and wake up the vhost_worker when sendmsg
> > fails. When the worker wakes up again, we'll meet the same error. This
> > will cause high CPU load. To fix this issue, we can skip this
> > description by ignoring the error.
> >
> > Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
> > ---
> >  drivers/vhost/net.c | 24 +++++-------------------
> >  1 file changed, 5 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
> > 531a00d703cd..ac950b1120f5 100644
> > --- a/drivers/vhost/net.c
> > +++ b/drivers/vhost/net.c
> > @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net *net,
> > struct socket *sock)
> >
> >  		/* TODO: Check specific error and bomb out unless ENOBUFS? */
> >  		err = sock->ops->sendmsg(sock, &msg, len);
> > -		if (unlikely(err < 0)) {
> > -			vhost_discard_vq_desc(vq, 1);
> > -			vhost_net_enable_vq(net, vq);
> > -			break;
> > -		}
> > -		if (err != len)
> > -			pr_debug("Truncated TX packet: len %d != %zd\n",
> > -				 err, len);
> > +		if (unlikely(err < 0 || err != len))
> > +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err,
> > +len);
> >  done:
> >  		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> >  		vq->heads[nvq->done_idx].len = 0;
> 
> One of the reasons for sendmsg to fail is ENOBUFS.
> In that case for sure we don't want to drop packet.

Currently, tap_sendmsg() and tun_sendmsg() do not return ENOBUFS.

> There could be other transient errors.
> Which error did you encounter, specifically?

In our case, a guest VM sends an skb whose length is more than 64K.
The problem is also triggered if the virtio header is wrong.

Thanks

> 
> > @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct vhost_net
> > *net, struct socket *sock)
> >
> >  		/* TODO: Check specific error and bomb out unless ENOBUFS? */
> >  		err = sock->ops->sendmsg(sock, &msg, len);
> > -		if (unlikely(err < 0)) {
> > -			if (zcopy_used) {
> > +		if (unlikely(err < 0 || err != len)) {
> > +			if (zcopy_used && err < 0)
> >  				vhost_net_ubuf_put(ubufs);
> > -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
> > -					% UIO_MAXIOV;
> > -			}
> > -			vhost_discard_vq_desc(vq, 1);
> > -			vhost_net_enable_vq(net, vq);
> > -			break;
> > +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err,
> > +len);
> >  		}
> > -		if (err != len)
> > -			pr_debug("Truncated TX packet: "
> > -				 " len %d != %zd\n", err, len);
> >  		if (!zcopy_used)
> >  			vhost_add_used_and_signal(&net->dev, vq, head, 0);
> >  		else
> > --
> > 2.23.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
  2020-12-09 13:27   ` wangyunjian
@ 2020-12-11  2:52       ` Jason Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Wang @ 2020-12-11  2:52 UTC (permalink / raw)
  To: wangyunjian, Michael S. Tsirkin
  Cc: virtualization, netdev, Lilijun (Jerry), chenchanghu, xudingke


On 2020/12/9 下午9:27, wangyunjian wrote:
>> -----Original Message-----
>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>> Sent: Wednesday, December 9, 2020 8:50 PM
>> To: wangyunjian <wangyunjian@huawei.com>
>> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
>> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
>> chenchanghu <chenchanghu@huawei.com>; xudingke <xudingke@huawei.com>
>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
>>
>> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
>>> From: Yunjian Wang <wangyunjian@huawei.com>
>>>
>>> Currently we break the loop and wake up the vhost_worker when sendmsg
>>> fails. When the worker wakes up again, we'll meet the same error. This
>>> will cause high CPU load. To fix this issue, we can skip this
>>> description by ignoring the error.
>>>
>>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
>>> ---
>>>   drivers/vhost/net.c | 24 +++++-------------------
>>>   1 file changed, 5 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
>>> 531a00d703cd..ac950b1120f5 100644
>>> --- a/drivers/vhost/net.c
>>> +++ b/drivers/vhost/net.c
>>> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net *net,
>>> struct socket *sock)
>>>
>>>   		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>>>   		err = sock->ops->sendmsg(sock, &msg, len);
>>> -		if (unlikely(err < 0)) {
>>> -			vhost_discard_vq_desc(vq, 1);
>>> -			vhost_net_enable_vq(net, vq);
>>> -			break;
>>> -		}
>>> -		if (err != len)
>>> -			pr_debug("Truncated TX packet: len %d != %zd\n",
>>> -				 err, len);
>>> +		if (unlikely(err < 0 || err != len))
>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err,
>>> +len);
>>>   done:
>>>   		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>>>   		vq->heads[nvq->done_idx].len = 0;
>> One of the reasons for sendmsg to fail is ENOBUFS.
>> In that case for sure we don't want to drop packet.
> Now the function tap_sendmsg()/tun_sendmsg() don't return ENOBUFS.


I don't think so; it can happen if we exceed sndbuf. E.g., see tun_alloc_skb().

Thanks


>
>> There could be other transient errors.
>> Which error did you encounter, specifically?
> Currently a guest vm send a skb which length is more than 64k.
> If virtio hdr is wrong, the problem will also be triggered.
>
> Thanks
>
>>> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct vhost_net
>>> *net, struct socket *sock)
>>>
>>>   		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>>>   		err = sock->ops->sendmsg(sock, &msg, len);
>>> -		if (unlikely(err < 0)) {
>>> -			if (zcopy_used) {
>>> +		if (unlikely(err < 0 || err != len)) {
>>> +			if (zcopy_used && err < 0)
>>>   				vhost_net_ubuf_put(ubufs);
>>> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
>>> -					% UIO_MAXIOV;
>>> -			}
>>> -			vhost_discard_vq_desc(vq, 1);
>>> -			vhost_net_enable_vq(net, vq);
>>> -			break;
>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err,
>>> +len);
>>>   		}
>>> -		if (err != len)
>>> -			pr_debug("Truncated TX packet: "
>>> -				 " len %d != %zd\n", err, len);
>>>   		if (!zcopy_used)
>>>   			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>>>   		else
>>> --
>>> 2.23.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
@ 2020-12-11  2:52       ` Jason Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Wang @ 2020-12-11  2:52 UTC (permalink / raw)
  To: wangyunjian, Michael S. Tsirkin
  Cc: netdev, xudingke, Lilijun (Jerry), chenchanghu, virtualization


On 2020/12/9 下午9:27, wangyunjian wrote:
>> -----Original Message-----
>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>> Sent: Wednesday, December 9, 2020 8:50 PM
>> To: wangyunjian <wangyunjian@huawei.com>
>> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
>> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
>> chenchanghu <chenchanghu@huawei.com>; xudingke <xudingke@huawei.com>
>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
>>
>> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
>>> From: Yunjian Wang <wangyunjian@huawei.com>
>>>
>>> Currently we break the loop and wake up the vhost_worker when sendmsg
>>> fails. When the worker wakes up again, we'll meet the same error. This
>>> will cause high CPU load. To fix this issue, we can skip this
>>> description by ignoring the error.
>>>
>>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
>>> ---
>>>   drivers/vhost/net.c | 24 +++++-------------------
>>>   1 file changed, 5 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
>>> 531a00d703cd..ac950b1120f5 100644
>>> --- a/drivers/vhost/net.c
>>> +++ b/drivers/vhost/net.c
>>> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net *net,
>>> struct socket *sock)
>>>
>>>   		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>>>   		err = sock->ops->sendmsg(sock, &msg, len);
>>> -		if (unlikely(err < 0)) {
>>> -			vhost_discard_vq_desc(vq, 1);
>>> -			vhost_net_enable_vq(net, vq);
>>> -			break;
>>> -		}
>>> -		if (err != len)
>>> -			pr_debug("Truncated TX packet: len %d != %zd\n",
>>> -				 err, len);
>>> +		if (unlikely(err < 0 || err != len))
>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err,
>>> +len);
>>>   done:
>>>   		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>>>   		vq->heads[nvq->done_idx].len = 0;
>> One of the reasons for sendmsg to fail is ENOBUFS.
>> In that case for sure we don't want to drop packet.
> Now the function tap_sendmsg()/tun_sendmsg() don't return ENOBUFS.


I don't think so; it can happen if we exceed sndbuf. E.g., see tun_alloc_skb().

Thanks


>
>> There could be other transient errors.
>> Which error did you encounter, specifically?
> Currently a guest vm send a skb which length is more than 64k.
> If virtio hdr is wrong, the problem will also be triggered.
>
> Thanks
>
>>> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct vhost_net
>>> *net, struct socket *sock)
>>>
>>>   		/* TODO: Check specific error and bomb out unless ENOBUFS? */
>>>   		err = sock->ops->sendmsg(sock, &msg, len);
>>> -		if (unlikely(err < 0)) {
>>> -			if (zcopy_used) {
>>> +		if (unlikely(err < 0 || err != len)) {
>>> +			if (zcopy_used && err < 0)
>>>   				vhost_net_ubuf_put(ubufs);
>>> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
>>> -					% UIO_MAXIOV;
>>> -			}
>>> -			vhost_discard_vq_desc(vq, 1);
>>> -			vhost_net_enable_vq(net, vq);
>>> -			break;
>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n", err,
>>> +len);
>>>   		}
>>> -		if (err != len)
>>> -			pr_debug("Truncated TX packet: "
>>> -				 " len %d != %zd\n", err, len);
>>>   		if (!zcopy_used)
>>>   			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>>>   		else
>>> --
>>> 2.23.0

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
  2020-12-11  2:52       ` Jason Wang
  (?)
@ 2020-12-11  7:37       ` wangyunjian
  2020-12-14  3:13           ` Jason Wang
  2020-12-14  3:13           ` Jason Wang
  -1 siblings, 2 replies; 11+ messages in thread
From: wangyunjian @ 2020-12-11  7:37 UTC (permalink / raw)
  To: Jason Wang, Michael S. Tsirkin
  Cc: virtualization, netdev, Lilijun (Jerry),
	chenchanghu, xudingke, huangbin (J)

> -----Original Message-----
> From: Jason Wang [mailto:jasowang@redhat.com]
> Sent: Friday, December 11, 2020 10:53 AM
> To: wangyunjian <wangyunjian@huawei.com>; Michael S. Tsirkin
> <mst@redhat.com>
> Cc: virtualization@lists.linux-foundation.org; netdev@vger.kernel.org; Lilijun
> (Jerry) <jerry.lilijun@huawei.com>; chenchanghu <chenchanghu@huawei.com>;
> xudingke <xudingke@huawei.com>
> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
> 
> 
> On 2020/12/9 下午9:27, wangyunjian wrote:
> >> -----Original Message-----
> >> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> >> Sent: Wednesday, December 9, 2020 8:50 PM
> >> To: wangyunjian <wangyunjian@huawei.com>
> >> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
> >> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
> >> chenchanghu <chenchanghu@huawei.com>; xudingke
> <xudingke@huawei.com>
> >> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg
> >> fails
> >>
> >> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
> >>> From: Yunjian Wang <wangyunjian@huawei.com>
> >>>
> >>> Currently we break the loop and wake up the vhost_worker when
> >>> sendmsg fails. When the worker wakes up again, we'll meet the same
> >>> error. This will cause high CPU load. To fix this issue, we can skip
> >>> this description by ignoring the error.
> >>>
> >>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
> >>> ---
> >>>   drivers/vhost/net.c | 24 +++++-------------------
> >>>   1 file changed, 5 insertions(+), 19 deletions(-)
> >>>
> >>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
> >>> 531a00d703cd..ac950b1120f5 100644
> >>> --- a/drivers/vhost/net.c
> >>> +++ b/drivers/vhost/net.c
> >>> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net
> >>> *net, struct socket *sock)
> >>>
> >>>   		/* TODO: Check specific error and bomb out unless ENOBUFS?
> */
> >>>   		err = sock->ops->sendmsg(sock, &msg, len);
> >>> -		if (unlikely(err < 0)) {
> >>> -			vhost_discard_vq_desc(vq, 1);
> >>> -			vhost_net_enable_vq(net, vq);
> >>> -			break;
> >>> -		}
> >>> -		if (err != len)
> >>> -			pr_debug("Truncated TX packet: len %d != %zd\n",
> >>> -				 err, len);
> >>> +		if (unlikely(err < 0 || err != len))
> >>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
> err,
> >>> +len);
> >>>   done:
> >>>   		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> >>>   		vq->heads[nvq->done_idx].len = 0;
> >> One of the reasons for sendmsg to fail is ENOBUFS.
> >> In that case for sure we don't want to drop packet.
> > Now the function tap_sendmsg()/tun_sendmsg() don't return ENOBUFS.
> 
> 
> I think not, it can happen if we exceeds sndbuf. E.g see tun_alloc_skb().

The patch 'net: add alloc_skb_with_frags() helper' modifies the return value
of sock_alloc_send_pskb() from -ENOBUFS to -EAGAIN when we exceed sndbuf.
So the return value of tun_alloc_skb() has changed as well.

We would not drop the packet if sendmsg fails with -EAGAIN.
How about this?

Thanks

> 
> Thanks
> 
> 
> >
> >> There could be other transient errors.
> >> Which error did you encounter, specifically?
> > Currently a guest vm send a skb which length is more than 64k.
> > If virtio hdr is wrong, the problem will also be triggered.
> >
> > Thanks
> >
> >>> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct
> >>> vhost_net *net, struct socket *sock)
> >>>
> >>>   		/* TODO: Check specific error and bomb out unless ENOBUFS?
> */
> >>>   		err = sock->ops->sendmsg(sock, &msg, len);
> >>> -		if (unlikely(err < 0)) {
> >>> -			if (zcopy_used) {
> >>> +		if (unlikely(err < 0 || err != len)) {
> >>> +			if (zcopy_used && err < 0)
> >>>   				vhost_net_ubuf_put(ubufs);
> >>> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
> >>> -					% UIO_MAXIOV;
> >>> -			}
> >>> -			vhost_discard_vq_desc(vq, 1);
> >>> -			vhost_net_enable_vq(net, vq);
> >>> -			break;
> >>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
> err,
> >>> +len);
> >>>   		}
> >>> -		if (err != len)
> >>> -			pr_debug("Truncated TX packet: "
> >>> -				 " len %d != %zd\n", err, len);
> >>>   		if (!zcopy_used)
> >>>   			vhost_add_used_and_signal(&net->dev, vq, head, 0);
> >>>   		else
> >>> --
> >>> 2.23.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
  2020-12-11  7:37       ` wangyunjian
@ 2020-12-14  3:13           ` Jason Wang
  2020-12-14  3:13           ` Jason Wang
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Wang @ 2020-12-14  3:13 UTC (permalink / raw)
  To: wangyunjian, Michael S. Tsirkin
  Cc: virtualization, netdev, Lilijun (Jerry),
	chenchanghu, xudingke, huangbin (J)


On 2020/12/11 下午3:37, wangyunjian wrote:
>> -----Original Message-----
>> From: Jason Wang [mailto:jasowang@redhat.com]
>> Sent: Friday, December 11, 2020 10:53 AM
>> To: wangyunjian <wangyunjian@huawei.com>; Michael S. Tsirkin
>> <mst@redhat.com>
>> Cc: virtualization@lists.linux-foundation.org; netdev@vger.kernel.org; Lilijun
>> (Jerry) <jerry.lilijun@huawei.com>; chenchanghu <chenchanghu@huawei.com>;
>> xudingke <xudingke@huawei.com>
>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
>>
>>
>> On 2020/12/9 下午9:27, wangyunjian wrote:
>>>> -----Original Message-----
>>>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>>>> Sent: Wednesday, December 9, 2020 8:50 PM
>>>> To: wangyunjian <wangyunjian@huawei.com>
>>>> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
>>>> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
>>>> chenchanghu <chenchanghu@huawei.com>; xudingke
>> <xudingke@huawei.com>
>>>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg
>>>> fails
>>>>
>>>> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
>>>>> From: Yunjian Wang <wangyunjian@huawei.com>
>>>>>
>>>>> Currently we break the loop and wake up the vhost_worker when
>>>>> sendmsg fails. When the worker wakes up again, we'll meet the same
>>>>> error. This will cause high CPU load. To fix this issue, we can skip
>>>>> this description by ignoring the error.
>>>>>
>>>>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
>>>>> ---
>>>>>    drivers/vhost/net.c | 24 +++++-------------------
>>>>>    1 file changed, 5 insertions(+), 19 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
>>>>> 531a00d703cd..ac950b1120f5 100644
>>>>> --- a/drivers/vhost/net.c
>>>>> +++ b/drivers/vhost/net.c
>>>>> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net
>>>>> *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> -		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: len %d != %zd\n",
>>>>> -				 err, len);
>>>>> +		if (unlikely(err < 0 || err != len))
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    done:
>>>>>    		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>>>>>    		vq->heads[nvq->done_idx].len = 0;
>>>> One of the reasons for sendmsg to fail is ENOBUFS.
>>>> In that case for sure we don't want to drop packet.
>>> Now the function tap_sendmsg()/tun_sendmsg() don't return ENOBUFS.
>>
>> I think not, it can happen if we exceeds sndbuf. E.g see tun_alloc_skb().
> This patch 'net: add alloc_skb_with_frags() helper' modifys the return value
> of sock_alloc_send_pskb() from -ENOBUFS to -EAGAIN when we exceeds sndbuf.
> So the return value of tun_alloc_skb has been changed.


Ok.


>
> We don't drop packet if the reasons for sendmsg to fail is EAGAIN.
> How about this?


It should work.

Btw, the patch doesn't add the head to the used ring. This may confuse
the driver.

Thanks


>
> Thanks
>
>> Thanks
>>
>>
>>>> There could be other transient errors.
>>>> Which error did you encounter, specifically?
>>> Currently a guest vm send a skb which length is more than 64k.
>>> If virtio hdr is wrong, the problem will also be triggered.
>>>
>>> Thanks
>>>
>>>>> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct
>>>>> vhost_net *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			if (zcopy_used) {
>>>>> +		if (unlikely(err < 0 || err != len)) {
>>>>> +			if (zcopy_used && err < 0)
>>>>>    				vhost_net_ubuf_put(ubufs);
>>>>> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
>>>>> -					% UIO_MAXIOV;
>>>>> -			}
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: "
>>>>> -				 " len %d != %zd\n", err, len);
>>>>>    		if (!zcopy_used)
>>>>>    			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>>>>>    		else
>>>>> --
>>>>> 2.23.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
@ 2020-12-14  3:13           ` Jason Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Wang @ 2020-12-14  3:13 UTC (permalink / raw)
  To: wangyunjian, Michael S. Tsirkin
  Cc: netdev, Lilijun (Jerry),
	virtualization, chenchanghu, huangbin (J),
	xudingke


On 2020/12/11 下午3:37, wangyunjian wrote:
>> -----Original Message-----
>> From: Jason Wang [mailto:jasowang@redhat.com]
>> Sent: Friday, December 11, 2020 10:53 AM
>> To: wangyunjian <wangyunjian@huawei.com>; Michael S. Tsirkin
>> <mst@redhat.com>
>> Cc: virtualization@lists.linux-foundation.org; netdev@vger.kernel.org; Lilijun
>> (Jerry) <jerry.lilijun@huawei.com>; chenchanghu <chenchanghu@huawei.com>;
>> xudingke <xudingke@huawei.com>
>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
>>
>>
>> On 2020/12/9 下午9:27, wangyunjian wrote:
>>>> -----Original Message-----
>>>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>>>> Sent: Wednesday, December 9, 2020 8:50 PM
>>>> To: wangyunjian <wangyunjian@huawei.com>
>>>> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
>>>> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
>>>> chenchanghu <chenchanghu@huawei.com>; xudingke
>> <xudingke@huawei.com>
>>>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg
>>>> fails
>>>>
>>>> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
>>>>> From: Yunjian Wang <wangyunjian@huawei.com>
>>>>>
>>>>> Currently we break the loop and wake up the vhost_worker when
>>>>> sendmsg fails. When the worker wakes up again, we'll meet the same
>>>>> error. This will cause high CPU load. To fix this issue, we can skip
>>>>> this description by ignoring the error.
>>>>>
>>>>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
>>>>> ---
>>>>>    drivers/vhost/net.c | 24 +++++-------------------
>>>>>    1 file changed, 5 insertions(+), 19 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
>>>>> 531a00d703cd..ac950b1120f5 100644
>>>>> --- a/drivers/vhost/net.c
>>>>> +++ b/drivers/vhost/net.c
>>>>> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net
>>>>> *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> -		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: len %d != %zd\n",
>>>>> -				 err, len);
>>>>> +		if (unlikely(err < 0 || err != len))
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    done:
>>>>>    		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>>>>>    		vq->heads[nvq->done_idx].len = 0;
>>>> One of the reasons for sendmsg to fail is ENOBUFS.
>>>> In that case for sure we don't want to drop packet.
>>> Now the function tap_sendmsg()/tun_sendmsg() don't return ENOBUFS.
>>
>> I think not, it can happen if we exceeds sndbuf. E.g see tun_alloc_skb().
> This patch 'net: add alloc_skb_with_frags() helper' modifies the return value
> of sock_alloc_send_pskb() from -ENOBUFS to -EAGAIN when we exceed sndbuf.
> So the return value of tun_alloc_skb has been changed.


Ok.


>
> We don't drop packet if the reasons for sendmsg to fail is EAGAIN.
> How about this?


It should work.

Btw, the patch doesn't add the head to the used ring. This may confuse 
the driver.

Thanks


>
> Thanks
>
>> Thanks
>>
>>
>>>> There could be other transient errors.
>>>> Which error did you encounter, specifically?
>>> Currently a guest vm send a skb which length is more than 64k.
>>> If virtio hdr is wrong, the problem will also be triggered.
>>>
>>> Thanks
>>>
>>>>> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct
>>>>> vhost_net *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			if (zcopy_used) {
>>>>> +		if (unlikely(err < 0 || err != len)) {
>>>>> +			if (zcopy_used && err < 0)
>>>>>    				vhost_net_ubuf_put(ubufs);
>>>>> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
>>>>> -					% UIO_MAXIOV;
>>>>> -			}
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: "
>>>>> -				 " len %d != %zd\n", err, len);
>>>>>    		if (!zcopy_used)
>>>>>    			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>>>>>    		else
>>>>> --
>>>>> 2.23.0

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
  2020-12-11  7:37       ` wangyunjian
@ 2020-12-14  3:13           ` Jason Wang
  2020-12-14  3:13           ` Jason Wang
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Wang @ 2020-12-14  3:13 UTC (permalink / raw)
  To: wangyunjian, Michael S. Tsirkin
  Cc: virtualization, netdev, Lilijun (Jerry),
	chenchanghu, xudingke, huangbin (J)


On 2020/12/11 下午3:37, wangyunjian wrote:
>> -----Original Message-----
>> From: Jason Wang [mailto:jasowang@redhat.com]
>> Sent: Friday, December 11, 2020 10:53 AM
>> To: wangyunjian <wangyunjian@huawei.com>; Michael S. Tsirkin
>> <mst@redhat.com>
>> Cc: virtualization@lists.linux-foundation.org; netdev@vger.kernel.org; Lilijun
>> (Jerry) <jerry.lilijun@huawei.com>; chenchanghu <chenchanghu@huawei.com>;
>> xudingke <xudingke@huawei.com>
>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
>>
>>
>> On 2020/12/9 下午9:27, wangyunjian wrote:
>>>> -----Original Message-----
>>>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>>>> Sent: Wednesday, December 9, 2020 8:50 PM
>>>> To: wangyunjian <wangyunjian@huawei.com>
>>>> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
>>>> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
>>>> chenchanghu <chenchanghu@huawei.com>; xudingke
>> <xudingke@huawei.com>
>>>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg
>>>> fails
>>>>
>>>> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
>>>>> From: Yunjian Wang <wangyunjian@huawei.com>
>>>>>
>>>>> Currently we break the loop and wake up the vhost_worker when
>>>>> sendmsg fails. When the worker wakes up again, we'll meet the same
>>>>> error. This will cause high CPU load. To fix this issue, we can skip
>>>>> this description by ignoring the error.
>>>>>
>>>>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
>>>>> ---
>>>>>    drivers/vhost/net.c | 24 +++++-------------------
>>>>>    1 file changed, 5 insertions(+), 19 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
>>>>> 531a00d703cd..ac950b1120f5 100644
>>>>> --- a/drivers/vhost/net.c
>>>>> +++ b/drivers/vhost/net.c
>>>>> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net
>>>>> *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> -		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: len %d != %zd\n",
>>>>> -				 err, len);
>>>>> +		if (unlikely(err < 0 || err != len))
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    done:
>>>>>    		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>>>>>    		vq->heads[nvq->done_idx].len = 0;
>>>> One of the reasons for sendmsg to fail is ENOBUFS.
>>>> In that case for sure we don't want to drop packet.
>>> Now the function tap_sendmsg()/tun_sendmsg() don't return ENOBUFS.
>>
>> I think not, it can happen if we exceeds sndbuf. E.g see tun_alloc_skb().
> This patch 'net: add alloc_skb_with_frags() helper' modifies the return value
> of sock_alloc_send_pskb() from -ENOBUFS to -EAGAIN when we exceed sndbuf.
> So the return value of tun_alloc_skb has been changed.


Ok.


>
> We don't drop packet if the reasons for sendmsg to fail is EAGAIN.
> How about this?


It should work.

Btw, the patch doesn't add the head to the used ring. This may confuse 
the driver.

Thanks


>
> Thanks
>
>> Thanks
>>
>>
>>>> There could be other transient errors.
>>>> Which error did you encounter, specifically?
>>> Currently a guest vm send a skb which length is more than 64k.
>>> If virtio hdr is wrong, the problem will also be triggered.
>>>
>>> Thanks
>>>
>>>>> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct
>>>>> vhost_net *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			if (zcopy_used) {
>>>>> +		if (unlikely(err < 0 || err != len)) {
>>>>> +			if (zcopy_used && err < 0)
>>>>>    				vhost_net_ubuf_put(ubufs);
>>>>> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
>>>>> -					% UIO_MAXIOV;
>>>>> -			}
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: "
>>>>> -				 " len %d != %zd\n", err, len);
>>>>>    		if (!zcopy_used)
>>>>>    			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>>>>>    		else
>>>>> --
>>>>> 2.23.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
@ 2020-12-14  3:13           ` Jason Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Wang @ 2020-12-14  3:13 UTC (permalink / raw)
  To: wangyunjian, Michael S. Tsirkin
  Cc: netdev, Lilijun (Jerry),
	virtualization, chenchanghu, huangbin (J),
	xudingke


On 2020/12/11 下午3:37, wangyunjian wrote:
>> -----Original Message-----
>> From: Jason Wang [mailto:jasowang@redhat.com]
>> Sent: Friday, December 11, 2020 10:53 AM
>> To: wangyunjian <wangyunjian@huawei.com>; Michael S. Tsirkin
>> <mst@redhat.com>
>> Cc: virtualization@lists.linux-foundation.org; netdev@vger.kernel.org; Lilijun
>> (Jerry) <jerry.lilijun@huawei.com>; chenchanghu <chenchanghu@huawei.com>;
>> xudingke <xudingke@huawei.com>
>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg fails
>>
>>
>> On 2020/12/9 下午9:27, wangyunjian wrote:
>>>> -----Original Message-----
>>>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>>>> Sent: Wednesday, December 9, 2020 8:50 PM
>>>> To: wangyunjian <wangyunjian@huawei.com>
>>>> Cc: jasowang@redhat.com; virtualization@lists.linux-foundation.org;
>>>> netdev@vger.kernel.org; Lilijun (Jerry) <jerry.lilijun@huawei.com>;
>>>> chenchanghu <chenchanghu@huawei.com>; xudingke
>> <xudingke@huawei.com>
>>>> Subject: Re: [PATCH net] vhost_net: fix high cpu load when sendmsg
>>>> fails
>>>>
>>>> On Wed, Dec 09, 2020 at 07:48:24PM +0800, wangyunjian wrote:
>>>>> From: Yunjian Wang <wangyunjian@huawei.com>
>>>>>
>>>>> Currently we break the loop and wake up the vhost_worker when
>>>>> sendmsg fails. When the worker wakes up again, we'll meet the same
>>>>> error. This will cause high CPU load. To fix this issue, we can skip
>>>>> this description by ignoring the error.
>>>>>
>>>>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
>>>>> ---
>>>>>    drivers/vhost/net.c | 24 +++++-------------------
>>>>>    1 file changed, 5 insertions(+), 19 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index
>>>>> 531a00d703cd..ac950b1120f5 100644
>>>>> --- a/drivers/vhost/net.c
>>>>> +++ b/drivers/vhost/net.c
>>>>> @@ -829,14 +829,8 @@ static void handle_tx_copy(struct vhost_net
>>>>> *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> -		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: len %d != %zd\n",
>>>>> -				 err, len);
>>>>> +		if (unlikely(err < 0 || err != len))
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    done:
>>>>>    		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
>>>>>    		vq->heads[nvq->done_idx].len = 0;
>>>> One of the reasons for sendmsg to fail is ENOBUFS.
>>>> In that case for sure we don't want to drop packet.
>>> Now the function tap_sendmsg()/tun_sendmsg() don't return ENOBUFS.
>>
>> I think not, it can happen if we exceeds sndbuf. E.g see tun_alloc_skb().
> This patch 'net: add alloc_skb_with_frags() helper' modifies the return value
> of sock_alloc_send_pskb() from -ENOBUFS to -EAGAIN when we exceed sndbuf.
> So the return value of tun_alloc_skb has been changed.


Ok.


>
> We don't drop packet if the reasons for sendmsg to fail is EAGAIN.
> How about this?


It should work.

Btw, the patch doesn't add the head to the used ring. This may confuse 
the driver.

Thanks


>
> Thanks
>
>> Thanks
>>
>>
>>>> There could be other transient errors.
>>>> Which error did you encounter, specifically?
>>> Currently a guest vm send a skb which length is more than 64k.
>>> If virtio hdr is wrong, the problem will also be triggered.
>>>
>>> Thanks
>>>
>>>>> @@ -925,19 +919,11 @@ static void handle_tx_zerocopy(struct
>>>>> vhost_net *net, struct socket *sock)
>>>>>
>>>>>    		/* TODO: Check specific error and bomb out unless ENOBUFS?
>> */
>>>>>    		err = sock->ops->sendmsg(sock, &msg, len);
>>>>> -		if (unlikely(err < 0)) {
>>>>> -			if (zcopy_used) {
>>>>> +		if (unlikely(err < 0 || err != len)) {
>>>>> +			if (zcopy_used && err < 0)
>>>>>    				vhost_net_ubuf_put(ubufs);
>>>>> -				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
>>>>> -					% UIO_MAXIOV;
>>>>> -			}
>>>>> -			vhost_discard_vq_desc(vq, 1);
>>>>> -			vhost_net_enable_vq(net, vq);
>>>>> -			break;
>>>>> +			vq_err(vq, "Fail to sending packets err : %d, len : %zd\n",
>> err,
>>>>> +len);
>>>>>    		}
>>>>> -		if (err != len)
>>>>> -			pr_debug("Truncated TX packet: "
>>>>> -				 " len %d != %zd\n", err, len);
>>>>>    		if (!zcopy_used)
>>>>>    			vhost_add_used_and_signal(&net->dev, vq, head, 0);
>>>>>    		else
>>>>> --
>>>>> 2.23.0

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-12-14  3:15 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-09 11:48 [PATCH net] vhost_net: fix high cpu load when sendmsg fails wangyunjian
2020-12-09 12:49 ` Michael S. Tsirkin
2020-12-09 12:49   ` Michael S. Tsirkin
2020-12-09 13:27   ` wangyunjian
2020-12-11  2:52     ` Jason Wang
2020-12-11  2:52       ` Jason Wang
2020-12-11  7:37       ` wangyunjian
2020-12-14  3:13         ` Jason Wang
2020-12-14  3:13           ` Jason Wang
2020-12-14  3:13         ` Jason Wang
2020-12-14  3:13           ` Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.