All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: Stefano Garzarella <sgarzare@redhat.com>, netdev@vger.kernel.org
Cc: kvm@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>,
	linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	Stefan Hajnoczi <stefanha@redhat.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: Re: [PATCH v2 1/8] vsock/virtio: limit the memory used per-socket
Date: Mon, 13 May 2019 17:58:53 +0800	[thread overview]
Message-ID: <3b275b52-63d9-d260-1652-8e8bf7dd679f__11187.4601499707$1557741555$gmane$org@redhat.com> (raw)
In-Reply-To: <20190510125843.95587-2-sgarzare@redhat.com>


On 2019/5/10 下午8:58, Stefano Garzarella wrote:
> Since virtio-vsock was introduced, the buffers filled by the host
> and pushed to the guest using the vring are directly queued in
> a per-socket list, avoiding a copy.
> These buffers are preallocated by the guest with a fixed
> size (4 KB).
>
> The maximum amount of memory used by each socket should be
> controlled by the credit mechanism.
> The default credit available per-socket is 256 KB, but if we use
> only 1 byte per packet, the guest can queue up to 262144 of 4 KB
> buffers, using up to 1 GB of memory per-socket. In addition, the
> guest will continue to fill the vring with new 4 KB free buffers
> to avoid starvation of other sockets.
>
> This patch solves this issue by copying the payload into a new buffer.
> The new buffer is then queued in the per-socket list, and the 4 KB
> buffer used by the host is freed.
>
> In this way, the memory used by each socket respects the credit
> available, and we still avoid starvation, paying the cost of an
> extra memory copy. When the buffer is completely full we do a
> "zero-copy", moving the buffer directly in the per-socket list.


I wonder whether, in the long run, we should use the generic socket
accounting mechanism provided by the kernel (e.g. socket, skb, sndbuf,
rcvbuf, truesize) instead of a vsock-specific one, to avoid duplicating
effort.


>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>   drivers/vhost/vsock.c                   |  2 +
>   include/linux/virtio_vsock.h            |  8 +++
>   net/vmw_vsock/virtio_transport.c        |  1 +
>   net/vmw_vsock/virtio_transport_common.c | 95 ++++++++++++++++++-------
>   4 files changed, 81 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index bb5fc0e9fbc2..7964e2daee09 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -320,6 +320,8 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>   		return NULL;
>   	}
>   
> +	pkt->buf_len = pkt->len;
> +
>   	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>   	if (nbytes != pkt->len) {
>   		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index e223e2632edd..345f04ee9193 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -54,9 +54,17 @@ struct virtio_vsock_pkt {
>   	void *buf;
>   	u32 len;
>   	u32 off;
> +	u32 buf_len;
>   	bool reply;
>   };
>   
> +struct virtio_vsock_buf {
> +	struct list_head list;
> +	void *addr;
> +	u32 len;
> +	u32 off;
> +};
> +
>   struct virtio_vsock_pkt_info {
>   	u32 remote_cid, remote_port;
>   	struct vsock_sock *vsk;
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index 15eb5d3d4750..af1d2ce12f54 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -280,6 +280,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
>   			break;
>   		}
>   
> +		pkt->buf_len = buf_len;
>   		pkt->len = buf_len;
>   
>   		sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index 602715fc9a75..0248d6808755 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -65,6 +65,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
>   		pkt->buf = kmalloc(len, GFP_KERNEL);
>   		if (!pkt->buf)
>   			goto out_pkt;
> +
> +		pkt->buf_len = len;
> +
>   		err = memcpy_from_msg(pkt->buf, info->msg, len);
>   		if (err)
>   			goto out;
> @@ -86,6 +89,46 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
>   	return NULL;
>   }
>   
> +static struct virtio_vsock_buf *
> +virtio_transport_alloc_buf(struct virtio_vsock_pkt *pkt, bool zero_copy)
> +{
> +	struct virtio_vsock_buf *buf;
> +
> +	if (pkt->len == 0)
> +		return NULL;
> +
> +	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
> +	if (!buf)
> +		return NULL;
> +
> +	/* If the buffer in the virtio_vsock_pkt is full, we can move it to
> +	 * the new virtio_vsock_buf avoiding the copy, because we are sure that
> +	 * we are not using more memory than that counted by the credit mechanism.
> +	 */
> +	if (zero_copy && pkt->len == pkt->buf_len) {
> +		buf->addr = pkt->buf;
> +		pkt->buf = NULL;
> +	} else {


Is the copy still needed if we're just a few bytes short of a full
buffer? We met a similar issue in virtio-net, and virtio-net solves it
by always copying the first 128 bytes for big packets.

See receive_big().

Thanks


> +		buf->addr = kmalloc(pkt->len, GFP_KERNEL);
> +		if (!buf->addr) {
> +			kfree(buf);
> +			return NULL;
> +		}
> +
> +		memcpy(buf->addr, pkt->buf, pkt->len);
> +	}
> +
> +	buf->len = pkt->len;
> +
> +	return buf;
> +}
> +
> +static void virtio_transport_free_buf(struct virtio_vsock_buf *buf)
> +{
> +	kfree(buf->addr);
> +	kfree(buf);
> +}
> +
>   /* Packet capture */
>   static struct sk_buff *virtio_transport_build_skb(void *opaque)
>   {
> @@ -190,17 +233,15 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>   	return virtio_transport_get_ops()->send_pkt(pkt);
>   }
>   
> -static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
> -					struct virtio_vsock_pkt *pkt)
> +static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len)
>   {
> -	vvs->rx_bytes += pkt->len;
> +	vvs->rx_bytes += len;
>   }
>   
> -static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
> -					struct virtio_vsock_pkt *pkt)
> +static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, u32 len)
>   {
> -	vvs->rx_bytes -= pkt->len;
> -	vvs->fwd_cnt += pkt->len;
> +	vvs->rx_bytes -= len;
> +	vvs->fwd_cnt += len;
>   }
>   
>   void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
> @@ -254,36 +295,36 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
>   				   size_t len)
>   {
>   	struct virtio_vsock_sock *vvs = vsk->trans;
> -	struct virtio_vsock_pkt *pkt;
> +	struct virtio_vsock_buf *buf;
>   	size_t bytes, total = 0;
>   	int err = -EFAULT;
>   
>   	spin_lock_bh(&vvs->rx_lock);
>   	while (total < len && !list_empty(&vvs->rx_queue)) {
> -		pkt = list_first_entry(&vvs->rx_queue,
> -				       struct virtio_vsock_pkt, list);
> +		buf = list_first_entry(&vvs->rx_queue,
> +				       struct virtio_vsock_buf, list);
>   
>   		bytes = len - total;
> -		if (bytes > pkt->len - pkt->off)
> -			bytes = pkt->len - pkt->off;
> +		if (bytes > buf->len - buf->off)
> +			bytes = buf->len - buf->off;
>   
>   		/* sk_lock is held by caller so no one else can dequeue.
>   		 * Unlock rx_lock since memcpy_to_msg() may sleep.
>   		 */
>   		spin_unlock_bh(&vvs->rx_lock);
>   
> -		err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
> +		err = memcpy_to_msg(msg, buf->addr + buf->off, bytes);
>   		if (err)
>   			goto out;
>   
>   		spin_lock_bh(&vvs->rx_lock);
>   
>   		total += bytes;
> -		pkt->off += bytes;
> -		if (pkt->off == pkt->len) {
> -			virtio_transport_dec_rx_pkt(vvs, pkt);
> -			list_del(&pkt->list);
> -			virtio_transport_free_pkt(pkt);
> +		buf->off += bytes;
> +		if (buf->off == buf->len) {
> +			virtio_transport_dec_rx_pkt(vvs, buf->len);
> +			list_del(&buf->list);
> +			virtio_transport_free_buf(buf);
>   		}
>   	}
>   	spin_unlock_bh(&vvs->rx_lock);
> @@ -841,20 +882,24 @@ virtio_transport_recv_connected(struct sock *sk,
>   {
>   	struct vsock_sock *vsk = vsock_sk(sk);
>   	struct virtio_vsock_sock *vvs = vsk->trans;
> +	struct virtio_vsock_buf *buf;
>   	int err = 0;
>   
>   	switch (le16_to_cpu(pkt->hdr.op)) {
>   	case VIRTIO_VSOCK_OP_RW:
>   		pkt->len = le32_to_cpu(pkt->hdr.len);
> -		pkt->off = 0;
> +		buf = virtio_transport_alloc_buf(pkt, true);
>   
> -		spin_lock_bh(&vvs->rx_lock);
> -		virtio_transport_inc_rx_pkt(vvs, pkt);
> -		list_add_tail(&pkt->list, &vvs->rx_queue);
> -		spin_unlock_bh(&vvs->rx_lock);
> +		if (buf) {
> +			spin_lock_bh(&vvs->rx_lock);
> +			virtio_transport_inc_rx_pkt(vvs, pkt->len);
> +			list_add_tail(&buf->list, &vvs->rx_queue);
> +			spin_unlock_bh(&vvs->rx_lock);
>   
> -		sk->sk_data_ready(sk);
> -		return err;
> +			sk->sk_data_ready(sk);
> +		}
> +
> +		break;
>   	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
>   		sk->sk_write_space(sk);
>   		break;
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

  parent reply	other threads:[~2019-05-13  9:58 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-10 12:58 [PATCH v2 0/8] vsock/virtio: optimizations to increase the throughput Stefano Garzarella
2019-05-10 12:58 ` [PATCH v2 1/8] vsock/virtio: limit the memory used per-socket Stefano Garzarella
2019-05-10 12:58 ` Stefano Garzarella
2019-05-12 16:57   ` Michael S. Tsirkin
2019-05-12 16:57     ` Michael S. Tsirkin
2019-05-13 16:40     ` Stefano Garzarella
2019-05-13 16:40     ` Stefano Garzarella
2019-05-13  9:58   ` Jason Wang
2019-05-13 17:23     ` Stefano Garzarella
2019-05-14  3:25       ` Jason Wang
2019-05-14  3:25       ` Jason Wang
2019-05-14  3:40         ` Jason Wang
2019-05-14  3:40         ` Jason Wang
2019-05-14 16:35         ` Stefano Garzarella
2019-05-14 16:35         ` Stefano Garzarella
2019-05-15  2:48           ` Jason Wang
2019-05-15  2:48             ` Jason Wang
2019-05-28 16:45             ` Stefano Garzarella
2019-05-28 16:45             ` Stefano Garzarella
2019-05-29  0:59               ` Jason Wang
2019-05-29  0:59               ` Jason Wang
2019-05-13 17:23     ` Stefano Garzarella
2019-05-13  9:58   ` Jason Wang [this message]
2019-05-16 15:25   ` Stefan Hajnoczi
2019-05-17  8:25     ` Stefano Garzarella
2019-05-17  8:25     ` Stefano Garzarella
2019-05-20  8:57       ` Stefan Hajnoczi
2019-05-20  8:57       ` Stefan Hajnoczi
2019-05-16 15:25   ` Stefan Hajnoczi
2019-05-10 12:58 ` [PATCH v2 2/8] vsock/virtio: free packets during the socket release Stefano Garzarella
2019-05-10 22:20   ` David Miller
2019-05-10 22:20   ` David Miller
2019-05-11  8:27     ` Stefano Garzarella
2019-05-11  8:27       ` Stefano Garzarella
2019-05-16 15:32   ` Stefan Hajnoczi
2019-05-16 15:32   ` Stefan Hajnoczi
2019-05-17  8:26     ` Stefano Garzarella
2019-05-17  8:26     ` Stefano Garzarella
2019-05-10 12:58 ` Stefano Garzarella
2019-05-10 12:58 ` [PATCH v2 3/8] vsock/virtio: fix locking for fwd_cnt and buf_alloc Stefano Garzarella
2019-05-10 12:58 ` Stefano Garzarella
2019-05-10 12:58 ` [PATCH v2 4/8] vsock/virtio: reduce credit update messages Stefano Garzarella
2019-05-10 12:58 ` Stefano Garzarella
2019-05-10 12:58 ` [PATCH v2 5/8] vhost/vsock: split packets to send using multiple buffers Stefano Garzarella
2019-05-10 12:58 ` Stefano Garzarella
2019-05-10 12:58 ` [PATCH v2 6/8] vsock/virtio: change the maximum packet size allowed Stefano Garzarella
2019-05-10 12:58 ` Stefano Garzarella
2019-05-10 12:58 ` [PATCH v2 7/8] vsock/virtio: increase RX buffer size to 64 KiB Stefano Garzarella
2019-05-13 10:01   ` Jason Wang
2019-05-13 17:51     ` Stefano Garzarella
2019-05-13 17:51     ` Stefano Garzarella
2019-05-14  3:38       ` Jason Wang
2019-05-14 16:20         ` Stefano Garzarella
2019-05-14 16:20         ` Stefano Garzarella
2019-05-15  2:50           ` Jason Wang
2019-05-15  8:22             ` Stefano Garzarella
2019-05-15  8:22             ` Stefano Garzarella
2019-05-15  2:50           ` Jason Wang
2019-05-14  3:38       ` Jason Wang
2019-05-13 10:01   ` Jason Wang
2019-05-10 12:58 ` Stefano Garzarella
2019-05-10 12:58 ` [PATCH v2 8/8] vsock/virtio: make the RX buffer size tunable Stefano Garzarella
2019-05-10 12:58 ` Stefano Garzarella
2019-05-13 10:05   ` Jason Wang
2019-05-13 10:05     ` Jason Wang
2019-05-13 12:46     ` Jason Wang
2019-05-13 12:46     ` Jason Wang
2019-05-14 16:10       ` Stefano Garzarella
2019-05-14 16:10       ` Stefano Garzarella
2019-05-13  9:33 ` [PATCH v2 0/8] vsock/virtio: optimizations to increase the throughput Jason Wang
2019-05-13 16:49   ` Stefano Garzarella
2019-05-13 16:49   ` Stefano Garzarella
2019-05-20 14:09   ` Stefano Garzarella
2019-05-20 14:09   ` Stefano Garzarella
2019-05-13  9:33 ` Jason Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='3b275b52-63d9-d260-1652-8e8bf7dd679f__11187.4601499707$1557741555$gmane$org@redhat.com' \
    --to=jasowang@redhat.com \
    --cc=davem@davemloft.net \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=sgarzare@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.