From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com
Cc: kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH net-next 04/12] vhost_net: split out datacopy logic
Date: Mon, 21 May 2018 17:04:25 +0800 [thread overview]
Message-ID: <1526893473-20128-5-git-send-email-jasowang@redhat.com> (raw)
In-Reply-To: <1526893473-20128-1-git-send-email-jasowang@redhat.com>
Instead of mixing zerocopy and datacopy logic, this patch splits the
datacopy logic out. This results in more compact code, and specific
optimizations can be done on top more easily.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/net.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 102 insertions(+), 9 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 4ebac76..4682fcc 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -492,9 +492,95 @@ static bool vhost_has_more_pkts(struct vhost_net *net,
likely(!vhost_exceeds_maxpend(net));
}
+/*
+ * Datacopy TX path: drain the TX virtqueue and hand every buffer to the
+ * backend socket with sendmsg(), without any zerocopy ubuf bookkeeping.
+ *
+ * Runs under vq->mutex. Returns early when no backend socket is attached
+ * (vq->private_data is NULL) or when vq_iotlb_prefetch() fails. The loop
+ * stops on a descriptor error, an empty ring, a malformed descriptor, a
+ * sendmsg() failure, or once vhost_exceeds_weight() reports that enough
+ * work was done, in which case the poll is requeued.
+ */
+static void handle_tx_copy(struct vhost_net *net)
+{
+	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+	struct vhost_virtqueue *vq = &nvq->vq;
+	unsigned out, in;
+	int head;
+	struct msghdr msg = {
+		.msg_name = NULL,
+		.msg_namelen = 0,
+		.msg_control = NULL,
+		.msg_controllen = 0,
+		.msg_flags = MSG_DONTWAIT,
+	};
+	/*
+	 * len must be signed: it was size_t, which made the "len < 0" test
+	 * below dead code.
+	 * NOTE(review): assumes init_iov_iter() reports failure as a negative
+	 * errno converted to size_t -- confirm against its definition.
+	 */
+	ssize_t len;
+	size_t total_len = 0;
+	int err;
+	size_t hdr_size;
+	struct socket *sock;
+	int sent_pkts = 0;
+
+	mutex_lock(&vq->mutex);
+	sock = vq->private_data;
+	if (!sock)
+		goto out;
+
+	if (!vq_iotlb_prefetch(vq))
+		goto out;
+
+	vhost_disable_notify(&net->dev, vq);
+	vhost_net_disable_vq(net, vq);
+
+	hdr_size = nvq->vhost_hlen;
+
+	for (;;) {
+		head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+						ARRAY_SIZE(vq->iov),
+						&out, &in);
+		/* On error, stop handling until the next kick. */
+		if (unlikely(head < 0))
+			break;
+		/* Nothing new? Wait for eventfd to tell us they refilled. */
+		if (head == vq->num) {
+			/*
+			 * A true return from vhost_enable_notify() sends us
+			 * around the loop again with notifications disabled
+			 * instead of stopping here.
+			 */
+			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+				vhost_disable_notify(&net->dev, vq);
+				continue;
+			}
+			break;
+		}
+		/* TX descriptors must not carry any "in" buffers. */
+		if (in) {
+			vq_err(vq, "Unexpected descriptor format for TX: "
+			       "out %u, in %u\n", out, in);
+			break;
+		}
+
+		len = init_iov_iter(vq, &msg.msg_iter, hdr_size, out);
+		if (len < 0)
+			break;
+
+		/*
+		 * Set MSG_MORE only while we are still under the byte budget
+		 * and vhost_has_more_pkts() says more packets are pending.
+		 */
+		total_len += len;
+		if (total_len < VHOST_NET_WEIGHT &&
+		    vhost_has_more_pkts(net, vq)) {
+			msg.msg_flags |= MSG_MORE;
+		} else {
+			msg.msg_flags &= ~MSG_MORE;
+		}
+
+		/* TODO: Check specific error and bomb out unless ENOBUFS? */
+		err = sock->ops->sendmsg(sock, &msg, len);
+		if (unlikely(err < 0)) {
+			/* Give the descriptor back and stop this run. */
+			vhost_discard_vq_desc(vq, 1);
+			vhost_net_enable_vq(net, vq);
+			break;
+		}
+		if (err != len)
+			pr_debug("Truncated TX packet: "
+				 " len %d != %zd\n", err, len);
+		vhost_add_used_and_signal(&net->dev, vq, head, 0);
+		if (vhost_exceeds_weight(++sent_pkts, total_len)) {
+			vhost_poll_queue(&vq->poll);
+			break;
+		}
+	}
+out:
+	mutex_unlock(&vq->mutex);
+}
+
/* Expects to be always run from workqueue - which acts as
* read-size critical section for our kind of RCU. */
-static void handle_tx(struct vhost_net *net)
+static void handle_tx_zerocopy(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
@@ -512,7 +598,7 @@ static void handle_tx(struct vhost_net *net)
size_t hdr_size;
struct socket *sock;
struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
- bool zcopy, zcopy_used;
+ bool zcopy_used;
int sent_pkts = 0;
mutex_lock(&vq->mutex);
@@ -527,13 +613,10 @@ static void handle_tx(struct vhost_net *net)
vhost_net_disable_vq(net, vq);
hdr_size = nvq->vhost_hlen;
- zcopy = nvq->ubufs;
for (;;) {
/* Release DMAs done buffers first */
- if (zcopy)
- vhost_zerocopy_signal_used(net, vq);
-
+ vhost_zerocopy_signal_used(net, vq);
head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
ARRAY_SIZE(vq->iov),
@@ -559,9 +642,9 @@ static void handle_tx(struct vhost_net *net)
if (len < 0)
break;
- zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
- && !vhost_exceeds_maxpend(net)
- && vhost_net_tx_select_zcopy(net);
+ zcopy_used = len >= VHOST_GOODCOPY_LEN
+ && !vhost_exceeds_maxpend(net)
+ && vhost_net_tx_select_zcopy(net);
/* use msg_control to pass vhost zerocopy ubuf info to skb */
if (zcopy_used) {
@@ -620,6 +703,16 @@ static void handle_tx(struct vhost_net *net)
mutex_unlock(&vq->mutex);
}
+/*
+ * TX entry point: use the datacopy handler when the TX virtqueue has no
+ * ubufs set, otherwise use the zerocopy handler.
+ */
+static void handle_tx(struct vhost_net *net)
+{
+	if (!net->vqs[VHOST_NET_VQ_TX].ubufs) {
+		handle_tx_copy(net);
+		return;
+	}
+
+	handle_tx_zerocopy(net);
+}
+
static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
{
struct sk_buff *head;
--
2.7.4
next prev parent reply other threads:[~2018-05-21 9:08 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-05-21 9:04 [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 01/12] vhost_net: introduce helper to initialize tx iov iter Jason Wang
2018-05-21 16:24 ` Jesse Brandeburg
2018-05-22 12:26 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 02/12] vhost_net: introduce vhost_exceeds_weight() Jason Wang
2018-05-21 16:29 ` Jesse Brandeburg
2018-05-22 12:27 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 03/12] vhost_net: introduce vhost_has_more_pkts() Jason Wang
2018-05-21 16:39 ` Jesse Brandeburg
2018-05-22 12:31 ` Jason Wang
2018-05-21 9:04 ` Jason Wang [this message]
2018-05-21 16:46 ` [RFC PATCH net-next 04/12] vhost_net: split out datacopy logic Jesse Brandeburg
2018-05-22 12:39 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 05/12] vhost_net: batch update used ring for datacopy TX Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 06/12] tuntap: enable premmption early Jason Wang
2018-05-21 14:32 ` Michael S. Tsirkin
2018-05-21 9:04 ` [RFC PATCH net-next 07/12] tuntap: simplify error handling in tun_build_skb() Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 08/12] tuntap: tweak on the path of non-xdp case " Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 09/12] tuntap: split out XDP logic Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 10/12] vhost_net: build xdp buff Jason Wang
2018-05-21 16:56 ` Jesse Brandeburg
2018-05-21 22:21 ` Michael S. Tsirkin
2018-05-22 12:41 ` Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 11/12] vhost_net: passing raw xdp buff to tun Jason Wang
2018-05-21 9:04 ` [RFC PATCH net-next 12/12] vhost_net: batch submitting XDP buffers to underlayer sockets Jason Wang
2018-05-21 14:33 ` Michael S. Tsirkin
2018-05-25 17:53 ` [RFC PATCH net-next 00/12] XDP batching for TUN/vhost_net Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1526893473-20128-5-git-send-email-jasowang@redhat.com \
--to=jasowang@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).