* [RFC v1 0/4] vDPA support in qemu
@ 2020-04-20  9:32 Cindy Lu
  2020-04-20  9:32 ` [RFC v1 1/4] net: Introduce qemu_get_peer Cindy Lu
                   ` (5 more replies)
  0 siblings, 6 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-20  9:32 UTC (permalink / raw)
  To: mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, lulu, hanand, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, maxime.coquelin, lingshan.zhu

A vDPA device is a device that uses a datapath which complies with the
virtio specification, combined with a vendor-specific control path. vDPA
devices can be either physically located on hardware or emulated by
software. This RFC introduces vDPA support in QEMU.
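
For orientation, a rough sketch of the resulting stack (the device node
name below is illustrative):

  guest virtio-net driver
          |
  QEMU virtio-net device ---- vhost-vdpa backend (this series)
                                   |  ioctl()/write()
                             /dev/vhost-vdpa-<n>
                                   |
                             vDPA device (HW or SW)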

Cindy Lu (3):
  net: Introduce qemu_get_peer
  vhost-vdpa: introduce vhost-vdpa net client
  vhost-vdpa: implement vhost-vdpa backend

Jason Wang (1):
  vhost: introduce vhost_set_vring_ready method

 hw/net/vhost_net-stub.c           |   5 +
 hw/net/vhost_net.c                |  75 +++++-
 hw/net/virtio-net.c               |   9 +
 hw/virtio/Makefile.objs           |   2 +-
 hw/virtio/vhost-backend.c         |   3 +
 hw/virtio/vhost-vdpa.c            | 376 ++++++++++++++++++++++++++++++
 hw/virtio/vhost.c                 |   5 +
 hw/virtio/virtio-pci.c            |  13 ++
 hw/virtio/virtio.c                |   6 +
 include/hw/virtio/vhost-backend.h |   8 +-
 include/hw/virtio/vhost-vdpa.h    |  14 ++
 include/hw/virtio/virtio-bus.h    |   4 +
 include/net/net.h                 |   1 +
 include/net/vhost-vdpa.h          |  18 ++
 include/net/vhost_net.h           |   2 +
 net/Makefile.objs                 |   2 +-
 net/clients.h                     |   2 +
 net/net.c                         |   7 +
 net/vhost-vdpa.c                  | 211 +++++++++++++++++
 qapi/net.json                     |  21 +-
 20 files changed, 773 insertions(+), 11 deletions(-)
 create mode 100644 hw/virtio/vhost-vdpa.c
 create mode 100644 include/hw/virtio/vhost-vdpa.h
 create mode 100644 include/net/vhost-vdpa.h
 create mode 100644 net/vhost-vdpa.c

-- 
2.21.1




* [RFC v1 1/4] net: Introduce qemu_get_peer
  2020-04-20  9:32 [RFC v1 0/4] vDPA support in qemu Cindy Lu
@ 2020-04-20  9:32 ` Cindy Lu
  2020-04-21  3:23   ` Jason Wang
  2020-04-21 15:01   ` Laurent Vivier
  2020-04-20  9:32 ` [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client Cindy Lu
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-20  9:32 UTC (permalink / raw)
  To: mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, lulu, hanand, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, maxime.coquelin, lingshan.zhu

This is a small function  that can get the peer from given NetClientState and queue_index
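
For illustration, the helper simply wraps the existing peer lookup (a
minimal before/after based on the hunks below):

    /* before */
    net = get_vhost_net(ncs[i].peer);

    /* after: qemu_get_peer(ncs, i) returns ncs[i].peer */
    net = get_vhost_net(qemu_get_peer(ncs, i));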

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 hw/net/vhost_net.c | 16 ++++++++++------
 include/net/net.h  |  1 +
 net/net.c          |  6 ++++++
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 6b82803fa7..4096d64aaf 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -306,7 +306,9 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
     VirtioBusState *vbus = VIRTIO_BUS(qbus);
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
+    struct vhost_net *net;
     int r, e, i;
+    NetClientState *peer;
 
     if (!k->set_guest_notifiers) {
         error_report("binding does not support guest notifiers");
@@ -314,9 +316,9 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     }
 
     for (i = 0; i < total_queues; i++) {
-        struct vhost_net *net;
 
-        net = get_vhost_net(ncs[i].peer);
+        peer = qemu_get_peer(ncs, i);
+        net = get_vhost_net(peer);
         vhost_net_set_vq_index(net, i * 2);
 
         /* Suppress the masking guest notifiers on vhost user
@@ -335,15 +337,16 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     }
 
     for (i = 0; i < total_queues; i++) {
-        r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev);
+        peer = qemu_get_peer(ncs, i);
+        r = vhost_net_start_one(get_vhost_net(peer), dev);
 
         if (r < 0) {
             goto err_start;
         }
 
-        if (ncs[i].peer->vring_enable) {
+        if (peer->vring_enable) {
             /* restore vring enable state */
-            r = vhost_set_vring_enable(ncs[i].peer, ncs[i].peer->vring_enable);
+            r = vhost_set_vring_enable(peer, peer->vring_enable);
 
             if (r < 0) {
                 goto err_start;
@@ -355,7 +358,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
 
 err_start:
     while (--i >= 0) {
-        vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
+        peer = qemu_get_peer(ncs, i);
+        vhost_net_stop_one(get_vhost_net(peer), dev);
     }
     e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
     if (e < 0) {
diff --git a/include/net/net.h b/include/net/net.h
index e175ba9677..0a74324ccd 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -175,6 +175,7 @@ void hmp_info_network(Monitor *mon, const QDict *qdict);
 void net_socket_rs_init(SocketReadState *rs,
                         SocketReadStateFinalize *finalize,
                         bool vnet_hdr);
+NetClientState *qemu_get_peer(NetClientState *nc, int queue_index);
 
 /* NIC info */
 
diff --git a/net/net.c b/net/net.c
index 84aa6d8d00..ac5080dda1 100644
--- a/net/net.c
+++ b/net/net.c
@@ -324,6 +324,12 @@ void *qemu_get_nic_opaque(NetClientState *nc)
 
     return nic->opaque;
 }
+NetClientState *qemu_get_peer(NetClientState *nc, int queue_index)
+{
+    NetClientState *ncs  =  nc + queue_index;
+    assert(ncs != NULL);
+    return ncs->peer;
+}
 
 static void qemu_cleanup_net_client(NetClientState *nc)
 {
-- 
2.21.1




* [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client
  2020-04-20  9:32 [RFC v1 0/4] vDPA support in qemu Cindy Lu
  2020-04-20  9:32 ` [RFC v1 1/4] net: Introduce qemu_get_peer Cindy Lu
@ 2020-04-20  9:32 ` Cindy Lu
  2020-04-20 14:49   ` Eric Blake
                     ` (2 more replies)
  2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
                   ` (3 subsequent siblings)
  5 siblings, 3 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-20  9:32 UTC (permalink / raw)
  To: mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, lulu, hanand, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, maxime.coquelin, lingshan.zhu

This patch set introduces a new net client type: vhost-vdpa.
vhost-vdpa net client will set up a vDPA device which is svhostdevpecified
by a "vhostdev" parameter.

Author: Tiwei Bie
Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 include/net/vhost-vdpa.h |  18 ++++
 include/net/vhost_net.h  |   1 +
 net/Makefile.objs        |   2 +-
 net/clients.h            |   2 +
 net/net.c                |   1 +
 net/vhost-vdpa.c         | 211 +++++++++++++++++++++++++++++++++++++++
 qapi/net.json            |  21 +++-
 7 files changed, 253 insertions(+), 3 deletions(-)
 create mode 100644 include/net/vhost-vdpa.h
 create mode 100644 net/vhost-vdpa.c

diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
new file mode 100644
index 0000000000..9ddd538dad
--- /dev/null
+++ b/include/net/vhost-vdpa.h
@@ -0,0 +1,18 @@
+/*
+ * vhost-vdpa.h
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_VDPA_H
+#define VHOST_VDPA_H
+
+struct vhost_net;
+struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
+uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
+
+#endif /* VHOST_VDPA_H */
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 77e47398c4..6f3a624cf7 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -39,5 +39,6 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
 uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
+int vhost_set_state(NetClientState *nc, int state);
 
 #endif
diff --git a/net/Makefile.objs b/net/Makefile.objs
index c5d076d19c..da459cfc19 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -26,7 +26,7 @@ tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
 tap-obj-y ?= tap-stub.o
 common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
 common-obj-$(CONFIG_WIN32) += tap-win32.o
-
+common-obj-$(CONFIG_VHOST_KERNEL) += vhost-vdpa.o
 vde.o-libs = $(VDE_LIBS)
 
 common-obj-$(CONFIG_CAN_BUS) += can/
diff --git a/net/clients.h b/net/clients.h
index a6ef267e19..92f9b59aed 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -61,4 +61,6 @@ int net_init_netmap(const Netdev *netdev, const char *name,
 int net_init_vhost_user(const Netdev *netdev, const char *name,
                         NetClientState *peer, Error **errp);
 
+int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp);
 #endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/net.c b/net/net.c
index ac5080dda1..2beb95388a 100644
--- a/net/net.c
+++ b/net/net.c
@@ -964,6 +964,7 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
         [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
 #ifdef CONFIG_VHOST_NET_USER
         [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
+        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
 #endif
 #ifdef CONFIG_L2TPV3
         [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
new file mode 100644
index 0000000000..5daeba0b76
--- /dev/null
+++ b/net/vhost-vdpa.c
@@ -0,0 +1,211 @@
+/*
+ * vhost-vdpa.c
+ *
+ * Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
+ * Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "net/vhost_net.h"
+#include "net/vhost-vdpa.h"
+#include "hw/virtio/vhost-vdpa.h"
+#include "chardev/char-fe.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include <err.h>
+#include <linux/virtio_net.h>
+
+
+typedef struct VhostVDPAState {
+    NetClientState nc;
+    struct vhost_vdpa vhost_vdpa;
+    VHostNetState *vhost_net;
+    uint64_t acked_features;
+    bool started;
+} VhostVDPAState;
+
+VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    return s->vhost_net;
+}
+
+uint64_t vhost_vdpa_get_acked_features(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    return s->acked_features;
+}
+
+static void vhost_vdpa_stop(NetClientState *ncs)
+{
+    VhostVDPAState *s;
+
+    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    s = DO_UPCAST(VhostVDPAState, nc, ncs);
+
+    if (s->vhost_net) {
+        /* save acked features */
+        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+        if (features) {
+            s->acked_features = features;
+        }
+        vhost_net_cleanup(s->vhost_net);
+    }
+}
+
+static int vhost_vdpa_start(NetClientState *ncs, void *be)
+{
+    VhostNetOptions options;
+    struct vhost_net *net = NULL;
+    VhostVDPAState *s;
+
+    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
+
+    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    s = DO_UPCAST(VhostVDPAState, nc, ncs);
+
+    options.net_backend = ncs;
+    options.opaque      = be;
+    options.busyloop_timeout = 0;
+    net = vhost_net_init(&options);
+    if (!net) {
+        error_report("failed to init vhost_net for queue");
+        goto err;
+    }
+
+    if (s->vhost_net) {
+        vhost_net_cleanup(s->vhost_net);
+        g_free(s->vhost_net);
+    }
+    s->vhost_net = net;
+
+    return 0;
+
+err:
+    if (net) {
+        vhost_net_cleanup(net);
+    }
+    vhost_vdpa_stop(ncs);
+    return -1;
+}
+static void vhost_vdpa_cleanup(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+
+    if (s->vhost_net) {
+        vhost_net_cleanup(s->vhost_net);
+        g_free(s->vhost_net);
+        s->vhost_net = NULL;
+    }
+
+    qemu_purge_queued_packets(nc);
+}
+
+static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
+{
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    return true;
+}
+
+static bool vhost_vdpa_has_ufo(NetClientState *nc)
+{
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    uint64_t  features = 0;
+
+    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
+    features = vhost_net_get_features(s->vhost_net, features);
+    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
+
+}
+
+static NetClientInfo net_vhost_vdpa_info = {
+        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
+        .size = sizeof(VhostVDPAState),
+        .cleanup = vhost_vdpa_cleanup,
+        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
+        .has_ufo = vhost_vdpa_has_ufo,
+};
+
+static int net_vhost_vdpa_init(NetClientState *peer, const char *device,
+                               const char *name, const char *vhostdev)
+{
+    NetClientState *nc = NULL;
+    VhostVDPAState *s;
+    int vdpa_device_fd;
+    assert(name);
+
+    nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name);
+    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-vdpa");
+    nc->queue_index = 0;
+
+    s = DO_UPCAST(VhostVDPAState, nc, nc);
+
+    vdpa_device_fd = open(vhostdev, O_RDWR);
+    if (vdpa_device_fd == -1) {
+        return -errno;
+    }
+    s->vhost_vdpa.device_fd = vdpa_device_fd;
+    vhost_vdpa_start(nc, (void *)&s->vhost_vdpa);
+
+    assert(s->vhost_net);
+
+    return 0;
+}
+
+static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
+{
+    const char *name = opaque;
+    const char *driver, *netdev;
+
+    driver = qemu_opt_get(opts, "driver");
+    netdev = qemu_opt_get(opts, "netdev");
+
+    if (!driver || !netdev) {
+        return 0;
+    }
+
+    if (strcmp(netdev, name) == 0 &&
+        !g_str_has_prefix(driver, "virtio-net-")) {
+        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
+        return -1;
+    }
+
+    return 0;
+}
+
+int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp)
+{
+    const NetdevVhostVDPAOptions *vhost_vdpa_opts;
+
+    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+    vhost_vdpa_opts = &netdev->u.vhost_vdpa;
+
+    /* verify net frontend */
+    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
+                          (char *)name, errp)) {
+        return -1;
+    }
+
+
+    return net_vhost_vdpa_init(peer, "vhost_vdpa", name,
+                               vhost_vdpa_opts->vhostdev);
+
+    return 0;
+}
diff --git a/qapi/net.json b/qapi/net.json
index 335295be50..35a5ccee39 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -441,6 +441,22 @@
     '*queues':        'int' } }
 
 ##
+# @NetdevVhostVDPAOptions:
+#
+# Vhost-vdpa network backend
+#
+# @vhostdev: name of a mdev dev path in sysfs
+#
+# @queues: number of queues to be created for multiqueue vhost-vdpa
+#          (default: 1) (Since 2.11)
+#
+# Since: 2.11
+##
+{ 'struct': 'NetdevVhostVDPAOptions',
+  'data': {
+    '*vhostdev':     'str',
+    '*queues':       'int' } }
+##
 # @NetClientDriver:
 #
 # Available netdev drivers.
@@ -451,7 +467,7 @@
 ##
 { 'enum': 'NetClientDriver',
   'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
-            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
+            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
 
 ##
 # @Netdev:
@@ -479,7 +495,8 @@
     'bridge':   'NetdevBridgeOptions',
     'hubport':  'NetdevHubPortOptions',
     'netmap':   'NetdevNetmapOptions',
-    'vhost-user': 'NetdevVhostUserOptions' } }
+    'vhost-user': 'NetdevVhostUserOptions',
+    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
 
 ##
 # @NetLegacy:
-- 
2.21.1




* [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-20  9:32 [RFC v1 0/4] vDPA support in qemu Cindy Lu
  2020-04-20  9:32 ` [RFC v1 1/4] net: Introduce qemu_get_peer Cindy Lu
  2020-04-20  9:32 ` [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client Cindy Lu
@ 2020-04-20  9:32 ` Cindy Lu
  2020-04-20 14:51   ` Eric Blake
                     ` (4 more replies)
  2020-04-20  9:32 ` [RFC v1 4/4] vhost: introduce vhost_set_vring_ready method Cindy Lu
                   ` (2 subsequent siblings)
  5 siblings, 5 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-20  9:32 UTC (permalink / raw)
  To: mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, lulu, hanand, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, maxime.coquelin, lingshan.zhu

Currently we have two types of vhost backends in QEMU: vhost kernel and
vhost-user. The above patch provides a generic device for vDPA purposes:
this vDPA device exposes to user space a non-vendor-specific
configuration interface for setting up a vhost HW accelerator. This
patch set introduces a third vhost backend, called vhost-vdpa, based on
the vDPA interface.

Vhost-vdpa usage:

  qemu-system-x86_64 -cpu host -enable-kvm \
    ......
  -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
  -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \

Author: Tiwei Bie
Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 hw/net/vhost_net.c                |  43 ++++
 hw/net/virtio-net.c               |   9 +
 hw/virtio/Makefile.objs           |   2 +-
 hw/virtio/vhost-backend.c         |   3 +
 hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
 hw/virtio/vhost.c                 |   5 +
 include/hw/virtio/vhost-backend.h |   6 +-
 include/hw/virtio/vhost-vdpa.h    |  14 ++
 8 files changed, 459 insertions(+), 2 deletions(-)
 create mode 100644 hw/virtio/vhost-vdpa.c
 create mode 100644 include/hw/virtio/vhost-vdpa.h

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 4096d64aaf..0d13fda2fc 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -17,8 +17,10 @@
 #include "net/net.h"
 #include "net/tap.h"
 #include "net/vhost-user.h"
+#include "net/vhost-vdpa.h"
 
 #include "standard-headers/linux/vhost_types.h"
+#include "linux-headers/linux/vhost.h"
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "qemu/error-report.h"
@@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
     VHOST_INVALID_FEATURE_BIT
 };
 
+static const int vdpa_feature_bits[] = {
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_F_ANY_LAYOUT,
+    VIRTIO_F_VERSION_1,
+    VIRTIO_NET_F_CSUM,
+    VIRTIO_NET_F_GUEST_CSUM,
+    VIRTIO_NET_F_GSO,
+    VIRTIO_NET_F_GUEST_TSO4,
+    VIRTIO_NET_F_GUEST_TSO6,
+    VIRTIO_NET_F_GUEST_ECN,
+    VIRTIO_NET_F_GUEST_UFO,
+    VIRTIO_NET_F_HOST_TSO4,
+    VIRTIO_NET_F_HOST_TSO6,
+    VIRTIO_NET_F_HOST_ECN,
+    VIRTIO_NET_F_HOST_UFO,
+    VIRTIO_NET_F_MRG_RXBUF,
+    VIRTIO_NET_F_MTU,
+    VIRTIO_F_IOMMU_PLATFORM,
+    VIRTIO_NET_F_GUEST_ANNOUNCE,
+    VHOST_INVALID_FEATURE_BIT
+};
 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
 {
     const int *feature_bits = 0;
@@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
     case NET_CLIENT_DRIVER_VHOST_USER:
         feature_bits = user_feature_bits;
         break;
+    case NET_CLIENT_DRIVER_VHOST_VDPA:
+        feature_bits = vdpa_feature_bits;
+        break;
     default:
         error_report("Feature bits not defined for this type: %d",
                 net->nc->info->type);
@@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
         assert(vhost_net);
         break;
 #endif
+    case NET_CLIENT_DRIVER_VHOST_VDPA:
+        vhost_net = vhost_vdpa_get_vhost_net(nc);
+        assert(vhost_net);
+        break;
     default:
         break;
     }
@@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 
     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
 }
+int vhost_set_state(NetClientState *nc, int state)
+{
+    struct vhost_net *net = get_vhost_net(nc);
+    struct vhost_dev *hdev = &net->dev;
+    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+        if (hdev->vhost_ops->vhost_set_state) {
+            return hdev->vhost_ops->vhost_set_state(hdev, state);
+        }
+    }
+    return 0;
+}
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index db3d7c38e6..bbecd7ab96 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
     VirtIODevice *vdev = VIRTIO_DEVICE(n);
     NetClientState *nc = qemu_get_queue(n->nic);
     int queues = n->multiqueue ? n->max_queues : 1;
+    NetClientState *peer = nc->peer;
+    uint8_t status_set = vdev->status;
+    uint8_t vhost_started_pre = n->vhost_started;
 
     if (!get_vhost_net(nc->peer)) {
         return;
@@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
                 return;
             }
         }
+        status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK;
 
         n->vhost_started = 1;
         r = vhost_net_start(vdev, n->nic->ncs, queues);
@@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
             error_report("unable to start vhost net: %d: "
                          "falling back on userspace virtio", -r);
             n->vhost_started = 0;
+            status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
         }
     } else {
         vhost_net_stop(vdev, n->nic->ncs, queues);
+        status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
         n->vhost_started = 0;
     }
+    if (vhost_started_pre != n->vhost_started) {
+        vhost_set_state(peer, status_set);
+    }
 }
 
 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index e2f70fbb89..17361d959e 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y)
 common-obj-y += virtio-bus.o
 obj-y += virtio.o
 
-obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o
+obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o vhost-vdpa.o
 common-obj-$(call lnot,$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o
 obj-$(CONFIG_VHOST_USER) += vhost-user.o
 
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 48905383f8..935cd9e561 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
         dev->vhost_ops = &user_ops;
         break;
 #endif
+    case VHOST_BACKEND_TYPE_VDPA:
+        dev->vhost_ops = &vdpa_ops;
+        break;
     default:
         error_report("Unknown vhost backend type");
         r = -1;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
new file mode 100644
index 0000000000..213b327600
--- /dev/null
+++ b/hw/virtio/vhost-vdpa.c
@@ -0,0 +1,379 @@
+/*
+ * vhost-vdpa
+ *
+ *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
+ *  Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vhost.h>
+#include <linux/vfio.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-vdpa.h"
+#include "qemu/main-loop.h"
+#include <linux/kvm.h>
+#include "sysemu/kvm.h"
+
+
+static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
+{
+    return (!memory_region_is_ram(section->mr) &&
+            !memory_region_is_iommu(section->mr)) ||
+           /*
+            * Sizing an enabled 64-bit BAR can cause spurious mappings to
+            * addresses in the upper part of the 64-bit address space.  These
+            * are never accessed by the CPU and beyond the address width of
+            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
+            */
+           section->offset_within_address_space & (1ULL << 63);
+}
+
+static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+                              void *vaddr, bool readonly)
+{
+    struct vhost_msg_v2 msg;
+    int fd = v->device_fd;
+    int ret = 0;
+
+    msg.type = VHOST_IOTLB_MSG_V2;
+    msg.iotlb.iova = iova;
+    msg.iotlb.size = size;
+    msg.iotlb.uaddr = (uint64_t)vaddr;
+    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
+    msg.iotlb.type = VHOST_IOTLB_UPDATE;
+
+    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+            fd, errno, strerror(errno));
+        return -EIO;
+    }
+
+    return ret;
+}
+
+static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
+                                hwaddr size)
+{
+    struct vhost_msg_v2 msg;
+    int fd = v->device_fd;
+    int ret = 0;
+
+    msg.type = VHOST_IOTLB_MSG_V2;
+    msg.iotlb.iova = iova;
+    msg.iotlb.size = size;
+    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
+
+    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+            fd, errno, strerror(errno));
+        return -EIO ;
+    }
+
+    return ret;
+}
+
+static void vhost_vdpa_listener_region_add(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 llend, llsize;
+    void *vaddr;
+    int ret;
+
+    if (vhost_vdpa_listener_skipped_section(section)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    memory_region_ref(section->mr);
+
+    /* Here we assume that memory_region_is_ram(section->mr)==true */
+
+    vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
+                             vaddr, section->readonly);
+    if (ret) {
+        error_report("vhost vdpa map fail!");
+        if (memory_region_is_ram_device(section->mr)) {
+            /* Allow unexpected mappings not to be fatal for RAM devices */
+            error_report("map ram fail!");
+            return;
+        }
+        goto fail;
+    }
+
+    return;
+
+fail:
+    if (memory_region_is_ram_device(section->mr)) {
+        error_report("failed to vdpa_dma_map. pci p2p may not work");
+        return;
+
+    }
+    /*
+     * On the initfn path, store the first error in the container so we
+     * can gracefully fail.  Runtime, there's not much we can do other
+     * than throw a hardware error.
+     */
+    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
+    return;
+
+}
+
+static void vhost_vdpa_listener_region_del(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 llend, llsize;
+    int ret;
+    bool try_unmap = true;
+
+    if (vhost_vdpa_listener_skipped_section(section)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    llsize = int128_sub(llend, int128_make64(iova));
+
+    if (try_unmap) {
+        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
+        if (ret) {
+            error_report("vhost_vdpa dma unmap error!");
+        }
+    }
+
+    memory_region_unref(section->mr);
+}
+
+static const MemoryListener vhost_vdpa_memory_listener = {
+    .region_add = vhost_vdpa_listener_region_add,
+    .region_del = vhost_vdpa_listener_region_del,
+};
+
+
+static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
+                             void *arg)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int fd = v->device_fd;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    return ioctl(fd, request, arg);
+}
+
+
+
+static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
+{
+    struct vhost_vdpa *v;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    v = opaque;
+    dev->opaque = opaque;
+
+    v->listener = vhost_vdpa_memory_listener;
+    memory_listener_register(&v->listener, &address_space_memory);
+
+    return 0;
+}
+
+static int vhost_vdpa_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v;
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    v = dev->opaque;
+    memory_listener_unregister(&v->listener);
+
+    dev->opaque = NULL;
+    return 0;
+}
+
+static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
+{
+    return INT_MAX;
+}
+
+static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                   struct vhost_log *log)
+{
+    return 0;
+}
+
+static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
+                                    struct vhost_memory *mem)
+{
+
+    if (mem->padding) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
+                                     struct vhost_vring_addr *addr)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+}
+
+static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
+                                    struct vhost_vring_state *ring)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
+}
+
+static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+}
+
+static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+
+    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
+}
+
+static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
+static int vhost_vdpa_set_features(struct vhost_dev *dev,
+                                   uint64_t features)
+{
+
+    features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);
+    return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
+
+}
+
+static int vhost_vdpa_get_features(struct vhost_dev *dev,
+                                   uint64_t *features)
+{
+    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
+}
+
+static int vhost_vdpa_set_owner(struct vhost_dev *dev)
+{
+    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+}
+
+static int vhost_vdpa_reset_device(struct vhost_dev *dev)
+{
+    return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL);
+}
+
+static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
+{
+    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+
+    return idx - dev->vq_index;
+}
+
+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
+{
+    int i;
+
+    for (i = 0; i < dev->nvqs; ++i) {
+        struct vhost_vring_state state = {
+            .index = dev->vq_index + i,
+            .num   = enable,
+        };
+
+        state.num = 1;
+
+        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
+    }
+
+    return 0;
+}
+
+static int vhost_vdpa_set_state(struct vhost_dev *dev, int state)
+{
+    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state);
+}
+
+
+const VhostOps vdpa_ops = {
+        .backend_type = VHOST_BACKEND_TYPE_VDPA,
+        .vhost_backend_init = vhost_vdpa_init,
+        .vhost_backend_cleanup = vhost_vdpa_cleanup,
+        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
+        .vhost_set_log_base = vhost_vdpa_set_log_base,
+        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
+        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
+        .vhost_set_vring_endian = NULL,
+        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
+        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
+        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
+        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
+        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
+        .vhost_set_features = vhost_vdpa_set_features,
+        .vhost_get_features = vhost_vdpa_get_features,
+        .vhost_set_owner = vhost_vdpa_set_owner,
+        .vhost_reset_device = vhost_vdpa_reset_device,
+        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
+        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
+        .vhost_requires_shm_log = NULL,
+        .vhost_migration_done = NULL,
+        .vhost_backend_can_merge = NULL,
+        .vhost_net_set_mtu = NULL,
+        .vhost_set_iotlb_callback = NULL,
+        .vhost_send_device_iotlb_msg = NULL,
+        .vhost_set_state = vhost_vdpa_set_state,
+};
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4da0d5a6c5..d1f2c4add7 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
         .log_guest_addr = vq->used_phys,
         .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
     };
+    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
+        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
+        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
+        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
+    }
     int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
     if (r < 0) {
         VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 6f6670783f..d81bd9885f 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -17,7 +17,8 @@ typedef enum VhostBackendType {
     VHOST_BACKEND_TYPE_NONE = 0,
     VHOST_BACKEND_TYPE_KERNEL = 1,
     VHOST_BACKEND_TYPE_USER = 2,
-    VHOST_BACKEND_TYPE_MAX = 3,
+    VHOST_BACKEND_TYPE_VDPA = 3,
+    VHOST_BACKEND_TYPE_MAX = 4,
 } VhostBackendType;
 
 typedef enum VhostSetConfigType {
@@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
 typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
                                         struct vhost_inflight *inflight);
 
+typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
 typedef struct VhostOps {
     VhostBackendType backend_type;
     vhost_backend_init vhost_backend_init;
@@ -152,9 +154,11 @@ typedef struct VhostOps {
     vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
     vhost_get_inflight_fd_op vhost_get_inflight_fd;
     vhost_set_inflight_fd_op vhost_set_inflight_fd;
+    vhost_set_state_op vhost_set_state;
 } VhostOps;
 
 extern const VhostOps user_ops;
+extern const VhostOps vdpa_ops;
 
 int vhost_set_backend_type(struct vhost_dev *dev,
                            VhostBackendType backend_type);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
new file mode 100644
index 0000000000..889c1a4410
--- /dev/null
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -0,0 +1,14 @@
+
+#ifndef HW_VIRTIO_VHOST_VDPA_H
+#define HW_VIRTIO_VHOST_VDPA_H
+
+#include "hw/virtio/virtio.h"
+
+typedef struct vhost_vdpa {
+    int device_fd;
+    MemoryListener listener;
+} VhostVDPA;
+
+extern AddressSpace address_space_memory;
+
+#endif
-- 
2.21.1




* [RFC v1 4/4] vhost: introduce vhost_set_vring_ready method
  2020-04-20  9:32 [RFC v1 0/4] vDPA support in qemu Cindy Lu
                   ` (2 preceding siblings ...)
  2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
@ 2020-04-20  9:32 ` Cindy Lu
  2020-04-21  3:59   ` Jason Wang
  2020-04-21  4:03 ` [RFC v1 0/4] vDPA support in qemu Jason Wang
  2020-04-21  4:05 ` Jason Wang
  5 siblings, 1 reply; 28+ messages in thread
From: Cindy Lu @ 2020-04-20  9:32 UTC (permalink / raw)
  To: mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, lulu, hanand, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, maxime.coquelin, lingshan.zhu

From: Jason Wang <jasowang@redhat.com>

Vhost-vdpa introduces VHOST_VDPA_SET_VRING_ENABLE, which complies with
the semantics of queue_enable defined in the virtio spec. This method
can be used to prevent the device from executing requests for a
specific virtqueue. This patch introduces the vhost_ops for it.

Note that we already have vhost_set_vring_enable, which has different
semantics: it allows enabling or disabling a specific virtqueue for
some kinds of vhost backends. E.g. vhost-user uses this to change the
number of active queue pairs.
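
As a rough sketch of the difference, using the ops from this series:

    /* vhost-user: enable or disable specific virtqueues, e.g. to
     * change the number of active queue pairs */
    vhost_ops->vhost_set_vring_enable(dev, enable);

    /* vhost-vdpa: one-way "ready" signal per virtqueue, following the
     * queue_enable semantics of the virtio spec */
    vhost_ops->vhost_set_vring_ready(dev);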

Author: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/vhost_net-stub.c           |  5 +++++
 hw/net/vhost_net.c                | 16 ++++++++++++++++
 hw/virtio/vhost-vdpa.c            |  9 +++------
 hw/virtio/virtio-pci.c            | 13 +++++++++++++
 hw/virtio/virtio.c                |  6 ++++++
 include/hw/virtio/vhost-backend.h |  2 ++
 include/hw/virtio/virtio-bus.h    |  4 ++++
 include/net/vhost_net.h           |  1 +
 8 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
index aac0e98228..f5ef1e3055 100644
--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
@@ -86,6 +86,11 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
     return 0;
 }
 
+int vhost_set_vring_ready(NetClientState *nc)
+{
+    return 0;
+}
+
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 {
     return 0;
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 0d13fda2fc..463e333531 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -380,6 +380,10 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                 goto err_start;
             }
         }
+
+        if (virtio_queue_enabled(dev, i)) {
+            vhost_set_vring_ready(peer);
+        }
     }
 
     return 0;
@@ -487,6 +491,18 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
     return 0;
 }
 
+int vhost_set_vring_ready(NetClientState *nc)
+{
+    VHostNetState *net = get_vhost_net(nc);
+    const VhostOps *vhost_ops = net->dev.vhost_ops;
+
+    if (vhost_ops && vhost_ops->vhost_set_vring_ready) {
+        return vhost_ops->vhost_set_vring_ready(&net->dev);
+    }
+
+    return 0;
+}
+
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 {
     const VhostOps *vhost_ops = net->dev.vhost_ops;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 213b327600..49224ef9f8 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -325,18 +325,15 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
     return idx - dev->vq_index;
 }
 
-static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
+static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
 {
     int i;
 
     for (i = 0; i < dev->nvqs; ++i) {
         struct vhost_vring_state state = {
             .index = dev->vq_index + i,
-            .num   = enable,
+            .num = 1,
         };
-
-        state.num = 1;
-
         vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
     }
 
@@ -368,7 +365,7 @@ const VhostOps vdpa_ops = {
         .vhost_set_owner = vhost_vdpa_set_owner,
         .vhost_reset_device = vhost_vdpa_reset_device,
         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
-        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
+        .vhost_set_vring_ready = vhost_vdpa_set_vring_ready,
         .vhost_requires_shm_log = NULL,
         .vhost_migration_done = NULL,
         .vhost_backend_can_merge = NULL,
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c6b47a9c73..4aaf5d953e 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1103,6 +1103,18 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
     return pci_get_address_space(dev);
 }
 
+static bool virtio_pci_queue_enabled(DeviceState *d, int n)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        return proxy->vqs[vdev->queue_sel].enabled;
+    }
+
+    return virtio_queue_get_desc_addr(vdev, n) != 0;
+}
+
 static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                    struct virtio_pci_cap *cap)
 {
@@ -2053,6 +2065,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
     k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
     k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
     k->get_dma_as = virtio_pci_get_dma_as;
+    k->queue_enabled = virtio_pci_queue_enabled;
 }
 
 static const TypeInfo virtio_pci_bus_info = {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 04716b5f6c..09732a8836 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3169,6 +3169,12 @@ hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
 
 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
 {
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+    if (k->queue_enabled)
+        return k->queue_enabled(qbus->parent, n);
+
     return virtio_queue_get_desc_addr(vdev, n) != 0;
 }
 
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index d81bd9885f..ce8de6d308 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -78,6 +78,7 @@ typedef int (*vhost_reset_device_op)(struct vhost_dev *dev);
 typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx);
 typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
                                          int enable);
+typedef int (*vhost_set_vring_ready_op)(struct vhost_dev *dev);
 typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
 typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
                                        char *mac_addr);
@@ -140,6 +141,7 @@ typedef struct VhostOps {
     vhost_reset_device_op vhost_reset_device;
     vhost_get_vq_index_op vhost_get_vq_index;
     vhost_set_vring_enable_op vhost_set_vring_enable;
+    vhost_set_vring_ready_op vhost_set_vring_ready;
     vhost_requires_shm_log_op vhost_requires_shm_log;
     vhost_migration_done_op vhost_migration_done;
     vhost_backend_can_merge_op vhost_backend_can_merge;
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index 38c9399cd4..0f6f215925 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -83,6 +83,10 @@ typedef struct VirtioBusClass {
      */
     int (*ioeventfd_assign)(DeviceState *d, EventNotifier *notifier,
                             int n, bool assign);
+    /*
+     * Whether queue number n is enabled.
+     */
+    bool (*queue_enabled)(DeviceState *d, int n);
     /*
      * Does the transport have variable vring alignment?
      * (ie can it ever call virtio_queue_set_align()?)
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 6f3a624cf7..db473ff4d2 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -35,6 +35,7 @@ int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
 VHostNetState *get_vhost_net(NetClientState *nc);
 
 int vhost_set_vring_enable(NetClientState * nc, int enable);
+int vhost_set_vring_ready(NetClientState * nc);
 
 uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
-- 
2.21.1




* Re: [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client
  2020-04-20  9:32 ` [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client Cindy Lu
@ 2020-04-20 14:49   ` Eric Blake
  2020-04-21  3:40   ` Jason Wang
  2020-04-21 15:47   ` Laurent Vivier
  2 siblings, 0 replies; 28+ messages in thread
From: Eric Blake @ 2020-04-20 14:49 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, hanand, hch, eperezma,
	jgg, shahafs, kevin.tian, parav, vmireyno, cunming.liang, gdawar,
	jiri, xiao.w.wang, stefanha, zhihong.wang, aadam, rdunlap,
	maxime.coquelin, lingshan.zhu

On 4/20/20 4:32 AM, Cindy Lu wrote:
> This patch set introduces a new net client type: vhost-vdpa.
> vhost-vdpa net client will set up a vDPA device which is svhostdevpecified

looks like you pasted 'vhostdev' in the middle of 'specified'

> by a "vhostdev" parameter.
> 
> Author: Tiwei Bie

Should this be a 'Signed-off-by' tag?

> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---

> +++ b/include/net/vhost-vdpa.h
> @@ -0,0 +1,18 @@
> +/*
> + * vhost-vdpa.h
> + *
> + * Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.

Claiming "All rights reserved" is at odds with claiming "GPL", which 
specifically requires that you are NOT reserving all rights but are 
instead granting various rights to others insofar as they preserve this 
code as free software.  I can overlook it on BSD licenses (as there are 
lots of examples of bad copy-and-paste on various templates that use 
the misleading term), but not on GPL licenses.  Also, you may want to 
consider if adding 2020 to your copyright date is appropriate.
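
A cleaned-up header might look like this (a sketch incorporating the
points above):

   /*
    * vhost-vdpa.h
    *
    * Copyright(c) 2017-2018 Intel Corporation.
    * Copyright(c) 2020 Red Hat, Inc.
    *
    * This work is licensed under the terms of the GNU GPL, version 2 or
    * later.  See the COPYING file in the top-level directory.
    */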


> +++ b/net/vhost-vdpa.c
> @@ -0,0 +1,211 @@
> +/*
> + * vhost-vdpa.c
> + *
> + * Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> + * Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.

Another inconsistent license.


> +++ b/qapi/net.json
> @@ -441,6 +441,22 @@
>       '*queues':        'int' } }
>   
>   ##
> +# @NetdevVhostVDPAOptions:
> +#
> +# Vhost-vdpa network backend
> +#
> +# @vhostdev: name of a mdev dev path in sysfs
> +#
> +# @queues: number of queues to be created for multiqueue vhost-vdpa
> +#          (default: 1) (Since 2.11)

There's no need to add a 'since' tag for an individual member if...

> +#
> +# Since: 2.11

...the struct itself was introduced in the same release.  However, using 
2.11 as the release is wrong; the next release will be 5.1.
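
A fixed doc comment might read (a sketch following the suggestions above):

   ##
   # @NetdevVhostVDPAOptions:
   #
   # Vhost-vdpa network backend
   #
   # @vhostdev: name of a mdev dev path in sysfs
   #
   # @queues: number of queues to be created for multiqueue vhost-vdpa
   #          (default: 1)
   #
   # Since: 5.1
   ##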

> +##
> +{ 'struct': 'NetdevVhostVDPAOptions',
> +  'data': {
> +    '*vhostdev':     'str',
> +    '*queues':       'int' } }
> +##
>   # @NetClientDriver:
>   #
>   # Available netdev drivers.
> @@ -451,7 +467,7 @@
>   ##
>   { 'enum': 'NetClientDriver',

Missing documentation that 'vhost-vdpa' was added in 5.1.
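
For instance (a sketch; exact phrasing per the QAPI documentation
conventions):

   # @vhost-vdpa: since 5.1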

>     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> -            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
> +            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
>   
>   ##
>   # @Netdev:
> @@ -479,7 +495,8 @@
>       'bridge':   'NetdevBridgeOptions',
>       'hubport':  'NetdevHubPortOptions',
>       'netmap':   'NetdevNetmapOptions',
> -    'vhost-user': 'NetdevVhostUserOptions' } }
> +    'vhost-user': 'NetdevVhostUserOptions',
> +    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
>   
>   ##
>   # @NetLegacy:
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
@ 2020-04-20 14:51   ` Eric Blake
  2020-04-21  3:56   ` Jason Wang
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 28+ messages in thread
From: Eric Blake @ 2020-04-20 14:51 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, hanand, hch, eperezma,
	jgg, shahafs, kevin.tian, parav, vmireyno, cunming.liang, gdawar,
	jiri, xiao.w.wang, stefanha, zhihong.wang, aadam, rdunlap,
	maxime.coquelin, lingshan.zhu

On 4/20/20 4:32 AM, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purpose,
> this vDPA device exposes to user space a non-vendor-specific configuration
> interface for setting up a vhost HW accelerator, this patch set introduces
> a third vhost backend called vhost-vdpa based on the vDPA interface.
> 
> Vhost-vdpa usage:
> 
>    qemu-system-x86_64 -cpu host -enable-kvm \
>      ......
>    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>    -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> 
> Author: Tiwei Bie

Another questionable authorship line; should this be Signed-off-by?  (Do 
we have permission from Tiwei Bie to include this code?)

> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---

> +++ b/hw/virtio/vhost-vdpa.c
> @@ -0,0 +1,379 @@
> +/*
> + * vhost-vdpa
> + *
> + *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> + *  Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.

Another questionable "All rights reserved"


> --- /dev/null
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -0,0 +1,14 @@
> +
> +#ifndef HW_VIRTIO_VHOST_VDPA_H
> +#define HW_VIRTIO_VHOST_VDPA_H
> +

All new files should include a copyright and license, even if they are 
short.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




* Re: [RFC v1 1/4] net: Introduce qemu_get_peer
  2020-04-20  9:32 ` [RFC v1 1/4] net: Introduce qemu_get_peer Cindy Lu
@ 2020-04-21  3:23   ` Jason Wang
  2020-04-21  8:10     ` Cindy Lu
  2020-04-21 15:01   ` Laurent Vivier
  1 sibling, 1 reply; 28+ messages in thread
From: Jason Wang @ 2020-04-21  3:23 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck
  Cc: mhabets, qemu-devel, rob.miller, saugatm, maxime.coquelin, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, hanand, lingshan.zhu


On 2020/4/20 5:32 PM, Cindy Lu wrote:
> This is a small function  that can get the peer from given NetClientState and queue_index


Unnecessary space between 'function' and 'that'.


>
> Signed-off-by: Cindy Lu <lulu@redhat.com>


Please split this patch into two parts:

1) introduce the function
2) the actual user for this function


> ---
>   hw/net/vhost_net.c | 16 ++++++++++------
>   include/net/net.h  |  1 +
>   net/net.c          |  6 ++++++
>   3 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 6b82803fa7..4096d64aaf 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -306,7 +306,9 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>       BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
>       VirtioBusState *vbus = VIRTIO_BUS(qbus);
>       VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
> +    struct vhost_net *net;
>       int r, e, i;
> +    NetClientState *peer;
>   
>       if (!k->set_guest_notifiers) {
>           error_report("binding does not support guest notifiers");
> @@ -314,9 +316,9 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>       }
>   
>       for (i = 0; i < total_queues; i++) {
> -        struct vhost_net *net;
>   
> -        net = get_vhost_net(ncs[i].peer);
> +        peer = qemu_get_peer(ncs, i);
> +        net = get_vhost_net(peer);
>           vhost_net_set_vq_index(net, i * 2);
>   
>           /* Suppress the masking guest notifiers on vhost user
> @@ -335,15 +337,16 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>       }
>   
>       for (i = 0; i < total_queues; i++) {
> -        r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev);
> +        peer = qemu_get_peer(ncs, i);
> +        r = vhost_net_start_one(get_vhost_net(peer), dev);
>   
>           if (r < 0) {
>               goto err_start;
>           }
>   
> -        if (ncs[i].peer->vring_enable) {
> +        if (peer->vring_enable) {
>               /* restore vring enable state */
> -            r = vhost_set_vring_enable(ncs[i].peer, ncs[i].peer->vring_enable);
> +            r = vhost_set_vring_enable(peer, peer->vring_enable);
>   
>               if (r < 0) {
>                   goto err_start;
> @@ -355,7 +358,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>   
>   err_start:
>       while (--i >= 0) {
> -        vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
> +        peer = qemu_get_peer(ncs , i);
> +        vhost_net_stop_one(get_vhost_net(peer), dev);
>       }
>       e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
>       if (e < 0) {
> diff --git a/include/net/net.h b/include/net/net.h
> index e175ba9677..0a74324ccd 100644
> --- a/include/net/net.h
> +++ b/include/net/net.h
> @@ -175,6 +175,7 @@ void hmp_info_network(Monitor *mon, const QDict *qdict);
>   void net_socket_rs_init(SocketReadState *rs,
>                           SocketReadStateFinalize *finalize,
>                           bool vnet_hdr);
> +NetClientState *qemu_get_peer(NetClientState *nc, int queue_index);
>   
>   /* NIC info */
>   
> diff --git a/net/net.c b/net/net.c
> index 84aa6d8d00..ac5080dda1 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -324,6 +324,12 @@ void *qemu_get_nic_opaque(NetClientState *nc)
>   
>       return nic->opaque;
>   }
> +NetClientState *qemu_get_peer(NetClientState *nc, int queue_index)
> +{
> +    NetClientState *ncs  =  nc + queue_index;


Unnecessary space around '='.

Thanks


> +    assert(ncs != NULL);
> +    return ncs->peer;
> +}
>   
>   static void qemu_cleanup_net_client(NetClientState *nc)
>   {



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client
  2020-04-20  9:32 ` [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client Cindy Lu
  2020-04-20 14:49   ` Eric Blake
@ 2020-04-21  3:40   ` Jason Wang
  2020-04-21  9:46     ` Cindy Lu
  2020-04-21 15:47   ` Laurent Vivier
  2 siblings, 1 reply; 28+ messages in thread
From: Jason Wang @ 2020-04-21  3:40 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck
  Cc: mhabets, qemu-devel, rob.miller, saugatm, maxime.coquelin, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, hanand, lingshan.zhu


On 2020/4/20 5:32 PM, Cindy Lu wrote:
> This patch set introduces a new net client type: vhost-vdpa.
> vhost-vdpa net client will set up a vDPA device which is svhostdevpecified


typo.


> by a "vhostdev" parameter.
>
> Author: Tiwei Bie


Please keep the Signed-off-by tags from the original patch.


> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>   include/net/vhost-vdpa.h |  18 ++++
>   include/net/vhost_net.h  |   1 +
>   net/Makefile.objs        |   2 +-
>   net/clients.h            |   2 +
>   net/net.c                |   1 +
>   net/vhost-vdpa.c         | 211 +++++++++++++++++++++++++++++++++++++++
>   qapi/net.json            |  21 +++-
>   7 files changed, 253 insertions(+), 3 deletions(-)
>   create mode 100644 include/net/vhost-vdpa.h
>   create mode 100644 net/vhost-vdpa.c


I think this patch should come after patch 3.

And it's better to build this as a module, like vhost-user.


>
> diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> new file mode 100644
> index 0000000000..9ddd538dad
> --- /dev/null
> +++ b/include/net/vhost-vdpa.h
> @@ -0,0 +1,18 @@
> +/*
> + * vhost-vdpa.h
> + *
> + * Copyright(c) 2017 Intel Corporation. All rights reserved.


2020 and please add Red Hat here as well.


> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef VHOST_VDPA_H
> +#define VHOST_VDPA_H
> +
> +struct vhost_net;
> +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
> +
> +#endif /* VHOST_VDPA_H */
> diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> index 77e47398c4..6f3a624cf7 100644
> --- a/include/net/vhost_net.h
> +++ b/include/net/vhost_net.h
> @@ -39,5 +39,6 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
>   uint64_t vhost_net_get_acked_features(VHostNetState *net);
>   
>   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
> +int vhost_set_state(NetClientState *nc, int state);


This function is not used in this patch.


>   
>   #endif
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index c5d076d19c..da459cfc19 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -26,7 +26,7 @@ tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
>   tap-obj-y ?= tap-stub.o
>   common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
>   common-obj-$(CONFIG_WIN32) += tap-win32.o
> -
> +common-obj-$(CONFIG_VHOST_KERNEL) += vhost-vdpa.o
>   vde.o-libs = $(VDE_LIBS)
>   
>   common-obj-$(CONFIG_CAN_BUS) += can/
> diff --git a/net/clients.h b/net/clients.h
> index a6ef267e19..92f9b59aed 100644
> --- a/net/clients.h
> +++ b/net/clients.h
> @@ -61,4 +61,6 @@ int net_init_netmap(const Netdev *netdev, const char *name,
>   int net_init_vhost_user(const Netdev *netdev, const char *name,
>                           NetClientState *peer, Error **errp);
>   
> +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> +                        NetClientState *peer, Error **errp);
>   #endif /* QEMU_NET_CLIENTS_H */
> diff --git a/net/net.c b/net/net.c
> index ac5080dda1..2beb95388a 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -964,6 +964,7 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
>           [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
>   #ifdef CONFIG_VHOST_NET_USER
>           [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
> +        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
>   #endif
>   #ifdef CONFIG_L2TPV3
>           [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> new file mode 100644
> index 0000000000..5daeba0b76
> --- /dev/null
> +++ b/net/vhost-vdpa.c
> @@ -0,0 +1,211 @@
> +/*
> + * vhost-vdpa.c
> + *
> + * Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> + * Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "clients.h"
> +#include "net/vhost_net.h"
> +#include "net/vhost-vdpa.h"
> +#include "hw/virtio/vhost-vdpa.h"
> +#include "chardev/char-fe.h"


I don't think we use char-fe in this file.


> +#include "qemu/config-file.h"
> +#include "qemu/error-report.h"
> +#include "qemu/option.h"
> +#include "qapi/error.h"
> +#include "trace.h"


I think we don't use tracing in this file.


> +#include <linux/vfio.h>
> +#include <sys/ioctl.h>
> +#include <err.h>
> +#include <linux/virtio_net.h>
> +
> +
> +typedef struct VhostVDPAState {
> +    NetClientState nc;


This may not work for the multiqueue case; you can either fix it or just
leave a TODO comment.
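Even a one-line marker on the field would do for now, e.g.:

    NetClientState nc; /* TODO: multiqueue support (one state per queue pair) */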


> +    struct vhost_vdpa vhost_vdpa;


This explains why patch 3 should come first.


> +    VHostNetState *vhost_net;
> +    uint64_t acked_features;
> +    bool started;
> +} VhostVDPAState;
> +
> +VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> +{
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    return s->vhost_net;
> +}
> +
> +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc)
> +{
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    return s->acked_features;
> +}
> +
> +static void vhost_vdpa_stop(NetClientState *ncs)
> +{
> +    VhostVDPAState *s;
> +
> +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +
> +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> +
> +    if (s->vhost_net) {
> +        /* save acked features */
> +        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
> +        if (features) {
> +            s->acked_features = features;
> +         }
> +        vhost_net_cleanup(s->vhost_net);
> +    }
> +}
> +
> +static int vhost_vdpa_start(NetClientState *ncs, void *be)
> +{


The name of this function is confusing; we don't actually start vhost-vdpa here.


> +    VhostNetOptions options;
> +    struct vhost_net *net = NULL;
> +    VhostVDPAState *s;
> +
> +    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
> +
> +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +
> +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> +
> +    options.net_backend = ncs;
> +    options.opaque      = be;
> +    options.busyloop_timeout = 0;
> +    net = vhost_net_init(&options);
> +    if (!net) {
> +        error_report("failed to init vhost_net for queue");
> +        goto err;
> +     }
> +
> +     if (s->vhost_net) {
> +        vhost_net_cleanup(s->vhost_net);
> +        g_free(s->vhost_net);
> +     }
> +     s->vhost_net = net;
> +
> +    return 0;
> +
> +err:
> +    if (net) {
> +        vhost_net_cleanup(net);
> +    }
> +    vhost_vdpa_stop(ncs);
> +    return -1;
> +}
> +static void vhost_vdpa_cleanup(NetClientState *nc)
> +{
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +
> +    if (s->vhost_net) {
> +        vhost_net_cleanup(s->vhost_net);
> +        g_free(s->vhost_net);
> +        s->vhost_net = NULL;
> +    }
> +
> +    qemu_purge_queued_packets(nc);
> +}
> +
> +static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
> +{
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +
> +    return true;
> +}
> +
> +static bool vhost_vdpa_has_ufo(NetClientState *nc)
> +{
> +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    uint64_t  features = 0;
> +
> +    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
> +    features = vhost_net_get_features(s->vhost_net, features);
> +    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
> +
> +}
> +
> +static NetClientInfo net_vhost_vdpa_info = {
> +        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
> +        .size = sizeof(VhostVDPAState),
> +        .cleanup = vhost_vdpa_cleanup,
> +        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
> +        .has_ufo = vhost_vdpa_has_ufo,
> +};
> +
> +static int net_vhost_vdpa_init(NetClientState *peer, const char *device,
> +                               const char *name, const char *vhostdev)
> +{
> +    NetClientState *nc = NULL;
> +    VhostVDPAState *s;
> +    int vdpa_device_fd;
> +    assert(name);
> +
> +    nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name);
> +    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-vdpa");
> +    nc->queue_index = 0;
> +
> +    s = DO_UPCAST(VhostVDPAState, nc, nc);
> +
> +    vdpa_device_fd = open(vhostdev, O_RDWR);
> +    if (vdpa_device_fd == -1) {
> +        return -errno;
> +    }
> +    s->vhost_vdpa.device_fd = vdpa_device_fd;


Need to add a step to verify the device type and fail if it is not a
networking device.
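Something like this would do (a rough sketch; it assumes the vhost-vdpa
uAPI grows a VHOST_VDPA_GET_DEVICE_ID ioctl that reports the virtio
device id):

    uint32_t device_id;

    /* reject anything that is not a virtio-net device */
    if (ioctl(vdpa_device_fd, VHOST_VDPA_GET_DEVICE_ID, &device_id) < 0 ||
        device_id != VIRTIO_ID_NET) {
        error_report("vhost-vdpa: %s is not a network device", vhostdev);
        close(vdpa_device_fd);
        return -ENOTSUP;
    }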


> +    vhost_vdpa_start(nc, (void *)&s->vhost_vdpa);
> +
> +    assert(s->vhost_net);
> +
> +    return 0;
> +}
> +
> +static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
> +{
> +    const char *name = opaque;
> +    const char *driver, *netdev;
> +
> +    driver = qemu_opt_get(opts, "driver");
> +    netdev = qemu_opt_get(opts, "netdev");
> +
> +    if (!driver || !netdev) {
> +        return 0;
> +    }
> +
> +    if (strcmp(netdev, name) == 0 &&
> +        !g_str_has_prefix(driver, "virtio-net-")) {
> +        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> +                        NetClientState *peer, Error **errp)
> +{
> +    const NetdevVhostVDPAOptions *vhost_vdpa_opts;
> +
> +    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> +    vhost_vdpa_opts = &netdev->u.vhost_vdpa;
> +
> +    /* verify net frontend */
> +    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
> +                          (char *)name, errp)) {
> +        return -1;
> +    }
> +
> +
> +    return net_vhost_vdpa_init(peer, "vhost_vdpa", name,
> +                               vhost_vdpa_opts->vhostdev);


Please add support for passing an fd via the command line.
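Something along these lines, perhaps (a sketch only; the QAPI 'fd' option
and the net_vhost_vdpa_init_fd() helper are hypothetical):

    if (vhost_vdpa_opts->has_fd) {
        /* resolve a monitor-passed fd, the way tap does */
        int fd = monitor_fd_param(cur_mon, vhost_vdpa_opts->fd, errp);

        if (fd == -1) {
            return -1;
        }
        /* hypothetical variant of net_vhost_vdpa_init() taking an open fd */
        return net_vhost_vdpa_init_fd(peer, "vhost_vdpa", name, fd);
    }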


> +
> +    return 0;
> +}
> diff --git a/qapi/net.json b/qapi/net.json
> index 335295be50..35a5ccee39 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -441,6 +441,22 @@
>       '*queues':        'int' } }
>   
>   ##
> +# @NetdevVhostVDPAOptions:
> +#
> +# Vhost-vdpa network backend
> +#
> +# @vhostdev: name of a mdev dev path in sysfs
> +#
> +# @queues: number of queues to be created for multiqueue vhost-vdpa
> +#          (default: 1) (Since 2.11)
> +#
> +# Since: 2.11


The version number is wrong, I guess it's probably 5.1.

Thanks


> +##
> +{ 'struct': 'NetdevVhostVDPAOptions',
> +  'data': {
> +    '*vhostdev':     'str',
> +    '*queues':       'int' } }
> +##
>   # @NetClientDriver:
>   #
>   # Available netdev drivers.
> @@ -451,7 +467,7 @@
>   ##
>   { 'enum': 'NetClientDriver',
>     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> -            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
> +            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
>   
>   ##
>   # @Netdev:
> @@ -479,7 +495,8 @@
>       'bridge':   'NetdevBridgeOptions',
>       'hubport':  'NetdevHubPortOptions',
>       'netmap':   'NetdevNetmapOptions',
> -    'vhost-user': 'NetdevVhostUserOptions' } }
> +    'vhost-user': 'NetdevVhostUserOptions',
> +    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
>   
>   ##
>   # @NetLegacy:



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
  2020-04-20 14:51   ` Eric Blake
@ 2020-04-21  3:56   ` Jason Wang
  2020-04-21  9:12     ` Cindy Lu
  2020-04-21 15:54   ` Laurent Vivier
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 28+ messages in thread
From: Jason Wang @ 2020-04-21  3:56 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck
  Cc: mhabets, qemu-devel, rob.miller, saugatm, hanand, hch, eperezma,
	jgg, shahafs, kevin.tian, parav, vmireyno, cunming.liang, gdawar,
	jiri, xiao.w.wang, stefanha, zhihong.wang, aadam, rdunlap,
	maxime.coquelin, lingshan.zhu


On 2020/4/20 5:32 PM, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purpose,
> this vDPA device exposes to user space a non-vendor-specific configuration
> interface for setting up a vhost HW accelerator, this patch set introduces
> a third vhost backend called vhost-vdpa based on the vDPA interface.
>
> Vhost-vdpa usage:
>
>    qemu-system-x86_64 -cpu host -enable-kvm \
>      ......
>    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>    -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \


Actually, this part should belong to patch 2.

And we probably need to add a comment that vIOMMU is not supported right 
now.


>
> Author: Tiwei Bie
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>   hw/net/vhost_net.c                |  43 ++++
>   hw/net/virtio-net.c               |   9 +
>   hw/virtio/Makefile.objs           |   2 +-
>   hw/virtio/vhost-backend.c         |   3 +
>   hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
>   hw/virtio/vhost.c                 |   5 +
>   include/hw/virtio/vhost-backend.h |   6 +-
>   include/hw/virtio/vhost-vdpa.h    |  14 ++
>   8 files changed, 459 insertions(+), 2 deletions(-)
>   create mode 100644 hw/virtio/vhost-vdpa.c
>   create mode 100644 include/hw/virtio/vhost-vdpa.h
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 4096d64aaf..0d13fda2fc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -17,8 +17,10 @@
>   #include "net/net.h"
>   #include "net/tap.h"
>   #include "net/vhost-user.h"
> +#include "net/vhost-vdpa.h"
>   
>   #include "standard-headers/linux/vhost_types.h"
> +#include "linux-headers/linux/vhost.h"
>   #include "hw/virtio/virtio-net.h"
>   #include "net/vhost_net.h"
>   #include "qemu/error-report.h"
> @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
>       VHOST_INVALID_FEATURE_BIT
>   };
>   
> +static const int vdpa_feature_bits[] = {
> +    VIRTIO_F_NOTIFY_ON_EMPTY,
> +    VIRTIO_RING_F_INDIRECT_DESC,
> +    VIRTIO_RING_F_EVENT_IDX,
> +    VIRTIO_F_ANY_LAYOUT,
> +    VIRTIO_F_VERSION_1,
> +    VIRTIO_NET_F_CSUM,
> +    VIRTIO_NET_F_GUEST_CSUM,
> +    VIRTIO_NET_F_GSO,
> +    VIRTIO_NET_F_GUEST_TSO4,
> +    VIRTIO_NET_F_GUEST_TSO6,
> +    VIRTIO_NET_F_GUEST_ECN,
> +    VIRTIO_NET_F_GUEST_UFO,
> +    VIRTIO_NET_F_HOST_TSO4,
> +    VIRTIO_NET_F_HOST_TSO6,
> +    VIRTIO_NET_F_HOST_ECN,
> +    VIRTIO_NET_F_HOST_UFO,
> +    VIRTIO_NET_F_MRG_RXBUF,
> +    VIRTIO_NET_F_MTU,
> +    VIRTIO_F_IOMMU_PLATFORM,
> +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> +    VHOST_INVALID_FEATURE_BIT
> +};
>   static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>   {
>       const int *feature_bits = 0;
> @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>       case NET_CLIENT_DRIVER_VHOST_USER:
>           feature_bits = user_feature_bits;
>           break;
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        feature_bits = vdpa_feature_bits;
> +        break;
>       default:
>           error_report("Feature bits not defined for this type: %d",
>                   net->nc->info->type);
> @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>           assert(vhost_net);
>           break;
>   #endif
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> +        assert(vhost_net);
> +        break;
>       default:
>           break;
>       }
> @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
>   
>       return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
>   }
> +int vhost_set_state(NetClientState *nc, int state)
> +{
> +    struct vhost_net *net = get_vhost_net(nc);
> +    struct vhost_dev *hdev = &net->dev;
> +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> +        if (hdev->vhost_ops->vhost_set_state) {
> +                return hdev->vhost_ops->vhost_set_state(hdev, state);
> +             }
> +        }
> +    return 0;
> +}
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index db3d7c38e6..bbecd7ab96 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>       VirtIODevice *vdev = VIRTIO_DEVICE(n);
>       NetClientState *nc = qemu_get_queue(n->nic);
>       int queues = n->multiqueue ? n->max_queues : 1;
> +    NetClientState *peer = nc->peer;


qemu_get_peer()?
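I.e. something like:

    NetClientState *peer = qemu_get_peer(nc, 0);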


> +    uint8_t status_set  = vdev->status ;
> +    uint8_t vhost_started_pre = n->vhost_started;
>   
>       if (!get_vhost_net(nc->peer)) {
>           return;
> @@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>                   return;
>               }
>           }
> +        status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK;
>   
>           n->vhost_started = 1;
>           r = vhost_net_start(vdev, n->nic->ncs, queues);
> @@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>               error_report("unable to start vhost net: %d: "
>                            "falling back on userspace virtio", -r);
>               n->vhost_started = 0;
> +            status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
>           }
>       } else {
>           vhost_net_stop(vdev, n->nic->ncs, queues);
> +        status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
>           n->vhost_started = 0;
>       }
> +    if (vhost_started_pre != n->vhost_started) {
> +            vhost_set_state(peer, status_set);
> +    }
>   }


I think this deserves an independent patch.



>   
>   static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
> diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
> index e2f70fbb89..17361d959e 100644
> --- a/hw/virtio/Makefile.objs
> +++ b/hw/virtio/Makefile.objs
> @@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y)
>   common-obj-y += virtio-bus.o
>   obj-y += virtio.o
>   
> -obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o
> +obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o vhost-backend.o vhost-vdpa.o
>   common-obj-$(call lnot,$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o
>   obj-$(CONFIG_VHOST_USER) += vhost-user.o
>   
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 48905383f8..935cd9e561 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
>           dev->vhost_ops = &user_ops;
>           break;
>   #endif
> +    case VHOST_BACKEND_TYPE_VDPA:
> +        dev->vhost_ops = &vdpa_ops;
> +        break;
>       default:
>           error_report("Unknown vhost backend type");
>           r = -1;
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> new file mode 100644
> index 0000000000..213b327600
> --- /dev/null
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -0,0 +1,379 @@
> +/*
> + * vhost-vdpa
> + *
> + *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> + *  Copyright(c) 2020 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include <linux/vhost.h>
> +#include <linux/vfio.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +#include "hw/virtio/vhost.h"
> +#include "hw/virtio/vhost-backend.h"
> +#include "hw/virtio/virtio-net.h"
> +#include "hw/virtio/vhost-vdpa.h"
> +#include "qemu/main-loop.h"
> +#include <linux/kvm.h>
> +#include "sysemu/kvm.h"
> +
> +
> +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
> +{
> +    return (!memory_region_is_ram(section->mr) &&
> +            !memory_region_is_iommu(section->mr)) ||
> +           /*
> +            * Sizing an enabled 64-bit BAR can cause spurious mappings to
> +            * addresses in the upper part of the 64-bit address space.  These
> +            * are never accessed by the CPU and beyond the address width of
> +            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
> +            */
> +           section->offset_within_address_space & (1ULL << 63);
> +}
> +
> +static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
> +                              void *vaddr, bool readonly)
> +{
> +    struct vhost_msg_v2 msg;
> +    int fd = v->device_fd;
> +    int ret = 0;
> +
> +    msg.type = VHOST_IOTLB_MSG_V2;


Since V2 of the message is used here, I believe we need a kernel patch
to allow querying the backend capabilities.
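For example, something like this at init time (assuming
VHOST_GET_BACKEND_FEATURES gets wired up for vdpa on the kernel side):

    uint64_t features;

    /* refuse to run against a backend without IOTLB v2 message support */
    if (ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &features) < 0 ||
        !(features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
        error_report("vhost-vdpa: no VHOST_BACKEND_F_IOTLB_MSG_V2 support");
        return -ENOTSUP;
    }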


> +    msg.iotlb.iova = iova;
> +    msg.iotlb.size = size;
> +    msg.iotlb.uaddr = (uint64_t)vaddr;
> +    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
> +    msg.iotlb.type = VHOST_IOTLB_UPDATE;
> +
> +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> +        error_report("failed to write, fd=%d, errno=%d (%s)",
> +            fd, errno, strerror(errno));
> +        return -EIO ;
> +    }
> +
> +    return ret;
> +}
> +
> +static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
> +                                hwaddr size)
> +{
> +    struct vhost_msg_v2 msg;
> +    int fd = v->device_fd;
> +    int ret = 0;
> +
> +    msg.type = VHOST_IOTLB_MSG_V2;
> +    msg.iotlb.iova = iova;
> +    msg.iotlb.size = size;
> +    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
> +
> +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> +        error_report("failed to write, fd=%d, errno=%d (%s)",
> +            fd, errno, strerror(errno));
> +        return -EIO ;
> +    }
> +
> +    return ret;
> +}
> +
> +static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> +    hwaddr iova;
> +    Int128 llend, llsize;
> +    void *vaddr;
> +    int ret;
> +
> +    if (vhost_vdpa_listener_skipped_section(section)) {
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +
> +    memory_region_ref(section->mr);
> +
> +    /* Here we assume that memory_region_is_ram(section->mr)==true */
> +
> +    vaddr = memory_region_get_ram_ptr(section->mr) +
> +            section->offset_within_region +
> +            (iova - section->offset_within_address_space);
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> +                             vaddr, section->readonly);
> +    if (ret) {
> +        error_report("vhost vdpa map fail!");
> +        if (memory_region_is_ram_device(section->mr)) {
> +            /* Allow unexpected mappings not to be fatal for RAM devices */
> +            error_report("map ram fail!");
> +          return ;
> +        }
> +        goto fail;
> +    }
> +
> +    return;
> +
> +fail:
> +    if (memory_region_is_ram_device(section->mr)) {
> +        error_report("failed to vdpa_dma_map. pci p2p may not work");
> +        return;
> +
> +    }
> +    /*
> +     * On the initfn path, store the first error in the container so we
> +     * can gracefully fail.  Runtime, there's not much we can do other
> +     * than throw a hardware error.
> +     */
> +    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
> +    return;
> +
> +}
> +
> +static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> +    hwaddr iova;
> +    Int128 llend, llsize;
> +    int ret;
> +    bool try_unmap = true;
> +
> +    if (vhost_vdpa_listener_skipped_section(section)) {
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +
> +    llsize = int128_sub(llend, int128_make64(iova));
> +
> +    if (try_unmap) {
> +        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
> +        if (ret) {
> +            error_report("vhost_vdpa dma unmap error!");
> +        }
> +    }
> +
> +    memory_region_unref(section->mr);
> +}
> +


I think it's better to add a comment explaining why vhost-vdpa uses a
different listener from the one used by the other vhost backends (e.g.
kernel or user).
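Maybe something along these lines (my wording, adjust as you see fit):

    /*
     * Unlike kernel vhost, vhost-vdpa does not hand a vhost_memory table
     * to the backend; instead, guest memory is mapped and unmapped
     * eagerly through the IOTLB API as memory sections are added and
     * removed, much like VFIO does. Hence the dedicated listener.
     */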


> +static const MemoryListener vhost_vdpa_memory_listener = {
> +    .region_add = vhost_vdpa_listener_region_add,
> +    .region_del = vhost_vdpa_listener_region_del,
> +};
> +
> +
> +static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
> +                             void *arg)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    int fd = v->device_fd;
> +
> +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> +
> +    return ioctl(fd, request, arg);
> +}
> +
> +
> +
> +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
> +{
> +    struct vhost_vdpa *v;
> +
> +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> +
> +    v = opaque;
> +    dev->opaque =  opaque ;
> +
> +    v->listener = vhost_vdpa_memory_listener;
> +    memory_listener_register(&v->listener, &address_space_memory);
> +
> +    return 0;
> +}
> +
> +static int vhost_vdpa_cleanup(struct vhost_dev *dev)
> +{
> +    struct vhost_vdpa *v;
> +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> +
> +    v = dev->opaque;
> +    memory_listener_unregister(&v->listener);
> +
> +    dev->opaque = NULL;
> +    return 0;
> +}
> +


A comment is needed here to explain why INT_MAX is used.
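E.g.:

    /*
     * vhost-vdpa does not use vhost_memory regions (all mappings go
     * through the IOTLB API), so there is no backend limit on the
     * number of memory slots.
     */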


> +static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
> +{
> +    return INT_MAX;
> +}
> +
> +static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> +                                   struct vhost_log *log)
> +{
> +    return 0;
> +}


I think we should fail this function since we don't support dirty page
tracking yet.

And it's not guaranteed that a dirty page bitmap will be used in the future.
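I.e. simply (a sketch; the exact error value is up to you):

    static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                       struct vhost_log *log)
    {
        /* vhost-vdpa cannot track dirty pages yet, so refuse to log */
        return -ENOTSUP;
    }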


> +
> +static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
> +                                    struct vhost_memory *mem)
> +{
> +
> +    if (mem->padding) {
> +        return -1;
> +    }
> +
> +    return 0;


A comment is needed to explain why the mem table is not used (e.g. we
use the IOTLB API instead).
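E.g.:

    /*
     * Memory mappings are installed through the IOTLB API instead of
     * SET_MEM_TABLE, so there is nothing to do here beyond the sanity
     * check.
     */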


> +}
> +
> +static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
> +                                     struct vhost_vring_addr *addr)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
> +}
> +
> +static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
> +                                    struct vhost_vring_state *ring)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
> +}
> +
> +static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> +                                     struct vhost_vring_state *ring)
> +{
> +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> +}
> +
> +static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> +                                     struct vhost_vring_state *ring)
> +{
> +
> +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> +}
> +
> +static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
> +                                     struct vhost_vring_file *file)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
> +}
> +
> +static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
> +                                     struct vhost_vring_file *file)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
> +}
> +
> +static int vhost_vdpa_set_features(struct vhost_dev *dev,
> +                                   uint64_t features)
> +{
> +
> +    features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);


This seems tricky, I don't think we need this actually.


> +    return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
> +
> +}
> +
> +static int vhost_vdpa_get_features(struct vhost_dev *dev,
> +                                   uint64_t *features)
> +{
> +    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
> +}
> +
> +static int vhost_vdpa_set_owner(struct vhost_dev *dev)
> +{
> +    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
> +}
> +
> +static int vhost_vdpa_reset_device(struct vhost_dev *dev)
> +{
> +    return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL);
> +}
> +
> +static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
> +{
> +    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
> +
> +    return idx - dev->vq_index;
> +}
> +
> +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> +{
> +    int i;
> +
> +    for (i = 0; i < dev->nvqs; ++i) {
> +        struct vhost_vring_state state = {
> +            .index = dev->vq_index + i,
> +            .num   = enable,
> +        };
> +
> +        state.num = 1;
> +
> +        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);


Please make sure patch 4 comes first; then we don't need to fix this up
again in patch 4.


> +    }
> +
> +    return 0;
> +}
> +
> +static int vhost_vdpa_set_state(struct vhost_dev *dev, int state)
> +{
> +    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state);
> +}
> +
> +
> +const VhostOps vdpa_ops = {
> +        .backend_type = VHOST_BACKEND_TYPE_VDPA,
> +        .vhost_backend_init = vhost_vdpa_init,
> +        .vhost_backend_cleanup = vhost_vdpa_cleanup,
> +        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
> +        .vhost_set_log_base = vhost_vdpa_set_log_base,
> +        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
> +        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
> +        .vhost_set_vring_endian = NULL,
> +        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
> +        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
> +        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
> +        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
> +        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
> +        .vhost_set_features = vhost_vdpa_set_features,
> +        .vhost_get_features = vhost_vdpa_get_features,
> +        .vhost_set_owner = vhost_vdpa_set_owner,
> +        .vhost_reset_device = vhost_vdpa_reset_device,
> +        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> +        .vhost_requires_shm_log = NULL,
> +        .vhost_migration_done = NULL,
> +        .vhost_backend_can_merge = NULL,
> +        .vhost_net_set_mtu = NULL,
> +        .vhost_set_iotlb_callback = NULL,
> +        .vhost_send_device_iotlb_msg = NULL,
> +        .vhost_set_state = vhost_vdpa_set_state,
> +};
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 4da0d5a6c5..d1f2c4add7 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
>           .log_guest_addr = vq->used_phys,
>           .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
>       };
> +    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
> +        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
> +        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
> +        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
> +    }


A comment is needed to explain why vDPA differs from the other backends here.
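Perhaps something like this (assuming guest physical addresses are indeed
what the backend expects here):

    /*
     * vDPA devices do DMA through the IOMMU/IOTLB mappings installed by
     * the memory listener, so the ring addresses passed to the backend
     * must be guest physical addresses, not QEMU virtual addresses.
     */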

Thanks


>       int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
>       if (r < 0) {
>           VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index 6f6670783f..d81bd9885f 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
>       VHOST_BACKEND_TYPE_NONE = 0,
>       VHOST_BACKEND_TYPE_KERNEL = 1,
>       VHOST_BACKEND_TYPE_USER = 2,
> -    VHOST_BACKEND_TYPE_MAX = 3,
> +    VHOST_BACKEND_TYPE_VDPA = 3,
> +    VHOST_BACKEND_TYPE_MAX = 4,
>   } VhostBackendType;
>   
>   typedef enum VhostSetConfigType {
> @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
>   typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
>                                           struct vhost_inflight *inflight);
>   
> +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
>   typedef struct VhostOps {
>       VhostBackendType backend_type;
>       vhost_backend_init vhost_backend_init;
> @@ -152,9 +154,11 @@ typedef struct VhostOps {
>       vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
>       vhost_get_inflight_fd_op vhost_get_inflight_fd;
>       vhost_set_inflight_fd_op vhost_set_inflight_fd;
> +    vhost_set_state_op vhost_set_state;
>   } VhostOps;
>   
>   extern const VhostOps user_ops;
> +extern const VhostOps vdpa_ops;
>   
>   int vhost_set_backend_type(struct vhost_dev *dev,
>                              VhostBackendType backend_type);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> new file mode 100644
> index 0000000000..889c1a4410
> --- /dev/null
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -0,0 +1,14 @@
> +
> +#ifndef HW_VIRTIO_VHOST_VDPA_H
> +#define HW_VIRTIO_VHOST_VDPA_H
> +
> +#include "hw/virtio/virtio.h"
> +
> +typedef struct vhost_vdpa {
> +    int device_fd;
> +    MemoryListener listener;
> +} VhostVDPA;
> +
> +extern AddressSpace address_space_memory;
> +
> +#endif



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 4/4] vhost: introduce vhost_set_vring_ready method
  2020-04-20  9:32 ` [RFC v1 4/4] vhost: introduce vhost_set_vring_ready method Cindy Lu
@ 2020-04-21  3:59   ` Jason Wang
  2020-04-21  8:42     ` Cindy Lu
  0 siblings, 1 reply; 28+ messages in thread
From: Jason Wang @ 2020-04-21  3:59 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck
  Cc: mhabets, qemu-devel, rob.miller, saugatm, hanand, hch, eperezma,
	jgg, shahafs, kevin.tian, parav, vmireyno, cunming.liang, gdawar,
	jiri, xiao.w.wang, stefanha, zhihong.wang, aadam, rdunlap,
	maxime.coquelin, lingshan.zhu


On 2020/4/20 5:32 PM, Cindy Lu wrote:
> From: Jason Wang <jasowang@redhat.com>
>
> Vhost-vdpa introduces VHOST_VDPA_SET_VRING_ENABLE, which complies with
> the semantics of queue_enable defined in the virtio spec. This method
> can be used to prevent the device from executing requests for a
> specific virtqueue. This patch introduces the vhost_ops for this.
>
> Note that we already have vhost_set_vring_enable, which has a different
> semantic: it allows enabling or disabling a specific virtqueue for some
> kinds of vhost backends. E.g. vhost-user uses this to change the number
> of active queue pairs.


This patch seems to mix four things; please use dedicated patches for:

1) introduce queue_enabled method for virtio-bus
2) implement queue_enabled for virtio-pci
3) introduce vhost_set_vring_ready method for vhost ops
4) implement vhost_set_vring_ready for vdpa (needs to be squashed into
the vhost-vdpa patch).

Thanks


>
> Author: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>   hw/net/vhost_net-stub.c           |  5 +++++
>   hw/net/vhost_net.c                | 16 ++++++++++++++++
>   hw/virtio/vhost-vdpa.c            |  9 +++------
>   hw/virtio/virtio-pci.c            | 13 +++++++++++++
>   hw/virtio/virtio.c                |  6 ++++++
>   include/hw/virtio/vhost-backend.h |  2 ++
>   include/hw/virtio/virtio-bus.h    |  4 ++++
>   include/net/vhost_net.h           |  1 +
>   8 files changed, 50 insertions(+), 6 deletions(-)
>
> diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
> index aac0e98228..f5ef1e3055 100644
> --- a/hw/net/vhost_net-stub.c
> +++ b/hw/net/vhost_net-stub.c
> @@ -86,6 +86,11 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
>       return 0;
>   }
>   
> +int vhost_set_vring_ready(NetClientState *nc)
> +{
> +    return 0;
> +}
> +
>   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
>   {
>       return 0;
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 0d13fda2fc..463e333531 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -380,6 +380,10 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>                   goto err_start;
>               }
>           }
> +
> +        if (virtio_queue_enabled(dev, i)) {
> +            vhost_set_vring_ready(peer);
> +        }
>       }
>   
>       return 0;
> @@ -487,6 +491,18 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
>       return 0;
>   }
>   
> +int vhost_set_vring_ready(NetClientState *nc)
> +{
> +    VHostNetState *net = get_vhost_net(nc);
> +    const VhostOps *vhost_ops = net->dev.vhost_ops;
> +
> +    if (vhost_ops && vhost_ops->vhost_set_vring_ready) {
> +        return vhost_ops->vhost_set_vring_ready(&net->dev);
> +    }
> +
> +    return 0;
> +}
> +
>   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
>   {
>       const VhostOps *vhost_ops = net->dev.vhost_ops;
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 213b327600..49224ef9f8 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -325,18 +325,15 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
>       return idx - dev->vq_index;
>   }
>   
> -static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> +static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
>   {
>       int i;
>   
>       for (i = 0; i < dev->nvqs; ++i) {
>           struct vhost_vring_state state = {
>               .index = dev->vq_index + i,
> -            .num   = enable,
> +            .num = 1,
>           };
> -
> -        state.num = 1;
> -
>           vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
>       }
>   
> @@ -368,7 +365,7 @@ const VhostOps vdpa_ops = {
>           .vhost_set_owner = vhost_vdpa_set_owner,
>           .vhost_reset_device = vhost_vdpa_reset_device,
>           .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> -        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> +        .vhost_set_vring_ready = vhost_vdpa_set_vring_ready,
>           .vhost_requires_shm_log = NULL,
>           .vhost_migration_done = NULL,
>           .vhost_backend_can_merge = NULL,
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index c6b47a9c73..4aaf5d953e 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1103,6 +1103,18 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
>       return pci_get_address_space(dev);
>   }
>   
> +static bool virtio_pci_queue_enabled(DeviceState *d, int n)
> +{
> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> +
> +    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
> +        return proxy->vqs[vdev->queue_sel].enabled;
> +    }
> +
> +    return virtio_queue_get_desc_addr(vdev, n) != 0;
> +}
> +
>   static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
>                                      struct virtio_pci_cap *cap)
>   {
> @@ -2053,6 +2065,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
>       k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
>       k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
>       k->get_dma_as = virtio_pci_get_dma_as;
> +    k->queue_enabled = virtio_pci_queue_enabled;
>   }
>   
>   static const TypeInfo virtio_pci_bus_info = {
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 04716b5f6c..09732a8836 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -3169,6 +3169,12 @@ hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
>   
>   bool virtio_queue_enabled(VirtIODevice *vdev, int n)
>   {
> +    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
> +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> +
> +    if (k->queue_enabled)
> +        return k->queue_enabled(qbus->parent, n);
> +
>       return virtio_queue_get_desc_addr(vdev, n) != 0;
>   }
>   
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index d81bd9885f..ce8de6d308 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -78,6 +78,7 @@ typedef int (*vhost_reset_device_op)(struct vhost_dev *dev);
>   typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx);
>   typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
>                                            int enable);
> +typedef int (*vhost_set_vring_ready_op)(struct vhost_dev *dev);
>   typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
>   typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
>                                          char *mac_addr);
> @@ -140,6 +141,7 @@ typedef struct VhostOps {
>       vhost_reset_device_op vhost_reset_device;
>       vhost_get_vq_index_op vhost_get_vq_index;
>       vhost_set_vring_enable_op vhost_set_vring_enable;
> +    vhost_set_vring_ready_op vhost_set_vring_ready;
>       vhost_requires_shm_log_op vhost_requires_shm_log;
>       vhost_migration_done_op vhost_migration_done;
>       vhost_backend_can_merge_op vhost_backend_can_merge;
> diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
> index 38c9399cd4..0f6f215925 100644
> --- a/include/hw/virtio/virtio-bus.h
> +++ b/include/hw/virtio/virtio-bus.h
> @@ -83,6 +83,10 @@ typedef struct VirtioBusClass {
>        */
>       int (*ioeventfd_assign)(DeviceState *d, EventNotifier *notifier,
>                               int n, bool assign);
> +    /*
> +     * Whether queue number n is enabled.
> +     */
> +    bool (*queue_enabled)(DeviceState *d, int n);
>       /*
>        * Does the transport have variable vring alignment?
>        * (ie can it ever call virtio_queue_set_align()?)
> diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> index 6f3a624cf7..db473ff4d2 100644
> --- a/include/net/vhost_net.h
> +++ b/include/net/vhost_net.h
> @@ -35,6 +35,7 @@ int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
>   VHostNetState *get_vhost_net(NetClientState *nc);
>   
>   int vhost_set_vring_enable(NetClientState * nc, int enable);
> +int vhost_set_vring_ready(NetClientState * nc);
>   
>   uint64_t vhost_net_get_acked_features(VHostNetState *net);
>   



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 0/4] vDPA support in qemu
  2020-04-20  9:32 [RFC v1 0/4] vDPA support in qemu Cindy Lu
                   ` (3 preceding siblings ...)
  2020-04-20  9:32 ` [RFC v1 4/4] vhost: introduce vhost_set_vring_ready method Cindy Lu
@ 2020-04-21  4:03 ` Jason Wang
  2020-04-21  4:05 ` Jason Wang
  5 siblings, 0 replies; 28+ messages in thread
From: Jason Wang @ 2020-04-21  4:03 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck
  Cc: mhabets, qemu-devel, rob.miller, saugatm, maxime.coquelin, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, hanand, lingshan.zhu


On 2020/4/20 5:32 PM, Cindy Lu wrote:
> vDPA device is a device that uses a datapath which complies with the
> virtio specifications with vendor specific control path. vDPA devices
> can be both physically located on the hardware or emulated by software.
> This RFC introduce the vDPA support in qemu


Looks good overall, see comments inline.

And I think we need to reorder the patches and split some of them into
smaller ones.

E.g.:

1) qemu_get_peer
2) queue_enabled method for virtio-bus
3) queue_enabled for virtio-pci
4) set_vring_ready for vhost_ops
5) vhost_set_state for vhost_ops
6) call vhost_set_state in virtio_net_vhost_status()
7) generic vdpa support
8) vhost-vdpa net backend

Thanks


>
> Cindy Lu (3):
>    net: Introduce qemu_get_peer
>    vhost-vdpa: introduce vhost-vdpa net client
>    vhost-vdpa: implement vhost-vdpa backend
>
> Jason Wang (1):
>    vhost: introduce vhost_set_vring_ready method
>
>   hw/net/vhost_net-stub.c           |   5 +
>   hw/net/vhost_net.c                |  75 +++++-
>   hw/net/virtio-net.c               |   9 +
>   hw/virtio/Makefile.objs           |   2 +-
>   hw/virtio/vhost-backend.c         |   3 +
>   hw/virtio/vhost-vdpa.c            | 376 ++++++++++++++++++++++++++++++
>   hw/virtio/vhost.c                 |   5 +
>   hw/virtio/virtio-pci.c            |  13 ++
>   hw/virtio/virtio.c                |   6 +
>   include/hw/virtio/vhost-backend.h |   8 +-
>   include/hw/virtio/vhost-vdpa.h    |  14 ++
>   include/hw/virtio/virtio-bus.h    |   4 +
>   include/net/net.h                 |   1 +
>   include/net/vhost-vdpa.h          |  18 ++
>   include/net/vhost_net.h           |   2 +
>   net/Makefile.objs                 |   2 +-
>   net/clients.h                     |   2 +
>   net/net.c                         |   7 +
>   net/vhost-vdpa.c                  | 211 +++++++++++++++++
>   qapi/net.json                     |  21 +-
>   20 files changed, 773 insertions(+), 11 deletions(-)
>   create mode 100644 hw/virtio/vhost-vdpa.c
>   create mode 100644 include/hw/virtio/vhost-vdpa.h
>   create mode 100644 include/net/vhost-vdpa.h
>   create mode 100644 net/vhost-vdpa.c
>



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 0/4] vDPA support in qemu
  2020-04-20  9:32 [RFC v1 0/4] vDPA support in qemu Cindy Lu
                   ` (4 preceding siblings ...)
  2020-04-21  4:03 ` [RFC v1 0/4] vDPA support in qemu Jason Wang
@ 2020-04-21  4:05 ` Jason Wang
  2020-04-21  9:47   ` Cindy Lu
  5 siblings, 1 reply; 28+ messages in thread
From: Jason Wang @ 2020-04-21  4:05 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck
  Cc: mhabets, qemu-devel, rob.miller, saugatm, hanand, hch, eperezma,
	jgg, shahafs, kevin.tian, parav, vmireyno, cunming.liang, gdawar,
	jiri, xiao.w.wang, stefanha, zhihong.wang, aadam, rdunlap,
	maxime.coquelin, lingshan.zhu


On 2020/4/20 5:32 PM, Cindy Lu wrote:
> vDPA device is a device that uses a datapath which complies with the
> virtio specifications with vendor specific control path. vDPA devices
> can be both physically located on the hardware or emulated by software.
> This RFC introduce the vDPA support in qemu


And please mention the TODO list, e.g.:

1) vIOMMU support
2) software assisted live migration

Thanks



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 1/4] net: Introduce qemu_get_peer
  2020-04-21  3:23   ` Jason Wang
@ 2020-04-21  8:10     ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-21  8:10 UTC (permalink / raw)
  To: Jason Wang
  Cc: rdunlap, Michael Tsirkin, mhabets, qemu-devel, rob.miller,
	saugatm, armbru, hch, Eugenio Perez Martin, jgg, shahafs,
	kevin.tian, parav, vmireyno, Liang, Cunming, gdawar, jiri,
	xiao.w.wang, stefanha, Wang, Zhihong, maxime.coquelin,
	Ariel Adam, cohuck, hanand, Zhu, Lingshan


On Tue, Apr 21, 2020 at 11:23 AM Jason Wang <jasowang@redhat.com> wrote:

>
> On 2020/4/20 5:32 PM, Cindy Lu wrote:
> > This is a small function  that can get the peer from given
> NetClientState and queue_index
>
>
> Unnecessary space between 'function' and 'that'.
>
>
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
>
>
> Please split this patch into two parts:
>
> 1) introduce the function
> 2) the actual user of this function
>

Sure, I will fix this problem.

> > ---
> >   hw/net/vhost_net.c | 16 ++++++++++------
> >   include/net/net.h  |  1 +
> >   net/net.c          |  6 ++++++
> >   3 files changed, 17 insertions(+), 6 deletions(-)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 6b82803fa7..4096d64aaf 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -306,7 +306,9 @@ int vhost_net_start(VirtIODevice *dev,
> NetClientState *ncs,
> >       BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
> >       VirtioBusState *vbus = VIRTIO_BUS(qbus);
> >       VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
> > +    struct vhost_net *net;
> >       int r, e, i;
> > +    NetClientState *peer;
> >
> >       if (!k->set_guest_notifiers) {
> >           error_report("binding does not support guest notifiers");
> > @@ -314,9 +316,9 @@ int vhost_net_start(VirtIODevice *dev,
> NetClientState *ncs,
> >       }
> >
> >       for (i = 0; i < total_queues; i++) {
> > -        struct vhost_net *net;
> >
> > -        net = get_vhost_net(ncs[i].peer);
> > +        peer = qemu_get_peer(ncs, i);
> > +        net = get_vhost_net(peer);
> >           vhost_net_set_vq_index(net, i * 2);
> >
> >           /* Suppress the masking guest notifiers on vhost user
> > @@ -335,15 +337,16 @@ int vhost_net_start(VirtIODevice *dev,
> NetClientState *ncs,
> >       }
> >
> >       for (i = 0; i < total_queues; i++) {
> > -        r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev);
> > +        peer = qemu_get_peer(ncs, i);
> > +        r = vhost_net_start_one(get_vhost_net(peer), dev);
> >
> >           if (r < 0) {
> >               goto err_start;
> >           }
> >
> > -        if (ncs[i].peer->vring_enable) {
> > +        if (peer->vring_enable) {
> >               /* restore vring enable state */
> > -            r = vhost_set_vring_enable(ncs[i].peer,
> ncs[i].peer->vring_enable);
> > +            r = vhost_set_vring_enable(peer, peer->vring_enable);
> >
> >               if (r < 0) {
> >                   goto err_start;
> > @@ -355,7 +358,8 @@ int vhost_net_start(VirtIODevice *dev,
> NetClientState *ncs,
> >
> >   err_start:
> >       while (--i >= 0) {
> > -        vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
> > +        peer = qemu_get_peer(ncs , i);
> > +        vhost_net_stop_one(get_vhost_net(peer), dev);
> >       }
> >       e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
> >       if (e < 0) {
> > diff --git a/include/net/net.h b/include/net/net.h
> > index e175ba9677..0a74324ccd 100644
> > --- a/include/net/net.h
> > +++ b/include/net/net.h
> > @@ -175,6 +175,7 @@ void hmp_info_network(Monitor *mon, const QDict
> *qdict);
> >   void net_socket_rs_init(SocketReadState *rs,
> >                           SocketReadStateFinalize *finalize,
> >                           bool vnet_hdr);
> > +NetClientState *qemu_get_peer(NetClientState *nc, int queue_index);
> >
> >   /* NIC info */
> >
> > diff --git a/net/net.c b/net/net.c
> > index 84aa6d8d00..ac5080dda1 100644
> > --- a/net/net.c
> > +++ b/net/net.c
> > @@ -324,6 +324,12 @@ void *qemu_get_nic_opaque(NetClientState *nc)
> >
> >       return nic->opaque;
> >   }
> > +NetClientState *qemu_get_peer(NetClientState *nc, int queue_index)
> > +{
> > +    NetClientState *ncs  =  nc + queue_index;
>
>
> Unnecessary space around '='.
>
> Thanks
>

Will correct this.

> > +    assert(ncs != NULL);
> > +    return ncs->peer;
> > +}
> >
> >   static void qemu_cleanup_net_client(NetClientState *nc)
> >   {
>
>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 4/4] vhost: introduce vhost_set_vring_ready method
  2020-04-21  3:59   ` Jason Wang
@ 2020-04-21  8:42     ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-21  8:42 UTC (permalink / raw)
  To: Jason Wang
  Cc: cohuck, Michael Tsirkin, mhabets, qemu-devel, hanand, rob.miller,
	saugatm, armbru, hch, Eugenio Perez Martin, jgg, shahafs,
	kevin.tian, parav, vmireyno, Liang, Cunming, gdawar, jiri,
	xiao.w.wang, stefanha, Wang, Zhihong, Ariel Adam, rdunlap,
	maxime.coquelin, Zhu, Lingshan


On Tue, Apr 21, 2020 at 12:00 PM Jason Wang <jasowang@redhat.com> wrote:

>
> On 2020/4/20 5:32 PM, Cindy Lu wrote:
> > From: Jason Wang <jasowang@redhat.com>
> >
> > Vhost-vdpa introduces VHOST_VDPA_SET_VRING_ENABLE, which complies with
> > the semantics of queue_enable defined in the virtio spec. This method
> > can be used to prevent the device from executing requests for a
> > specific virtqueue. This patch introduces the vhost_ops for this.
> >
> > Note that we already have vhost_set_vring_enable, which has a different
> > semantic: it allows enabling or disabling a specific virtqueue for some
> > kinds of vhost backends. E.g. vhost-user uses this to change the number
> > of active queue pairs.
>
>
> This patch seems to mix four things; please use dedicated patches for:
>
> 1) introduce queue_enabled method for virtio-bus
> 2) implement queue_enabled for virtio-pci
> 3) introduce vhost_set_vring_ready method for vhost ops
> 4) implement vhost_set_vring_ready for vdpa (needs to be squashed into
> the vhost-vdpa patch).
>
> Thanks
>
Sure, will fix this.

>
> >
> > Author: Jason Wang <jasowang@redhat.com>
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> >   hw/net/vhost_net-stub.c           |  5 +++++
> >   hw/net/vhost_net.c                | 16 ++++++++++++++++
> >   hw/virtio/vhost-vdpa.c            |  9 +++------
> >   hw/virtio/virtio-pci.c            | 13 +++++++++++++
> >   hw/virtio/virtio.c                |  6 ++++++
> >   include/hw/virtio/vhost-backend.h |  2 ++
> >   include/hw/virtio/virtio-bus.h    |  4 ++++
> >   include/net/vhost_net.h           |  1 +
> >   8 files changed, 50 insertions(+), 6 deletions(-)
> >
> > diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
> > index aac0e98228..f5ef1e3055 100644
> > --- a/hw/net/vhost_net-stub.c
> > +++ b/hw/net/vhost_net-stub.c
> > @@ -86,6 +86,11 @@ int vhost_set_vring_enable(NetClientState *nc, int
> enable)
> >       return 0;
> >   }
> >
> > +int vhost_set_vring_ready(NetClientState *nc)
> > +{
> > +    return 0;
> > +}
> > +
> >   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
> >   {
> >       return 0;
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 0d13fda2fc..463e333531 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -380,6 +380,10 @@ int vhost_net_start(VirtIODevice *dev,
> NetClientState *ncs,
> >                   goto err_start;
> >               }
> >           }
> > +
> > +        if (virtio_queue_enabled(dev, i)) {
> > +            vhost_set_vring_ready(peer);
> > +        }
> >       }
> >
> >       return 0;
> > @@ -487,6 +491,18 @@ int vhost_set_vring_enable(NetClientState *nc, int
> enable)
> >       return 0;
> >   }
> >
> > +int vhost_set_vring_ready(NetClientState *nc)
> > +{
> > +    VHostNetState *net = get_vhost_net(nc);
> > +    const VhostOps *vhost_ops = net->dev.vhost_ops;
> > +
> > +    if (vhost_ops && vhost_ops->vhost_set_vring_ready) {
> > +        return vhost_ops->vhost_set_vring_ready(&net->dev);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
> >   {
> >       const VhostOps *vhost_ops = net->dev.vhost_ops;
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 213b327600..49224ef9f8 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -325,18 +325,15 @@ static int vhost_vdpa_get_vq_index(struct
> vhost_dev *dev, int idx)
> >       return idx - dev->vq_index;
> >   }
> >
> > -static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int
> enable)
> > +static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
> >   {
> >       int i;
> >
> >       for (i = 0; i < dev->nvqs; ++i) {
> >           struct vhost_vring_state state = {
> >               .index = dev->vq_index + i,
> > -            .num   = enable,
> > +            .num = 1,
> >           };
> > -
> > -        state.num = 1;
> > -
> >           vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
> >       }
> >
> > @@ -368,7 +365,7 @@ const VhostOps vdpa_ops = {
> >           .vhost_set_owner = vhost_vdpa_set_owner,
> >           .vhost_reset_device = vhost_vdpa_reset_device,
> >           .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > -        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> > +        .vhost_set_vring_ready = vhost_vdpa_set_vring_ready,
> >           .vhost_requires_shm_log = NULL,
> >           .vhost_migration_done = NULL,
> >           .vhost_backend_can_merge = NULL,
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index c6b47a9c73..4aaf5d953e 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -1103,6 +1103,18 @@ static AddressSpace
> *virtio_pci_get_dma_as(DeviceState *d)
> >       return pci_get_address_space(dev);
> >   }
> >
> > +static bool virtio_pci_queue_enabled(DeviceState *d, int n)
> > +{
> > +    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> > +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> > +
> > +    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
> > +        return proxy->vqs[vdev->queue_sel].enabled;
> > +    }
> > +
> > +    return virtio_queue_get_desc_addr(vdev, n) != 0;
> > +}
> > +
> >   static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
> >                                      struct virtio_pci_cap *cap)
> >   {
> > @@ -2053,6 +2065,7 @@ static void virtio_pci_bus_class_init(ObjectClass
> *klass, void *data)
> >       k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
> >       k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
> >       k->get_dma_as = virtio_pci_get_dma_as;
> > +    k->queue_enabled = virtio_pci_queue_enabled;
> >   }
> >
> >   static const TypeInfo virtio_pci_bus_info = {
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index 04716b5f6c..09732a8836 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -3169,6 +3169,12 @@ hwaddr virtio_queue_get_desc_addr(VirtIODevice
> *vdev, int n)
> >
> >   bool virtio_queue_enabled(VirtIODevice *vdev, int n)
> >   {
> > +    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
> > +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> > +
> > +    if (k->queue_enabled)
> > +        return k->queue_enabled(qbus->parent, n);
> > +
> >       return virtio_queue_get_desc_addr(vdev, n) != 0;
> >   }
> >
> > diff --git a/include/hw/virtio/vhost-backend.h
> b/include/hw/virtio/vhost-backend.h
> > index d81bd9885f..ce8de6d308 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -78,6 +78,7 @@ typedef int (*vhost_reset_device_op)(struct vhost_dev
> *dev);
> >   typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx);
> >   typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
> >                                            int enable);
> > +typedef int (*vhost_set_vring_ready_op)(struct vhost_dev *dev);
> >   typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
> >   typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
> >                                          char *mac_addr);
> > @@ -140,6 +141,7 @@ typedef struct VhostOps {
> >       vhost_reset_device_op vhost_reset_device;
> >       vhost_get_vq_index_op vhost_get_vq_index;
> >       vhost_set_vring_enable_op vhost_set_vring_enable;
> > +    vhost_set_vring_ready_op vhost_set_vring_ready;
> >       vhost_requires_shm_log_op vhost_requires_shm_log;
> >       vhost_migration_done_op vhost_migration_done;
> >       vhost_backend_can_merge_op vhost_backend_can_merge;
> > diff --git a/include/hw/virtio/virtio-bus.h
> b/include/hw/virtio/virtio-bus.h
> > index 38c9399cd4..0f6f215925 100644
> > --- a/include/hw/virtio/virtio-bus.h
> > +++ b/include/hw/virtio/virtio-bus.h
> > @@ -83,6 +83,10 @@ typedef struct VirtioBusClass {
> >        */
> >       int (*ioeventfd_assign)(DeviceState *d, EventNotifier *notifier,
> >                               int n, bool assign);
> > +    /*
> > +     * Whether queue number n is enabled.
> > +     */
> > +    bool (*queue_enabled)(DeviceState *d, int n);
> >       /*
> >        * Does the transport have variable vring alignment?
> >        * (ie can it ever call virtio_queue_set_align()?)
> > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> > index 6f3a624cf7..db473ff4d2 100644
> > --- a/include/net/vhost_net.h
> > +++ b/include/net/vhost_net.h
> > @@ -35,6 +35,7 @@ int vhost_net_notify_migration_done(VHostNetState
> *net, char* mac_addr);
> >   VHostNetState *get_vhost_net(NetClientState *nc);
> >
> >   int vhost_set_vring_enable(NetClientState * nc, int enable);
> > +int vhost_set_vring_ready(NetClientState * nc);
> >
> >   uint64_t vhost_net_get_acked_features(VHostNetState *net);
> >
>
>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-21  3:56   ` Jason Wang
@ 2020-04-21  9:12     ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-21  9:12 UTC (permalink / raw)
  To: Jason Wang
  Cc: cohuck, Michael Tsirkin, mhabets, qemu-devel, hanand, rob.miller,
	saugatm, armbru, hch, Eugenio Perez Martin, jgg, shahafs,
	kevin.tian, parav, vmireyno, Liang, Cunming, gdawar, jiri,
	xiao.w.wang, stefanha, Wang, Zhihong, Ariel Adam, rdunlap,
	maxime.coquelin, Zhu, Lingshan


On Tue, Apr 21, 2020 at 11:57 AM Jason Wang <jasowang@redhat.com> wrote:

>
> On 2020/4/20 5:32 PM, Cindy Lu wrote:
> > Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> > vhost-user. The above patch provides a generic device for vDPA
> > purposes; this vDPA device exposes to user space a non-vendor-specific
> > configuration interface for setting up a vhost HW accelerator. This
> > patch set introduces a third vhost backend called vhost-vdpa based on
> > the vDPA interface.
> >
> > Vhost-vdpa usage:
> >
> >    qemu-system-x86_64 -cpu host -enable-kvm \
> >      ......
> >    -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
> >    -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
>
>
> Actually, this part should belongs to patch 2.
>
> And we probably need to add a comment that vIOMMU is not supported right
> now.
>
>
Will fix this problem

> >
> > Author: Tiwei Bie
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >   hw/net/vhost_net.c                |  43 ++++
> >   hw/net/virtio-net.c               |   9 +
> >   hw/virtio/Makefile.objs           |   2 +-
> >   hw/virtio/vhost-backend.c         |   3 +
> >   hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
> >   hw/virtio/vhost.c                 |   5 +
> >   include/hw/virtio/vhost-backend.h |   6 +-
> >   include/hw/virtio/vhost-vdpa.h    |  14 ++
> >   8 files changed, 459 insertions(+), 2 deletions(-)
> >   create mode 100644 hw/virtio/vhost-vdpa.c
> >   create mode 100644 include/hw/virtio/vhost-vdpa.h
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 4096d64aaf..0d13fda2fc 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -17,8 +17,10 @@
> >   #include "net/net.h"
> >   #include "net/tap.h"
> >   #include "net/vhost-user.h"
> > +#include "net/vhost-vdpa.h"
> >
> >   #include "standard-headers/linux/vhost_types.h"
> > +#include "linux-headers/linux/vhost.h"
> >   #include "hw/virtio/virtio-net.h"
> >   #include "net/vhost_net.h"
> >   #include "qemu/error-report.h"
> > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
> >       VHOST_INVALID_FEATURE_BIT
> >   };
> >
> > +static const int vdpa_feature_bits[] = {
> > +    VIRTIO_F_NOTIFY_ON_EMPTY,
> > +    VIRTIO_RING_F_INDIRECT_DESC,
> > +    VIRTIO_RING_F_EVENT_IDX,
> > +    VIRTIO_F_ANY_LAYOUT,
> > +    VIRTIO_F_VERSION_1,
> > +    VIRTIO_NET_F_CSUM,
> > +    VIRTIO_NET_F_GUEST_CSUM,
> > +    VIRTIO_NET_F_GSO,
> > +    VIRTIO_NET_F_GUEST_TSO4,
> > +    VIRTIO_NET_F_GUEST_TSO6,
> > +    VIRTIO_NET_F_GUEST_ECN,
> > +    VIRTIO_NET_F_GUEST_UFO,
> > +    VIRTIO_NET_F_HOST_TSO4,
> > +    VIRTIO_NET_F_HOST_TSO6,
> > +    VIRTIO_NET_F_HOST_ECN,
> > +    VIRTIO_NET_F_HOST_UFO,
> > +    VIRTIO_NET_F_MRG_RXBUF,
> > +    VIRTIO_NET_F_MTU,
> > +    VIRTIO_F_IOMMU_PLATFORM,
> > +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +    VHOST_INVALID_FEATURE_BIT
> > +};
> >   static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >   {
> >       const int *feature_bits = 0;
> > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct
> vhost_net *net)
> >       case NET_CLIENT_DRIVER_VHOST_USER:
> >           feature_bits = user_feature_bits;
> >           break;
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        feature_bits = vdpa_feature_bits;
> > +        break;
> >       default:
> >           error_report("Feature bits not defined for this type: %d",
> >                   net->nc->info->type);
> > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >           assert(vhost_net);
> >           break;
> >   #endif
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> > +        assert(vhost_net);
> > +        break;
> >       default:
> >           break;
> >       }
> > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net,
> uint16_t mtu)
> >
> >       return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
> >   }
> > +int vhost_set_state(NetClientState *nc, int state)
> > +{
> > +    struct vhost_net *net = get_vhost_net(nc);
> > +    struct vhost_dev *hdev = &net->dev;
> > +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > +        if (hdev->vhost_ops->vhost_set_state) {
> > +                return hdev->vhost_ops->vhost_set_state(hdev, state);
> > +             }
> > +        }
> > +    return 0;
> > +}
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index db3d7c38e6..bbecd7ab96 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -206,6 +206,9 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> >       VirtIODevice *vdev = VIRTIO_DEVICE(n);
> >       NetClientState *nc = qemu_get_queue(n->nic);
> >       int queues = n->multiqueue ? n->max_queues : 1;
> > +    NetClientState *peer = nc->peer;
>
>
> qemu_get_peer()?
>
will fix it
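
i.e. something like (just a sketch, reusing the helper from patch 1):

    NetClientState *peer = qemu_get_peer(nc, 0); /* instead of nc->peer */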

>
> > +    uint8_t status_set  = vdev->status ;
> > +    uint8_t vhost_started_pre = n->vhost_started;
> >
> >       if (!get_vhost_net(nc->peer)) {
> >           return;
> > @@ -245,6 +248,7 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> >                   return;
> >               }
> >           }
> > +        status_set = status_set | VIRTIO_CONFIG_S_DRIVER_OK;
> >
> >           n->vhost_started = 1;
> >           r = vhost_net_start(vdev, n->nic->ncs, queues);
> > @@ -252,11 +256,16 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> >               error_report("unable to start vhost net: %d: "
> >                            "falling back on userspace virtio", -r);
> >               n->vhost_started = 0;
> > +            status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
> >           }
> >       } else {
> >           vhost_net_stop(vdev, n->nic->ncs, queues);
> > +        status_set = status_set & ~VIRTIO_CONFIG_S_DRIVER_OK;
> >           n->vhost_started = 0;
> >       }
> > +    if (vhost_started_pre != n->vhost_started) {
> > +            vhost_set_state(peer, status_set);
> > +    }
> >   }
>
>
> I think this deserves an independent patch.
>
>
will fix it

>
> >
> >   static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
> > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
> > index e2f70fbb89..17361d959e 100644
> > --- a/hw/virtio/Makefile.objs
> > +++ b/hw/virtio/Makefile.objs
> > @@ -2,7 +2,7 @@ ifeq ($(CONFIG_VIRTIO),y)
> >   common-obj-y += virtio-bus.o
> >   obj-y += virtio.o
> >
> > -obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o
> vhost-backend.o
> > +obj-$(call lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL)) += vhost.o
> vhost-backend.o vhost-vdpa.o
> >   common-obj-$(call lnot,$(call
> lor,$(CONFIG_VHOST_USER),$(CONFIG_VHOST_KERNEL))) += vhost-stub.o
> >   obj-$(CONFIG_VHOST_USER) += vhost-user.o
> >
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 48905383f8..935cd9e561 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -286,6 +286,9 @@ int vhost_set_backend_type(struct vhost_dev *dev,
> VhostBackendType backend_type)
> >           dev->vhost_ops = &user_ops;
> >           break;
> >   #endif
> > +    case VHOST_BACKEND_TYPE_VDPA:
> > +        dev->vhost_ops = &vdpa_ops;
> > +        break;
> >       default:
> >           error_report("Unknown vhost backend type");
> >           r = -1;
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > new file mode 100644
> > index 0000000000..213b327600
> > --- /dev/null
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -0,0 +1,379 @@
> > +/*
> > + * vhost-vdpa
> > + *
> > + *  Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> > + *  Copyright(c) 2020 Red Hat, Inc.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include <linux/vhost.h>
> > +#include <linux/vfio.h>
> > +#include <sys/eventfd.h>
> > +#include <sys/ioctl.h>
> > +#include "hw/virtio/vhost.h"
> > +#include "hw/virtio/vhost-backend.h"
> > +#include "hw/virtio/virtio-net.h"
> > +#include "hw/virtio/vhost-vdpa.h"
> > +#include "qemu/main-loop.h"
> > +#include <linux/kvm.h>
> > +#include "sysemu/kvm.h"
> > +
> > +
> > +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection
> *section)
> > +{
> > +    return (!memory_region_is_ram(section->mr) &&
> > +            !memory_region_is_iommu(section->mr)) ||
> > +           /*
> > +            * Sizing an enabled 64-bit BAR can cause spurious mappings
> to
> > +            * addresses in the upper part of the 64-bit address space.
> These
> > +            * are never accessed by the CPU and beyond the address
> width of
> > +            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU
> width.
> > +            */
> > +           section->offset_within_address_space & (1ULL << 63);
> > +}
> > +
> > +static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr
> size,
> > +                              void *vaddr, bool readonly)
> > +{
> > +    struct vhost_msg_v2 msg;
> > +    int fd = v->device_fd;
> > +    int ret = 0;
> > +
> > +    msg.type = VHOST_IOTLB_MSG_V2;
>
>
> Since V2 of the message is used here, I believe we need a kernel patch
> to allow the querying of backend capability.
>
Sure, I will provide another patch for kernel
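
The QEMU side could then probe for it roughly like this (a sketch;
VHOST_GET_BACKEND_FEATURES and VHOST_BACKEND_F_IOTLB_MSG_V2 are assumed
to come from that pending kernel patch):

static int vhost_vdpa_check_iotlb_msg_v2(struct vhost_vdpa *v)
{
    uint64_t features = 0;

    if (ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &features) < 0) {
        return -errno;
    }
    /* refuse to use struct vhost_msg_v2 if the backend can't parse it */
    if (!(features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
        return -ENOTSUP;
    }

    return 0;
}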

>
> > +    msg.iotlb.iova = iova;
> > +    msg.iotlb.size = size;
> > +    msg.iotlb.uaddr = (uint64_t)vaddr;
> > +    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
> > +    msg.iotlb.type = VHOST_IOTLB_UPDATE;
> > +
> > +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> > +        error_report("failed to write, fd=%d, errno=%d (%s)",
> > +            fd, errno, strerror(errno));
> > +        return -EIO ;
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> > +static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
> > +                                hwaddr size)
> > +{
> > +    struct vhost_msg_v2 msg;
> > +    int fd = v->device_fd;
> > +    int ret = 0;
> > +
> > +    msg.type = VHOST_IOTLB_MSG_V2;
> > +    msg.iotlb.iova = iova;
> > +    msg.iotlb.size = size;
> > +    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
> > +
> > +    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
> > +        error_report("failed to write, fd=%d, errno=%d (%s)",
> > +            fd, errno, strerror(errno));
> > +        return -EIO ;
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> > +static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> > +                                           MemoryRegionSection *section)
> > +{
> > +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa,
> listener);
> > +    hwaddr iova;
> > +    Int128 llend, llsize;
> > +    void *vaddr;
> > +    int ret;
> > +
> > +    if (vhost_vdpa_listener_skipped_section(section)) {
> > +        return;
> > +    }
> > +
> > +    if (unlikely((section->offset_within_address_space &
> ~TARGET_PAGE_MASK) !=
> > +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> > +        error_report("%s received unaligned region", __func__);
> > +        return;
> > +    }
> > +
> > +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> > +    llend = int128_make64(section->offset_within_address_space);
> > +    llend = int128_add(llend, section->size);
> > +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> > +
> > +    if (int128_ge(int128_make64(iova), llend)) {
> > +        return;
> > +    }
> > +
> > +    memory_region_ref(section->mr);
> > +
> > +    /* Here we assume that memory_region_is_ram(section->mr)==true */
> > +
> > +    vaddr = memory_region_get_ram_ptr(section->mr) +
> > +            section->offset_within_region +
> > +            (iova - section->offset_within_address_space);
> > +
> > +    llsize = int128_sub(llend, int128_make64(iova));
> > +
> > +    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> > +                             vaddr, section->readonly);
> > +    if (ret) {
> > +        error_report("vhost vdpa map fail!");
> > +        if (memory_region_is_ram_device(section->mr)) {
> > +            /* Allow unexpected mappings not to be fatal for RAM
> devices */
> > +            error_report("map ram fail!");
> > +          return ;
> > +        }
> > +        goto fail;
> > +    }
> > +
> > +    return;
> > +
> > +fail:
> > +    if (memory_region_is_ram_device(section->mr)) {
> > +        error_report("failed to vdpa_dma_map. pci p2p may not work");
> > +        return;
> > +
> > +    }
> > +    /*
> > +     * On the initfn path, store the first error in the container so we
> > +     * can gracefully fail.  Runtime, there's not much we can do other
> > +     * than throw a hardware error.
> > +     */
> > +    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
> > +    return;
> > +
> > +}
> > +
> > +static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> > +                                           MemoryRegionSection *section)
> > +{
> > +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa,
> listener);
> > +    hwaddr iova;
> > +    Int128 llend, llsize;
> > +    int ret;
> > +    bool try_unmap = true;
> > +
> > +    if (vhost_vdpa_listener_skipped_section(section)) {
> > +        return;
> > +    }
> > +
> > +    if (unlikely((section->offset_within_address_space &
> ~TARGET_PAGE_MASK) !=
> > +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> > +        error_report("%s received unaligned region", __func__);
> > +        return;
> > +    }
> > +
> > +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> > +    llend = int128_make64(section->offset_within_address_space);
> > +    llend = int128_add(llend, section->size);
> > +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> > +
> > +    if (int128_ge(int128_make64(iova), llend)) {
> > +        return;
> > +    }
> > +
> > +    llsize = int128_sub(llend, int128_make64(iova));
> > +
> > +    if (try_unmap) {
> > +        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
> > +        if (ret) {
> > +            error_report("vhost_vdpa dma unmap error!");
> > +        }
> > +    }
> > +
> > +    memory_region_unref(section->mr);
> > +}
> > +
>
>
> I think it's better to add a comment to explain why vhost-vdpa uses a
> different listener than the one used by other vhost backends (e.g.
> kernel or user).
>
will fix it
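
Maybe a comment along these lines (draft wording, based on the IOTLB
point discussed below):

/*
 * vhost-vdpa does not use VHOST_SET_MEM_TABLE: guest memory is mapped
 * and unmapped through the IOTLB API (VHOST_IOTLB_UPDATE /
 * VHOST_IOTLB_INVALIDATE) instead, which is why it registers its own
 * memory listener here rather than sharing the generic vhost one.
 */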

>
> > +static const MemoryListener vhost_vdpa_memory_listener = {
> > +    .region_add = vhost_vdpa_listener_region_add,
> > +    .region_del = vhost_vdpa_listener_region_del,
> > +};
> > +
> > +
> > +static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int
> request,
> > +                             void *arg)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +    int fd = v->device_fd;
> > +
> > +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> > +
> > +    return ioctl(fd, request, arg);
> > +}
> > +
> > +
> > +
> > +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
> > +{
> > +    struct vhost_vdpa *v;
> > +
> > +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> > +
> > +    v = opaque;
> > +    dev->opaque =  opaque ;
> > +
> > +    v->listener = vhost_vdpa_memory_listener;
> > +    memory_listener_register(&v->listener, &address_space_memory);
> > +
> > +    return 0;
> > +}
> > +
> > +static int vhost_vdpa_cleanup(struct vhost_dev *dev)
> > +{
> > +    struct vhost_vdpa *v;
> > +    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
> > +
> > +    v = dev->opaque;
> > +    memory_listener_unregister(&v->listener);
> > +
> > +    dev->opaque = NULL;
> > +    return 0;
> > +}
> > +
>
>
> A comment is needed here to explain why INT_MAX is used.
>
>
will do
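
e.g. a draft comment for the function quoted just below:

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    /*
     * vDPA never goes through the memory-table/memslot interface
     * (mappings are done via the IOTLB API), so there is no
     * backend-imposed limit on the number of slots to report here.
     */
    return INT_MAX;
}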

> > +static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
> > +{
> > +    return INT_MAX;
> > +}
> > +
> > +static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> > +                                   struct vhost_log *log)
> > +{
> > +    return 0;
> > +}
>
>
> I think we should fail this function since we don't support dirty page
> tracking now.
>
> And it's not guaranteed that a dirty page bitmap will be used in the future.
>
>
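
A minimal sketch of failing it (assuming -ENOTSUP is an acceptable
return value here):

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    /* vhost-vdpa does not support dirty page logging yet */
    return -ENOTSUP;
}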
> > +
> > +static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
> > +                                    struct vhost_memory *mem)
> > +{
> > +
> > +    if (mem->padding) {
> > +        return -1;
> > +    }
> > +
> > +    return 0;
>
>
> A comment is needed to explain why the mem table is not used (e.g. we
> use the IOTLB API instead).
>
will do
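
Draft wording, something like:

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    /*
     * The memory table is not used: guest memory is mapped through
     * the IOTLB API from the memory listener instead, so only
     * sanity-check the argument here.
     */
    if (mem->padding) {
        return -1;
    }

    return 0;
}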

>
> > +}
> > +
> > +static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
> > +                                     struct vhost_vring_addr *addr)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
> > +                                    struct vhost_vring_state *ring)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> > +                                     struct vhost_vring_state *ring)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> > +}
> > +
> > +static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> > +                                     struct vhost_vring_state *ring)
> > +{
> > +
> > +    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
> > +                                     struct vhost_vring_file *file)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
> > +}
> > +
> > +static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
> > +                                     struct vhost_vring_file *file)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
> > +}
> > +
> > +static int vhost_vdpa_set_features(struct vhost_dev *dev,
> > +                                   uint64_t features)
> > +{
> > +
> > +    features |= (1ULL << VIRTIO_F_IOMMU_PLATFORM);
>
>
> This seems tricky, I don't think we need this actually.
>
>
I will double check for this problem

> > +    return vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
> > +
> > +}
> > +
> > +static int vhost_vdpa_get_features(struct vhost_dev *dev,
> > +                                   uint64_t *features)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
> > +}
> > +
> > +static int vhost_vdpa_set_owner(struct vhost_dev *dev)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
> > +}
> > +
> > +static int vhost_vdpa_reset_device(struct vhost_dev *dev)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_RESET_OWNER, NULL);
> > +}
> > +
> > +static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
> > +{
> > +    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
> > +
> > +    return idx - dev->vq_index;
> > +}
> > +
> > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int
> enable)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < dev->nvqs; ++i) {
> > +        struct vhost_vring_state state = {
> > +            .index = dev->vq_index + i,
> > +            .num   = enable,
> > +        };
> > +
> > +        state.num = 1;
> > +
> > +        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
>
>
> Please make sure patch 4 comes first then we don't need to fix this in
> patch 4.
>
will do

>
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vhost_vdpa_set_state(struct vhost_dev *dev, int state)
> > +{
> > +    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &state);
> > +}
> > +
> > +
> > +const VhostOps vdpa_ops = {
> > +        .backend_type = VHOST_BACKEND_TYPE_VDPA,
> > +        .vhost_backend_init = vhost_vdpa_init,
> > +        .vhost_backend_cleanup = vhost_vdpa_cleanup,
> > +        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
> > +        .vhost_set_log_base = vhost_vdpa_set_log_base,
> > +        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
> > +        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
> > +        .vhost_set_vring_endian = NULL,
> > +        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
> > +        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
> > +        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
> > +        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
> > +        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
> > +        .vhost_set_features = vhost_vdpa_set_features,
> > +        .vhost_get_features = vhost_vdpa_get_features,
> > +        .vhost_set_owner = vhost_vdpa_set_owner,
> > +        .vhost_reset_device = vhost_vdpa_reset_device,
> > +        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> > +        .vhost_requires_shm_log = NULL,
> > +        .vhost_migration_done = NULL,
> > +        .vhost_backend_can_merge = NULL,
> > +        .vhost_net_set_mtu = NULL,
> > +        .vhost_set_iotlb_callback = NULL,
> > +        .vhost_send_device_iotlb_msg = NULL,
> > +        .vhost_set_state = vhost_vdpa_set_state,
> > +};
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 4da0d5a6c5..d1f2c4add7 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -746,6 +746,11 @@ static int vhost_virtqueue_set_addr(struct
> vhost_dev *dev,
> >           .log_guest_addr = vq->used_phys,
> >           .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
> >       };
> > +    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
> > +        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
> > +        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
> > +        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
> > +    }
>
>
> A comment is needed to explain why vDPA differs from the others.
>
> Thanks
>
will do
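
Maybe something like this for the comment (my understanding of the
reason, please correct me if it's wrong):

    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA) {
        /*
         * vDPA devices expect IOVAs (here: guest physical addresses
         * mapped through the IOTLB API), not the QEMU virtual
         * addresses that kernel vhost and vhost-user use, so pass the
         * GPAs of the rings instead.
         */
        addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
        addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
        addr.used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    }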

>
> >       int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
> >       if (r < 0) {
> >           VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
> > diff --git a/include/hw/virtio/vhost-backend.h
> b/include/hw/virtio/vhost-backend.h
> > index 6f6670783f..d81bd9885f 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
> >       VHOST_BACKEND_TYPE_NONE = 0,
> >       VHOST_BACKEND_TYPE_KERNEL = 1,
> >       VHOST_BACKEND_TYPE_USER = 2,
> > -    VHOST_BACKEND_TYPE_MAX = 3,
> > +    VHOST_BACKEND_TYPE_VDPA = 3,
> > +    VHOST_BACKEND_TYPE_MAX = 4,
> >   } VhostBackendType;
> >
> >   typedef enum VhostSetConfigType {
> > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct
> vhost_dev *dev,
> >   typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
> >                                           struct vhost_inflight
> *inflight);
> >
> > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
> >   typedef struct VhostOps {
> >       VhostBackendType backend_type;
> >       vhost_backend_init vhost_backend_init;
> > @@ -152,9 +154,11 @@ typedef struct VhostOps {
> >       vhost_backend_mem_section_filter_op
> vhost_backend_mem_section_filter;
> >       vhost_get_inflight_fd_op vhost_get_inflight_fd;
> >       vhost_set_inflight_fd_op vhost_set_inflight_fd;
> > +    vhost_set_state_op vhost_set_state;
> >   } VhostOps;
> >
> >   extern const VhostOps user_ops;
> > +extern const VhostOps vdpa_ops;
> >
> >   int vhost_set_backend_type(struct vhost_dev *dev,
> >                              VhostBackendType backend_type);
> > diff --git a/include/hw/virtio/vhost-vdpa.h
> b/include/hw/virtio/vhost-vdpa.h
> > new file mode 100644
> > index 0000000000..889c1a4410
> > --- /dev/null
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -0,0 +1,14 @@
> > +
> > +#ifndef HW_VIRTIO_VHOST_VDPA_H
> > +#define HW_VIRTIO_VHOST_VDPA_H
> > +
> > +#include "hw/virtio/virtio.h"
> > +
> > +typedef struct vhost_vdpa {
> > +    int device_fd;
> > +    MemoryListener listener;
> > +} VhostVDPA;
> > +
> > +extern AddressSpace address_space_memory;
> > +
> > +#endif
>
>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client
  2020-04-21  3:40   ` Jason Wang
@ 2020-04-21  9:46     ` Cindy Lu
  2020-04-21 15:06       ` Laurent Vivier
  0 siblings, 1 reply; 28+ messages in thread
From: Cindy Lu @ 2020-04-21  9:46 UTC (permalink / raw)
  To: Jason Wang
  Cc: rdunlap, Michael Tsirkin, mhabets, qemu-devel, rob.miller,
	saugatm, armbru, hch, Eugenio Perez Martin, jgg, shahafs,
	kevin.tian, parav, vmireyno, Liang, Cunming, gdawar, jiri,
	xiao.w.wang, stefanha, Wang, Zhihong, maxime.coquelin,
	Ariel Adam, cohuck, hanand, Zhu, Lingshan


On Tue, Apr 21, 2020 at 11:40 AM Jason Wang <jasowang@redhat.com> wrote:

>
> On 2020/4/20 5:32 PM, Cindy Lu wrote:
> > This patch set introduces a new net client type: vhost-vdpa.
> > vhost-vdpa net client will set up a vDPA device which is
> svhostdevpecified
>
>
> typo.
>
Will fix this

>
> > by a "vhostdev" parameter.
> >
> > Author: Tiwei Bie
>
>
> Please keep the sobs from the original patch.
>
>
Will fix this

> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >   include/net/vhost-vdpa.h |  18 ++++
> >   include/net/vhost_net.h  |   1 +
> >   net/Makefile.objs        |   2 +-
> >   net/clients.h            |   2 +
> >   net/net.c                |   1 +
> >   net/vhost-vdpa.c         | 211 +++++++++++++++++++++++++++++++++++++++
> >   qapi/net.json            |  21 +++-
> >   7 files changed, 253 insertions(+), 3 deletions(-)
> >   create mode 100644 include/net/vhost-vdpa.h
> >   create mode 100644 net/vhost-vdpa.c
>
>
> I think this patch should come after patch 3.
>
> And it's better to make this a module, like vhost-user.
>
Got it, Will fix this

>
> >
> > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > new file mode 100644
> > index 0000000000..9ddd538dad
> > --- /dev/null
> > +++ b/include/net/vhost-vdpa.h
> > @@ -0,0 +1,18 @@
> > +/*
> > + * vhost-vdpa.h
> > + *
> > + * Copyright(c) 2017 Intel Corporation. All rights reserved.
>
>
> 2020 and please add Red Hat here as well.
>
>
Will fix this

> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef VHOST_VDPA_H
> > +#define VHOST_VDPA_H
> > +
> > +struct vhost_net;
> > +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
> > +
> > +#endif /* VHOST_VDPA_H */
> > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> > index 77e47398c4..6f3a624cf7 100644
> > --- a/include/net/vhost_net.h
> > +++ b/include/net/vhost_net.h
> > @@ -39,5 +39,6 @@ int vhost_set_vring_enable(NetClientState * nc, int
> enable);
> >   uint64_t vhost_net_get_acked_features(VHostNetState *net);
> >
> >   int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
> > +int vhost_set_state(NetClientState *nc, int state);
>
>
> This function is not used in this patch.
>
Will fix this

>
> >
> >   #endif
> > diff --git a/net/Makefile.objs b/net/Makefile.objs
> > index c5d076d19c..da459cfc19 100644
> > --- a/net/Makefile.objs
> > +++ b/net/Makefile.objs
> > @@ -26,7 +26,7 @@ tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
> >   tap-obj-y ?= tap-stub.o
> >   common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
> >   common-obj-$(CONFIG_WIN32) += tap-win32.o
> > -
> > +common-obj-$(CONFIG_VHOST_KERNEL) += vhost-vdpa.o
> >   vde.o-libs = $(VDE_LIBS)
> >
> >   common-obj-$(CONFIG_CAN_BUS) += can/
> > diff --git a/net/clients.h b/net/clients.h
> > index a6ef267e19..92f9b59aed 100644
> > --- a/net/clients.h
> > +++ b/net/clients.h
> > @@ -61,4 +61,6 @@ int net_init_netmap(const Netdev *netdev, const char
> *name,
> >   int net_init_vhost_user(const Netdev *netdev, const char *name,
> >                           NetClientState *peer, Error **errp);
> >
> > +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> > +                        NetClientState *peer, Error **errp);
> >   #endif /* QEMU_NET_CLIENTS_H */
> > diff --git a/net/net.c b/net/net.c
> > index ac5080dda1..2beb95388a 100644
> > --- a/net/net.c
> > +++ b/net/net.c
> > @@ -964,6 +964,7 @@ static int (* const
> net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
> >           [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
> >   #ifdef CONFIG_VHOST_NET_USER
> >           [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
> > +        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
> >   #endif
> >   #ifdef CONFIG_L2TPV3
> >           [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > new file mode 100644
> > index 0000000000..5daeba0b76
> > --- /dev/null
> > +++ b/net/vhost-vdpa.c
> > @@ -0,0 +1,211 @@
> > +/*
> > + * vhost-vdpa.c
> > + *
> > + * Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
> > + * Copyright(c) 2020 Red Hat, Inc.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "clients.h"
> > +#include "net/vhost_net.h"
> > +#include "net/vhost-vdpa.h"
> > +#include "hw/virtio/vhost-vdpa.h"
> > +#include "chardev/char-fe.h"
>
>
> I don't think we use charfe in this file.
>
>
> > +#include "qemu/config-file.h"
> > +#include "qemu/error-report.h"
> > +#include "qemu/option.h"
> > +#include "qapi/error.h"
> > +#include "trace.h"
>
>
> I think we don't use tracing in this file.
>
Will fix this

>
> > +#include <linux/vfio.h>
> > +#include <sys/ioctl.h>
> > +#include <err.h>
> > +#include <linux/virtio_net.h>
> > +
> > +
> > +typedef struct VhostVDPAState {
> > +    NetClientState nc;
>
>
> This may not work for the multiqueue case; you can either fix this or
> just leave a TODO comment.
>
Will fix this
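
For now, maybe just a TODO in the struct (sketch):

    /* TODO: multiqueue support; this currently assumes one queue pair */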

>
> > +    struct vhost_vdpa vhost_vdpa;
>
>
> This explains why patch 3 should come first.
>
>
> > +    VHostNetState *vhost_net;
> > +    uint64_t acked_features;
> > +    bool started;
> > +} VhostVDPAState;
> > +
> > +VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > +{
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    return s->vhost_net;
> > +}
> > +
> > +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc)
> > +{
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    return s->acked_features;
> > +}
> > +
> > +static void vhost_vdpa_stop(NetClientState *ncs)
> > +{
> > +    VhostVDPAState *s;
> > +
> > +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +
> > +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> > +
> > +    if (s->vhost_net) {
> > +        /* save acked features */
> > +        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
> > +        if (features) {
> > +            s->acked_features = features;
> > +         }
> > +        vhost_net_cleanup(s->vhost_net);
> > +    }
> > +}
> > +
> > +static int vhost_vdpa_start(NetClientState *ncs, void *be)
> > +{
>
>
> The name of this function is confusing; we don't actually start vhost_vdpa.
>
OK, got it, I will change it

>
> > +    VhostNetOptions options;
> > +    struct vhost_net *net = NULL;
> > +    VhostVDPAState *s;
> > +
> > +    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
> > +
> > +    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +
> > +    s = DO_UPCAST(VhostVDPAState, nc, ncs);
> > +
> > +    options.net_backend = ncs;
> > +    options.opaque      = be;
> > +    options.busyloop_timeout = 0;
> > +    net = vhost_net_init(&options);
> > +    if (!net) {
> > +        error_report("failed to init vhost_net for queue");
> > +        goto err;
> > +     }
> > +
> > +     if (s->vhost_net) {
> > +        vhost_net_cleanup(s->vhost_net);
> > +        g_free(s->vhost_net);
> > +     }
> > +     s->vhost_net = net;
> > +
> > +    return 0;
> > +
> > +err:
> > +    if (net) {
> > +        vhost_net_cleanup(net);
> > +    }
> > +    vhost_vdpa_stop(ncs);
> > +    return -1;
> > +}
> > +static void vhost_vdpa_cleanup(NetClientState *nc)
> > +{
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +
> > +    if (s->vhost_net) {
> > +        vhost_net_cleanup(s->vhost_net);
> > +        g_free(s->vhost_net);
> > +        s->vhost_net = NULL;
> > +    }
> > +
> > +    qemu_purge_queued_packets(nc);
> > +}
> > +
> > +static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
> > +{
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +
> > +    return true;
> > +}
> > +
> > +static bool vhost_vdpa_has_ufo(NetClientState *nc)
> > +{
> > +    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +    uint64_t  features = 0;
> > +
> > +    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
> > +    features = vhost_net_get_features(s->vhost_net, features);
> > +    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
> > +
> > +}
> > +
> > +static NetClientInfo net_vhost_vdpa_info = {
> > +        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
> > +        .size = sizeof(VhostVDPAState),
> > +        .cleanup = vhost_vdpa_cleanup,
> > +        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
> > +        .has_ufo = vhost_vdpa_has_ufo,
> > +};
> > +
> > +static int net_vhost_vdpa_init(NetClientState *peer, const char *device,
> > +                               const char *name, const char *vhostdev)
> > +{
> > +    NetClientState *nc = NULL;
> > +    VhostVDPAState *s;
> > +    int vdpa_device_fd;
> > +    assert(name);
> > +
> > +    nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name);
> > +    snprintf(nc->info_str, sizeof(nc->info_str), "vhost-vdpa");
> > +    nc->queue_index = 0;
> > +
> > +    s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +
> > +    vdpa_device_fd = open(vhostdev, O_RDWR);
> > +    if (vdpa_device_fd == -1) {
> > +        return -errno;
> > +    }
> > +    s->vhost_vdpa.device_fd = vdpa_device_fd;
>
>
> Need to add a step to verify the device type and fail if it was not a
> networking device.
>
Sure, will add the check.
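
A sketch of the check, assuming the vhost-vdpa kernel UAPI ends up
providing a VHOST_VDPA_GET_DEVICE_ID ioctl (VIRTIO_ID_NET comes from
linux/virtio_ids.h):

static int vhost_vdpa_check_device_id(int fd, Error **errp)
{
    uint32_t device_id = 0;

    if (ioctl(fd, VHOST_VDPA_GET_DEVICE_ID, &device_id) < 0) {
        error_setg_errno(errp, errno, "Failed to query vdpa device id");
        return -1;
    }
    if (device_id != VIRTIO_ID_NET) {
        error_setg(errp, "vhost-vdpa: not a network device (id %u)",
                   device_id);
        return -1;
    }

    return 0;
}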

>
> > +    vhost_vdpa_start(nc, (void *)&s->vhost_vdpa);
> > +
> > +    assert(s->vhost_net);
> > +
> > +    return 0;
> > +}
> > +
> > +static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error
> **errp)
> > +{
> > +    const char *name = opaque;
> > +    const char *driver, *netdev;
> > +
> > +    driver = qemu_opt_get(opts, "driver");
> > +    netdev = qemu_opt_get(opts, "netdev");
> > +
> > +    if (!driver || !netdev) {
> > +        return 0;
> > +    }
> > +
> > +    if (strcmp(netdev, name) == 0 &&
> > +        !g_str_has_prefix(driver, "virtio-net-")) {
> > +        error_setg(errp, "vhost-vdpa requires frontend driver
> virtio-net-*");
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> > +                        NetClientState *peer, Error **errp)
> > +{
> > +    const NetdevVhostVDPAOptions *vhost_vdpa_opts;
> > +
> > +    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > +    vhost_vdpa_opts = &netdev->u.vhost_vdpa;
> > +
> > +    /* verify net frontend */
> > +    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
> > +                          (char *)name, errp)) {
> > +        return -1;
> > +    }
> > +
> > +
> > +    return net_vhost_vdpa_init(peer, "vhost_vdpa", name,
> > +                               vhost_vdpa_opts->vhostdev);
>
>
> Please add support for passing an fd via the command line.
>

Will add the support
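
Something along the lines of what tap does, I guess (a sketch; 'vhostfd'
would be a new, assumed option next to 'vhostdev'):

    int vdpa_device_fd = -1;

    if (vhostfd) {
        /* fd passed in from the command line / monitor */
        vdpa_device_fd = monitor_fd_param(cur_mon, vhostfd, errp);
        if (vdpa_device_fd == -1) {
            return -1;
        }
    } else {
        vdpa_device_fd = open(vhostdev, O_RDWR);
        if (vdpa_device_fd == -1) {
            return -errno;
        }
    }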

>
>
> +
> > +    return 0;
> > +}
> > diff --git a/qapi/net.json b/qapi/net.json
> > index 335295be50..35a5ccee39 100644
> > --- a/qapi/net.json
> > +++ b/qapi/net.json
> > @@ -441,6 +441,22 @@
> >       '*queues':        'int' } }
> >
> >   ##
> > +# @NetdevVhostVDPAOptions:
> > +#
> > +# Vhost-vdpa network backend
> > +#
> > +# @vhostdev: name of a mdev dev path in sysfs
> > +#
> > +# @queues: number of queues to be created for multiqueue vhost-vdpa
> > +#          (default: 1) (Since 2.11)
> > +#
> > +# Since: 2.11
>
>
> The version number is wrong, I guess it's probably 5.1.
>
> Thanks
>
Will fix this

>
> > +##
> > +{ 'struct': 'NetdevVhostVDPAOptions',
> > +  'data': {
> > +    '*vhostdev':     'str',
> > +    '*queues':       'int' } }
> > +##
> >   # @NetClientDriver:
> >   #
> >   # Available netdev drivers.
> > @@ -451,7 +467,7 @@
> >   ##
> >   { 'enum': 'NetClientDriver',
> >     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> > -            'bridge', 'hubport', 'netmap', 'vhost-user' ] }
> > +            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ]
> }
> >
> >   ##
> >   # @Netdev:
> > @@ -479,7 +495,8 @@
> >       'bridge':   'NetdevBridgeOptions',
> >       'hubport':  'NetdevHubPortOptions',
> >       'netmap':   'NetdevNetmapOptions',
> > -    'vhost-user': 'NetdevVhostUserOptions' } }
> > +    'vhost-user': 'NetdevVhostUserOptions',
> > +    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
> >
> >   ##
> >   # @NetLegacy:
>
>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 0/4] vDPA support in qemu
  2020-04-21  4:05 ` Jason Wang
@ 2020-04-21  9:47   ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-21  9:47 UTC (permalink / raw)
  To: Jason Wang
  Cc: cohuck, Michael Tsirkin, mhabets, qemu-devel, hanand, rob.miller,
	saugatm, armbru, hch, Eugenio Perez Martin, jgg, shahafs,
	kevin.tian, parav, vmireyno, Liang, Cunming, gdawar, jiri,
	xiao.w.wang, stefanha, Wang, Zhihong, Ariel Adam, rdunlap,
	maxime.coquelin, Zhu, Lingshan


Thanks Jason, I will fix these problems and send a new version soon.

On Tue, Apr 21, 2020 at 12:05 PM Jason Wang <jasowang@redhat.com> wrote:

>
> On 2020/4/20 5:32 PM, Cindy Lu wrote:
> > vDPA device is a device that uses a datapath which complies with the
> > virtio specifications with vendor specific control path. vDPA devices
> > can be both physically located on the hardware or emulated by software.
> > This RFC introduce the vDPA support in qemu
>
>
> And please mention the TODO list, e.g.:
>
> 1) vIOMMU support
> 2) software assisted live migration
>
> Thanks
>
>

[-- Attachment #2: Type: text/html, Size: 931 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 1/4] net: Introduce qemu_get_peer
  2020-04-20  9:32 ` [RFC v1 1/4] net: Introduce qemu_get_peer Cindy Lu
  2020-04-21  3:23   ` Jason Wang
@ 2020-04-21 15:01   ` Laurent Vivier
  1 sibling, 0 replies; 28+ messages in thread
From: Laurent Vivier @ 2020-04-21 15:01 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, maxime.coquelin, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, hanand, lingshan.zhu

On 20/04/2020 11:32, Cindy Lu wrote:
> This is a small function that can get the peer from a given NetClientState and queue_index
> 
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  hw/net/vhost_net.c | 16 ++++++++++------
>  include/net/net.h  |  1 +
>  net/net.c          |  6 ++++++
>  3 files changed, 17 insertions(+), 6 deletions(-)
> 
...
> diff --git a/net/net.c b/net/net.c
> index 84aa6d8d00..ac5080dda1 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -324,6 +324,12 @@ void *qemu_get_nic_opaque(NetClientState *nc)
>  
>      return nic->opaque;
>  }
> +NetClientState *qemu_get_peer(NetClientState *nc, int queue_index)
> +{
> +    NetClientState *ncs  =  nc + queue_index;
> +    assert(ncs != NULL);

This will not assert if nc is NULL and queue_index != 0.

You should check the value of nc and then calculate ncs.
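
Something like this (untested):

NetClientState *qemu_get_peer(NetClientState *nc, int queue_index)
{
    assert(nc != NULL);

    return (nc + queue_index)->peer;
}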

Thanks,
Laurent



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client
  2020-04-21  9:46     ` Cindy Lu
@ 2020-04-21 15:06       ` Laurent Vivier
  0 siblings, 0 replies; 28+ messages in thread
From: Laurent Vivier @ 2020-04-21 15:06 UTC (permalink / raw)
  To: Cindy Lu, Jason Wang
  Cc: cohuck, Michael Tsirkin, mhabets, qemu-devel, hanand, rob.miller,
	saugatm, armbru, hch, Eugenio Perez Martin, jgg, shahafs,
	kevin.tian, parav, vmireyno, Liang, Cunming, gdawar, jiri,
	xiao.w.wang, stefanha, Wang, Zhihong, Ariel Adam, rdunlap,
	maxime.coquelin, Zhu, Lingshan

On 21/04/2020 11:46, Cindy Lu wrote:
> 
> 
> On Tue, Apr 21, 2020 at 11:40 AM Jason Wang <jasowang@redhat.com> wrote:
> 
> 
>     On 2020/4/20 5:32 PM, Cindy Lu wrote:
>     > This patch set introduces a new net client type: vhost-vdpa.
>     > vhost-vdpa net client will set up a vDPA device which is
>     svhostdevpecified
> 
> 
>     typo.
> 
> Will fix this   
> 
> 
>     > by a "vhostdev" parameter.
>     >
>     > Author: Tiwei Bie
> 
> 
>     Please keep the sobs from the original patch.
> 
> 
> Will fix this  

If you want to set Tiwei Bie as the original author of the patch you can
use "git commit --author="Tiwei Bie <XXXX>" --amend" to update your patch.

When you send the series, git-send-email will add the line "From:
Tiwei Bie <XXXX>" at the beginning of the email to identify the original
author. Don't remove his Sob and don't forget to add yours.

Thanks,
Laurent



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client
  2020-04-20  9:32 ` [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client Cindy Lu
  2020-04-20 14:49   ` Eric Blake
  2020-04-21  3:40   ` Jason Wang
@ 2020-04-21 15:47   ` Laurent Vivier
  2020-04-22  9:21     ` Cindy Lu
  2 siblings, 1 reply; 28+ messages in thread
From: Laurent Vivier @ 2020-04-21 15:47 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, maxime.coquelin, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, hanand, lingshan.zhu

On 20/04/2020 11:32, Cindy Lu wrote:
> This patch set introduces a new net client type: vhost-vdpa.
> vhost-vdpa net client will set up a vDPA device which is svhostdevpecified
> by a "vhostdev" parameter.
> 
> Author: Tiwei Bie
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  include/net/vhost-vdpa.h |  18 ++++
>  include/net/vhost_net.h  |   1 +
>  net/Makefile.objs        |   2 +-
>  net/clients.h            |   2 +
>  net/net.c                |   1 +
>  net/vhost-vdpa.c         | 211 +++++++++++++++++++++++++++++++++++++++
>  qapi/net.json            |  21 +++-
>  7 files changed, 253 insertions(+), 3 deletions(-)
>  create mode 100644 include/net/vhost-vdpa.h
>  create mode 100644 net/vhost-vdpa.c
> 
> diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> new file mode 100644
> index 0000000000..9ddd538dad
> --- /dev/null
> +++ b/include/net/vhost-vdpa.h
> @@ -0,0 +1,18 @@
> +/*
> + * vhost-vdpa.h
> + *
> + * Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef VHOST_VDPA_H
> +#define VHOST_VDPA_H
> +
> +struct vhost_net;
> +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
> +
> +#endif /* VHOST_VDPA_H */
> diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> index 77e47398c4..6f3a624cf7 100644
> --- a/include/net/vhost_net.h
> +++ b/include/net/vhost_net.h
> @@ -39,5 +39,6 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
>  uint64_t vhost_net_get_acked_features(VHostNetState *net);
>  
>  int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
> +int vhost_set_state(NetClientState *nc, int state);
>  
>  #endif
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index c5d076d19c..da459cfc19 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -26,7 +26,7 @@ tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
>  tap-obj-y ?= tap-stub.o
>  common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
>  common-obj-$(CONFIG_WIN32) += tap-win32.o
> -
> +common-obj-$(CONFIG_VHOST_KERNEL) += vhost-vdpa.o

should it be CONFIG_VHOST_NET_USER as you use net_init_vhost_vdpa()
below inside a "#ifdef CONFIG_VHOST_NET_USER"?

Why don't you define a CONFIG_VHOST_VDPA?
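
For instance (just a sketch; the exact plumbing should follow however
the other CONFIG_VHOST_* switches are defined by configure):

if test "$vhost_vdpa" = "yes" ; then
  echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak
fi

and then here:

common-obj-$(CONFIG_VHOST_VDPA) += vhost-vdpa.o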

>  vde.o-libs = $(VDE_LIBS)
>  
>  common-obj-$(CONFIG_CAN_BUS) += can/
> diff --git a/net/clients.h b/net/clients.h
> index a6ef267e19..92f9b59aed 100644
> --- a/net/clients.h
> +++ b/net/clients.h
> @@ -61,4 +61,6 @@ int net_init_netmap(const Netdev *netdev, const char *name,
>  int net_init_vhost_user(const Netdev *netdev, const char *name,
>                          NetClientState *peer, Error **errp);
>  
> +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> +                        NetClientState *peer, Error **errp);
>  #endif /* QEMU_NET_CLIENTS_H */
> diff --git a/net/net.c b/net/net.c
> index ac5080dda1..2beb95388a 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -964,6 +964,7 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
>          [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
>  #ifdef CONFIG_VHOST_NET_USER          ^^^^^^^^^^^^^^^^^^^^^
                   here

>          [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
> +        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
>  #endif
>  #ifdef CONFIG_L2TPV3
>          [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> new file mode 100644
> index 0000000000..5daeba0b76
> --- /dev/null
> +++ b/net/vhost-vdpa.c
...
> +static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
> +{
> +    const char *name = opaque;
> +    const char *driver, *netdev;
> +
> +    driver = qemu_opt_get(opts, "driver");
> +    netdev = qemu_opt_get(opts, "netdev");
> +
> +    if (!driver || !netdev) {
> +        return 0;
> +    }
> +
> +    if (strcmp(netdev, name) == 0 &&
> +        !g_str_has_prefix(driver, "virtio-net-")) {
> +        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
> +        return -1;
> +    }
>

So perhaps you can build the file only if CONFIG_VIRTIO_NET is set?

Thanks,
Laurent



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
  2020-04-20 14:51   ` Eric Blake
  2020-04-21  3:56   ` Jason Wang
@ 2020-04-21 15:54   ` Laurent Vivier
  2020-04-22  9:24     ` Cindy Lu
  2020-05-07 15:12   ` Maxime Coquelin
  2020-05-07 15:30   ` Maxime Coquelin
  4 siblings, 1 reply; 28+ messages in thread
From: Laurent Vivier @ 2020-04-21 15:54 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck, jasowang
  Cc: mhabets, qemu-devel, rob.miller, saugatm, maxime.coquelin, hch,
	eperezma, jgg, shahafs, kevin.tian, parav, vmireyno,
	cunming.liang, gdawar, jiri, xiao.w.wang, stefanha, zhihong.wang,
	aadam, rdunlap, hanand, lingshan.zhu

On 20/04/2020 11:32, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA
> purposes; this vDPA device exposes to user space a non-vendor-specific
> configuration interface for setting up a vhost HW accelerator. This
> patch set introduces a third vhost backend called vhost-vdpa based on
> the vDPA interface.
> 
> Vhost-vdpa usage:
> 
>   qemu-system-x86_64 -cpu host -enable-kvm \
>     ......
>   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> 
> Author: Tiwei Bie

Use "git commit --author" to set that.

> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  hw/net/vhost_net.c                |  43 ++++
>  hw/net/virtio-net.c               |   9 +
>  hw/virtio/Makefile.objs           |   2 +-
>  hw/virtio/vhost-backend.c         |   3 +
>  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
>  hw/virtio/vhost.c                 |   5 +
>  include/hw/virtio/vhost-backend.h |   6 +-
>  include/hw/virtio/vhost-vdpa.h    |  14 ++
>  8 files changed, 459 insertions(+), 2 deletions(-)
>  create mode 100644 hw/virtio/vhost-vdpa.c
>  create mode 100644 include/hw/virtio/vhost-vdpa.h
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 4096d64aaf..0d13fda2fc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
...
> @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>          assert(vhost_net);
>          break;
>  #endif
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> +        assert(vhost_net);
> +        break;

This should be inside a "#ifdef".
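
i.e. something like (assuming a CONFIG_VHOST_VDPA switch gets defined):

#ifdef CONFIG_VHOST_VDPA
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        vhost_net = vhost_vdpa_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif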

Thanks,
Laurent
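
For illustration, a minimal sketch of the guarded case in get_vhost_net(),
again assuming a dedicated CONFIG_VHOST_VDPA symbol rather than one of the
existing vhost macros:

    #ifdef CONFIG_VHOST_VDPA
        case NET_CLIENT_DRIVER_VHOST_VDPA:
            vhost_net = vhost_vdpa_get_vhost_net(nc);
            assert(vhost_net);
            break;
    #endif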



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client
  2020-04-21 15:47   ` Laurent Vivier
@ 2020-04-22  9:21     ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-22  9:21 UTC (permalink / raw)
  To: Laurent Vivier
  Cc: rdunlap, Michael Tsirkin, mhabets, qemu-devel, rob.miller,
	saugatm, Markus Armbruster, hch, Eugenio Perez Martin, jgg,
	Jason Wang, shahafs, kevin.tian, parav, vmireyno, Liang, Cunming,
	gdawar, jiri, xiao.w.wang, Stefan Hajnoczi, Wang, Zhihong,
	Maxime Coquelin, Ariel Adam, Cornelia Huck, hanand, Zhu,
	Lingshan

On Tue, Apr 21, 2020 at 11:47 PM Laurent Vivier <lvivier@redhat.com> wrote:
>
> On 20/04/2020 11:32, Cindy Lu wrote:
> > This patch set introduces a new net client type: vhost-vdpa.
> > The vhost-vdpa net client will set up a vDPA device which is specified
> > by a "vhostdev" parameter.
> >
> > Author: Tiwei Bie
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >  include/net/vhost-vdpa.h |  18 ++++
> >  include/net/vhost_net.h  |   1 +
> >  net/Makefile.objs        |   2 +-
> >  net/clients.h            |   2 +
> >  net/net.c                |   1 +
> >  net/vhost-vdpa.c         | 211 +++++++++++++++++++++++++++++++++++++++
> >  qapi/net.json            |  21 +++-
> >  7 files changed, 253 insertions(+), 3 deletions(-)
> >  create mode 100644 include/net/vhost-vdpa.h
> >  create mode 100644 net/vhost-vdpa.c
> >
> > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > new file mode 100644
> > index 0000000000..9ddd538dad
> > --- /dev/null
> > +++ b/include/net/vhost-vdpa.h
> > @@ -0,0 +1,18 @@
> > +/*
> > + * vhost-vdpa.h
> > + *
> > + * Copyright(c) 2017 Intel Corporation. All rights reserved.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef VHOST_VDPA_H
> > +#define VHOST_VDPA_H
> > +
> > +struct vhost_net;
> > +struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > +uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
> > +
> > +#endif /* VHOST_VDPA_H */
> > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> > index 77e47398c4..6f3a624cf7 100644
> > --- a/include/net/vhost_net.h
> > +++ b/include/net/vhost_net.h
> > @@ -39,5 +39,6 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
> >  uint64_t vhost_net_get_acked_features(VHostNetState *net);
> >
> >  int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
> > +int vhost_set_state(NetClientState *nc, int state);
> >
> >  #endif
> > diff --git a/net/Makefile.objs b/net/Makefile.objs
> > index c5d076d19c..da459cfc19 100644
> > --- a/net/Makefile.objs
> > +++ b/net/Makefile.objs
> > @@ -26,7 +26,7 @@ tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
> >  tap-obj-y ?= tap-stub.o
> >  common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
> >  common-obj-$(CONFIG_WIN32) += tap-win32.o
> > -
> > +common-obj-$(CONFIG_VHOST_KERNEL) += vhost-vdpa.o
>
> should it be CONFIG_VHOST_NET_USER as you use net_init_vhost_vdpa()
> below inside a "#ifdef CONFIG_VHOST_NET_USER"?
>
> Why don't you define a CONFIG_VHOST_VDPA?
>
Thanks, Laurent, for pointing this out. There is no dependency between
CONFIG_VHOST_NET_USER and vDPA, so I will implement a new macro specific
to vDPA.

> >  vde.o-libs = $(VDE_LIBS)
> >
> >  common-obj-$(CONFIG_CAN_BUS) += can/
> > diff --git a/net/clients.h b/net/clients.h
> > index a6ef267e19..92f9b59aed 100644
> > --- a/net/clients.h
> > +++ b/net/clients.h
> > @@ -61,4 +61,6 @@ int net_init_netmap(const Netdev *netdev, const char *name,
> >  int net_init_vhost_user(const Netdev *netdev, const char *name,
> >                          NetClientState *peer, Error **errp);
> >
> > +int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
> > +                        NetClientState *peer, Error **errp);
> >  #endif /* QEMU_NET_CLIENTS_H */
> > diff --git a/net/net.c b/net/net.c
> > index ac5080dda1..2beb95388a 100644
> > --- a/net/net.c
> > +++ b/net/net.c
> > @@ -964,6 +964,7 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
> >          [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
> >  #ifdef CONFIG_VHOST_NET_USER          ^^^^^^^^^^^^^^^^^^^^^
>                    here
>
> >          [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
> > +        [NET_CLIENT_DRIVER_VHOST_VDPA] = net_init_vhost_vdpa,
> >  #endif
> >  #ifdef CONFIG_L2TPV3
> >          [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > new file mode 100644
> > index 0000000000..5daeba0b76
> > --- /dev/null
> > +++ b/net/vhost-vdpa.c
> ...
> > +static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
> > +{
> > +    const char *name = opaque;
> > +    const char *driver, *netdev;
> > +
> > +    driver = qemu_opt_get(opts, "driver");
> > +    netdev = qemu_opt_get(opts, "netdev");
> > +
> > +    if (!driver || !netdev) {
> > +        return 0;
> > +    }
> > +
> > +    if (strcmp(netdev, name) == 0 &&
> > +        !g_str_has_prefix(driver, "virtio-net-")) {
> > +        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
> > +        return -1;
> > +    }
> >
>
> So perhaps you can build the file only if CONFIG_VIRTIO_NET is set?
>
> Thanks,
> Laurent
>
Thanks, there will be a new macro specific to vDPA.



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-21 15:54   ` Laurent Vivier
@ 2020-04-22  9:24     ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-04-22  9:24 UTC (permalink / raw)
  To: Laurent Vivier
  Cc: rdunlap, Michael Tsirkin, mhabets, qemu-devel, rob.miller,
	saugatm, Markus Armbruster, hch, Eugenio Perez Martin, jgg,
	Jason Wang, shahafs, kevin.tian, parav, vmireyno, Liang, Cunming,
	gdawar, jiri, xiao.w.wang, Stefan Hajnoczi, Wang, Zhihong,
	Maxime Coquelin, Ariel Adam, Cornelia Huck, hanand, Zhu,
	Lingshan

On Tue, Apr 21, 2020 at 11:54 PM Laurent Vivier <lvivier@redhat.com> wrote:
>
> On 20/04/2020 11:32, Cindy Lu wrote:
> > Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> > vhost-user. The above patch provides a generic device for vDPA purposes;
> > this vDPA device exposes a non-vendor-specific configuration interface
> > to user space for setting up a vhost HW accelerator. This patch set
> > introduces a third vhost backend, called vhost-vdpa, based on the vDPA
> > interface.
> >
> > Vhost-vdpa usage:
> >
> >   qemu-system-x86_64 -cpu host -enable-kvm \
> >     ......
> >   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
> >   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> >
> > Author: Tiwei Bie
>
> Use "git commit --author" to set that.
Thanks, I will fix this

> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >  hw/net/vhost_net.c                |  43 ++++
> >  hw/net/virtio-net.c               |   9 +
> >  hw/virtio/Makefile.objs           |   2 +-
> >  hw/virtio/vhost-backend.c         |   3 +
> >  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
> >  hw/virtio/vhost.c                 |   5 +
> >  include/hw/virtio/vhost-backend.h |   6 +-
> >  include/hw/virtio/vhost-vdpa.h    |  14 ++
> >  8 files changed, 459 insertions(+), 2 deletions(-)
> >  create mode 100644 hw/virtio/vhost-vdpa.c
> >  create mode 100644 include/hw/virtio/vhost-vdpa.h
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 4096d64aaf..0d13fda2fc 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> ...
> > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >          assert(vhost_net);
> >          break;
> >  #endif
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> > +        assert(vhost_net);
> > +        break;
>
> This should be inside an "#ifdef".
>
Thanks Laurent, I will add a new macro for vDPA

> Thanks,
> Laurent
>



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
                     ` (2 preceding siblings ...)
  2020-04-21 15:54   ` Laurent Vivier
@ 2020-05-07 15:12   ` Maxime Coquelin
  2020-05-07 15:56     ` Cindy Lu
  2020-05-07 15:30   ` Maxime Coquelin
  4 siblings, 1 reply; 28+ messages in thread
From: Maxime Coquelin @ 2020-05-07 15:12 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck, jasowang
  Cc: shahafs, kevin.tian, parav, xiao.w.wang, saugatm, qemu-devel,
	aadam, cunming.liang, rdunlap, hanand, gdawar, vmireyno, hch,
	eperezma, jiri, jgg, stefanha, zhihong.wang, lingshan.zhu,
	mhabets, rob.miller



On 4/20/20 11:32 AM, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purposes;
> this vDPA device exposes a non-vendor-specific configuration interface
> to user space for setting up a vhost HW accelerator. This patch set
> introduces a third vhost backend, called vhost-vdpa, based on the vDPA
> interface.
> 
> Vhost-vdpa usage:
> 
>   qemu-system-x86_64 -cpu host -enable-kvm \
>     ......
>   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> 
> Author: Tiwei Bie
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  hw/net/vhost_net.c                |  43 ++++
>  hw/net/virtio-net.c               |   9 +
>  hw/virtio/Makefile.objs           |   2 +-
>  hw/virtio/vhost-backend.c         |   3 +
>  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
>  hw/virtio/vhost.c                 |   5 +
>  include/hw/virtio/vhost-backend.h |   6 +-
>  include/hw/virtio/vhost-vdpa.h    |  14 ++
>  8 files changed, 459 insertions(+), 2 deletions(-)
>  create mode 100644 hw/virtio/vhost-vdpa.c
>  create mode 100644 include/hw/virtio/vhost-vdpa.h
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 4096d64aaf..0d13fda2fc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -17,8 +17,10 @@
>  #include "net/net.h"
>  #include "net/tap.h"
>  #include "net/vhost-user.h"
> +#include "net/vhost-vdpa.h"
>  
>  #include "standard-headers/linux/vhost_types.h"
> +#include "linux-headers/linux/vhost.h"
>  #include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
>  #include "qemu/error-report.h"
> @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
>      VHOST_INVALID_FEATURE_BIT
>  };
>  
> +static const int vdpa_feature_bits[] = {
> +    VIRTIO_F_NOTIFY_ON_EMPTY,
> +    VIRTIO_RING_F_INDIRECT_DESC,
> +    VIRTIO_RING_F_EVENT_IDX,
> +    VIRTIO_F_ANY_LAYOUT,
> +    VIRTIO_F_VERSION_1,
> +    VIRTIO_NET_F_CSUM,
> +    VIRTIO_NET_F_GUEST_CSUM,
> +    VIRTIO_NET_F_GSO,
> +    VIRTIO_NET_F_GUEST_TSO4,
> +    VIRTIO_NET_F_GUEST_TSO6,
> +    VIRTIO_NET_F_GUEST_ECN,
> +    VIRTIO_NET_F_GUEST_UFO,
> +    VIRTIO_NET_F_HOST_TSO4,
> +    VIRTIO_NET_F_HOST_TSO6,
> +    VIRTIO_NET_F_HOST_ECN,
> +    VIRTIO_NET_F_HOST_UFO,
> +    VIRTIO_NET_F_MRG_RXBUF,
> +    VIRTIO_NET_F_MTU,
> +    VIRTIO_F_IOMMU_PLATFORM,
> +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> +    VHOST_INVALID_FEATURE_BIT
> +};
>  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>  {
>      const int *feature_bits = 0;
> @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>      case NET_CLIENT_DRIVER_VHOST_USER:
>          feature_bits = user_feature_bits;
>          break;
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        feature_bits = vdpa_feature_bits;
> +        break;
>      default:
>          error_report("Feature bits not defined for this type: %d",
>                  net->nc->info->type);
> @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>          assert(vhost_net);
>          break;
>  #endif
> +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> +        assert(vhost_net);
> +        break;
>      default:
>          break;
>      }
> @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
>  
>      return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
>  }
> +int vhost_set_state(NetClientState *nc, int state)
> +{
> +    struct vhost_net *net = get_vhost_net(nc);
> +    struct vhost_dev *hdev = &net->dev;
> +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {

Maybe checking that the vhost_set_state callback is implemented is enough,
so there is no need to restrict this to vhost-vDPA?

> +        if (hdev->vhost_ops->vhost_set_state) {
> +            return hdev->vhost_ops->vhost_set_state(hdev, state);
> +        }
> +    }
> +    return 0;
> +}
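
With that suggestion applied, the helper would reduce to a dispatch on
callback presence alone; a sketch, not the final code:

    int vhost_set_state(NetClientState *nc, int state)
    {
        struct vhost_net *net = get_vhost_net(nc);
        struct vhost_dev *hdev = &net->dev;

        /* Any backend that implements the callback gets the state update. */
        if (hdev->vhost_ops->vhost_set_state) {
            return hdev->vhost_ops->vhost_set_state(hdev, state);
        }
        return 0;
    }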



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
                     ` (3 preceding siblings ...)
  2020-05-07 15:12   ` Maxime Coquelin
@ 2020-05-07 15:30   ` Maxime Coquelin
  2020-05-07 16:02     ` Cindy Lu
  4 siblings, 1 reply; 28+ messages in thread
From: Maxime Coquelin @ 2020-05-07 15:30 UTC (permalink / raw)
  To: Cindy Lu, mst, armbru, eblake, cohuck, jasowang
  Cc: shahafs, kevin.tian, parav, xiao.w.wang, saugatm, qemu-devel,
	aadam, cunming.liang, rdunlap, hanand, gdawar, vmireyno, hch,
	eperezma, jiri, jgg, stefanha, zhihong.wang, lingshan.zhu,
	mhabets, rob.miller



On 4/20/20 11:32 AM, Cindy Lu wrote:
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index 6f6670783f..d81bd9885f 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
>      VHOST_BACKEND_TYPE_NONE = 0,
>      VHOST_BACKEND_TYPE_KERNEL = 1,
>      VHOST_BACKEND_TYPE_USER = 2,
> -    VHOST_BACKEND_TYPE_MAX = 3,
> +    VHOST_BACKEND_TYPE_VDPA = 3,
> +    VHOST_BACKEND_TYPE_MAX = 4,
>  } VhostBackendType;
>  
>  typedef enum VhostSetConfigType {
> @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
>  typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
>                                          struct vhost_inflight *inflight);
>  
> +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);

I think state should be of type uint8_t.
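
The callback prototype would then read as below; uint8_t is a natural fit
if the value carried is the single-byte virtio device status (an
assumption, the patch passes a plain int today):

    typedef int (*vhost_set_state_op)(struct vhost_dev *dev, uint8_t state);

The vhost_set_state() helper in vhost_net.c would need the same width
change in its signature.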



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-05-07 15:12   ` Maxime Coquelin
@ 2020-05-07 15:56     ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-05-07 15:56 UTC (permalink / raw)
  To: Maxime Coquelin
  Cc: Cornelia Huck, Michael Tsirkin, Jason Wang, qemu-devel,
	rob.miller, saugatm, Markus Armbruster, hch,
	Eugenio Perez Martin, jgg, mhabets, shahafs, kevin.tian, parav,
	vmireyno, Liang, Cunming, gdawar, jiri, xiao.w.wang,
	Stefan Hajnoczi, Wang, Zhihong, Ariel Adam, rdunlap, hanand, Zhu,
	Lingshan

On Thu, May 7, 2020 at 11:13 PM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
>
>
> On 4/20/20 11:32 AM, Cindy Lu wrote:
> > Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> > vhost-user. The above patch provides a generic device for vDPA purposes;
> > this vDPA device exposes a non-vendor-specific configuration interface
> > to user space for setting up a vhost HW accelerator. This patch set
> > introduces a third vhost backend, called vhost-vdpa, based on the vDPA
> > interface.
> >
> > Vhost-vdpa usage:
> >
> >   qemu-system-x86_64 -cpu host -enable-kvm \
> >     ......
> >   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
> >   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> >
> > Author: Tiwei Bie
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >  hw/net/vhost_net.c                |  43 ++++
> >  hw/net/virtio-net.c               |   9 +
> >  hw/virtio/Makefile.objs           |   2 +-
> >  hw/virtio/vhost-backend.c         |   3 +
> >  hw/virtio/vhost-vdpa.c            | 379 ++++++++++++++++++++++++++++++
> >  hw/virtio/vhost.c                 |   5 +
> >  include/hw/virtio/vhost-backend.h |   6 +-
> >  include/hw/virtio/vhost-vdpa.h    |  14 ++
> >  8 files changed, 459 insertions(+), 2 deletions(-)
> >  create mode 100644 hw/virtio/vhost-vdpa.c
> >  create mode 100644 include/hw/virtio/vhost-vdpa.h
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 4096d64aaf..0d13fda2fc 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -17,8 +17,10 @@
> >  #include "net/net.h"
> >  #include "net/tap.h"
> >  #include "net/vhost-user.h"
> > +#include "net/vhost-vdpa.h"
> >
> >  #include "standard-headers/linux/vhost_types.h"
> > +#include "linux-headers/linux/vhost.h"
> >  #include "hw/virtio/virtio-net.h"
> >  #include "net/vhost_net.h"
> >  #include "qemu/error-report.h"
> > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
> >      VHOST_INVALID_FEATURE_BIT
> >  };
> >
> > +static const int vdpa_feature_bits[] = {
> > +    VIRTIO_F_NOTIFY_ON_EMPTY,
> > +    VIRTIO_RING_F_INDIRECT_DESC,
> > +    VIRTIO_RING_F_EVENT_IDX,
> > +    VIRTIO_F_ANY_LAYOUT,
> > +    VIRTIO_F_VERSION_1,
> > +    VIRTIO_NET_F_CSUM,
> > +    VIRTIO_NET_F_GUEST_CSUM,
> > +    VIRTIO_NET_F_GSO,
> > +    VIRTIO_NET_F_GUEST_TSO4,
> > +    VIRTIO_NET_F_GUEST_TSO6,
> > +    VIRTIO_NET_F_GUEST_ECN,
> > +    VIRTIO_NET_F_GUEST_UFO,
> > +    VIRTIO_NET_F_HOST_TSO4,
> > +    VIRTIO_NET_F_HOST_TSO6,
> > +    VIRTIO_NET_F_HOST_ECN,
> > +    VIRTIO_NET_F_HOST_UFO,
> > +    VIRTIO_NET_F_MRG_RXBUF,
> > +    VIRTIO_NET_F_MTU,
> > +    VIRTIO_F_IOMMU_PLATFORM,
> > +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +    VHOST_INVALID_FEATURE_BIT
> > +};
> >  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >  {
> >      const int *feature_bits = 0;
> > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >      case NET_CLIENT_DRIVER_VHOST_USER:
> >          feature_bits = user_feature_bits;
> >          break;
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        feature_bits = vdpa_feature_bits;
> > +        break;
> >      default:
> >          error_report("Feature bits not defined for this type: %d",
> >                  net->nc->info->type);
> > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >          assert(vhost_net);
> >          break;
> >  #endif
> > +    case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +        vhost_net = vhost_vdpa_get_vhost_net(nc);
> > +        assert(vhost_net);
> > +        break;
> >      default:
> >          break;
> >      }
> > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
> >
> >      return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
> >  }
> > +int vhost_set_state(NetClientState *nc, int state)
> > +{
> > +    struct vhost_net *net = get_vhost_net(nc);
> > +    struct vhost_dev *hdev = &net->dev;
> > +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>
> Maybe checking that the vhost_set_state callback is implemented is enough,
> so there is no need to restrict this to vhost-vDPA?
Sure, I will remove this check.

> > +        if (hdev->vhost_ops->vhost_set_state) {
> > +            return hdev->vhost_ops->vhost_set_state(hdev, state);
> > +        }
> > +    }
> > +    return 0;
> > +}
>



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
  2020-05-07 15:30   ` Maxime Coquelin
@ 2020-05-07 16:02     ` Cindy Lu
  0 siblings, 0 replies; 28+ messages in thread
From: Cindy Lu @ 2020-05-07 16:02 UTC (permalink / raw)
  To: Maxime Coquelin
  Cc: Cornelia Huck, Michael Tsirkin, Jason Wang, qemu-devel,
	rob.miller, saugatm, Markus Armbruster, hch,
	Eugenio Perez Martin, jgg, mhabets, shahafs, kevin.tian, parav,
	vmireyno, Liang, Cunming, gdawar, jiri, xiao.w.wang,
	Stefan Hajnoczi, Wang, Zhihong, Ariel Adam, rdunlap, hanand, Zhu,
	Lingshan

On Thu, May 7, 2020 at 11:30 PM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
>
>
> On 4/20/20 11:32 AM, Cindy Lu wrote:
> > diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> > index 6f6670783f..d81bd9885f 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
> >      VHOST_BACKEND_TYPE_NONE = 0,
> >      VHOST_BACKEND_TYPE_KERNEL = 1,
> >      VHOST_BACKEND_TYPE_USER = 2,
> > -    VHOST_BACKEND_TYPE_MAX = 3,
> > +    VHOST_BACKEND_TYPE_VDPA = 3,
> > +    VHOST_BACKEND_TYPE_MAX = 4,
> >  } VhostBackendType;
> >
> >  typedef enum VhostSetConfigType {
> > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
> >  typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
> >                                          struct vhost_inflight *inflight);
> >
> > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
>
> I think state should be of type uint8_t.
>
OK, I will change this to uint8_t.



^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2020-05-07 16:05 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-20  9:32 [RFC v1 0/4] vDPA support in qemu Cindy Lu
2020-04-20  9:32 ` [RFC v1 1/4] net: Introduce qemu_get_peer Cindy Lu
2020-04-21  3:23   ` Jason Wang
2020-04-21  8:10     ` Cindy Lu
2020-04-21 15:01   ` Laurent Vivier
2020-04-20  9:32 ` [RFC v1 2/4] vhost-vdpa: introduce vhost-vdpa net client Cindy Lu
2020-04-20 14:49   ` Eric Blake
2020-04-21  3:40   ` Jason Wang
2020-04-21  9:46     ` Cindy Lu
2020-04-21 15:06       ` Laurent Vivier
2020-04-21 15:47   ` Laurent Vivier
2020-04-22  9:21     ` Cindy Lu
2020-04-20  9:32 ` [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend Cindy Lu
2020-04-20 14:51   ` Eric Blake
2020-04-21  3:56   ` Jason Wang
2020-04-21  9:12     ` Cindy Lu
2020-04-21 15:54   ` Laurent Vivier
2020-04-22  9:24     ` Cindy Lu
2020-05-07 15:12   ` Maxime Coquelin
2020-05-07 15:56     ` Cindy Lu
2020-05-07 15:30   ` Maxime Coquelin
2020-05-07 16:02     ` Cindy Lu
2020-04-20  9:32 ` [RFC v1 4/4] vhost: introduce vhost_set_vring_ready method Cindy Lu
2020-04-21  3:59   ` Jason Wang
2020-04-21  8:42     ` Cindy Lu
2020-04-21  4:03 ` [RFC v1 0/4] vDPA support in qemu Jason Wang
2020-04-21  4:05 ` Jason Wang
2020-04-21  9:47   ` Cindy Lu
