All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: aliguori@us.ibm.com, mst@redhat.com, qemu-devel@nongnu.org,
	blauwirbel@gmail.com, shajnocz@redhat.com
Cc: krkumar2@in.ibm.com, kvm@vger.kernel.org, mprivozn@redhat.com,
	rusty@rustcorp.com.au, gaowanlong@cn.fujitsu.com,
	jwhan@filewood.snu.ac.kr, shiyer@redhat.com,
	Jason Wang <jasowang@redhat.com>
Subject: [PATCH V4 15/22] tap: multiqueue support
Date: Wed, 30 Jan 2013 19:12:34 +0800	[thread overview]
Message-ID: <1359544361-5089-16-git-send-email-jasowang@redhat.com> (raw)
In-Reply-To: <1359544361-5089-1-git-send-email-jasowang@redhat.com>

Recently, linux support multiqueue tap which could let userspace call TUNSETIFF
for a signle device many times to create multiple file descriptors as
independent queues. User could also enable/disabe a specific queue through
TUNSETQUEUE.

The patch adds the generic infrastructure to create multiqueue taps. To achieve
this a new parameter "queues" were introduced to specify how many queues were
expected to be created for tap by qemu itself. Alternatively, management could
also pass multiple pre-created tap file descriptors separated with ':' through a
new parameter fds like -netdev tap,id=hn0,fds="X:Y:..:Z". Multiple vhost file
descriptors could also be passed in this way.

Each TAPState were still associated to a tap fd, which mean multiple TAPStates
were created when user needs multiqueue taps. Since each TAPState contains one
NetClientState, with the multiqueue nic support, an N peers of NetClientState
were built up.

A new parameter, mq_required were introduce in tap_open() to create multiqueue
tap fds.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/net/tap.h |    1 -
 net/tap-aix.c     |    3 +-
 net/tap-bsd.c     |    3 +-
 net/tap-haiku.c   |    3 +-
 net/tap-linux.c   |    4 +-
 net/tap-solaris.c |    3 +-
 net/tap.c         |  158 +++++++++++++++++++++++++++++++++++++++++------------
 net/tap_int.h     |    3 +-
 qapi-schema.json  |    5 +-
 9 files changed, 139 insertions(+), 44 deletions(-)

diff --git a/include/net/tap.h b/include/net/tap.h
index c3eb85a..a994f20 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -37,7 +37,6 @@ void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int ecn,
 void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 int tap_enable(NetClientState *nc);
 int tap_disable(NetClientState *nc);
-int tap_get_ifname(NetClientState *nc, char *ifname);
 
 int tap_get_fd(NetClientState *nc);
 
diff --git a/net/tap-aix.c b/net/tap-aix.c
index e760e9a..804d164 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -25,7 +25,8 @@
 #include "tap_int.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     fprintf(stderr, "no tap on AIX\n");
     return -1;
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 4f22109..bcdb268 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -33,7 +33,8 @@
 #include <net/if_tap.h>
 #endif
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     int fd;
 #ifdef TAPGIFNAME
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index b3b5fbb..e5ce436 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -25,7 +25,8 @@
 #include "tap_int.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     fprintf(stderr, "no tap on Haiku\n");
     return -1;
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 3b21662..a953189 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -36,12 +36,12 @@
 
 #define PATH_NET_TUN "/dev/net/tun"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     struct ifreq ifr;
     int fd, ret;
     int len = sizeof(struct virtio_net_hdr);
-    int mq_required = 0;
 
     TFR(fd = open(PATH_NET_TUN, O_RDWR));
     if (fd < 0) {
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 214d95e..9c7278f 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -173,7 +173,8 @@ static int tap_alloc(char *dev, size_t dev_size)
     return tap_fd;
 }
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     char  dev[10]="";
     int fd;
diff --git a/net/tap.c b/net/tap.c
index 8610ba2..1bf7609 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -558,17 +558,10 @@ int net_init_bridge(const NetClientOptions *opts, const char *name,
 
 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
                         const char *setup_script, char *ifname,
-                        size_t ifname_sz)
+                        size_t ifname_sz, int mq_required)
 {
     int fd, vnet_hdr_required;
 
-    if (tap->has_ifname) {
-        pstrcpy(ifname, ifname_sz, tap->ifname);
-    } else {
-        assert(ifname_sz > 0);
-        ifname[0] = '\0';
-    }
-
     if (tap->has_vnet_hdr) {
         *vnet_hdr = tap->vnet_hdr;
         vnet_hdr_required = *vnet_hdr;
@@ -577,7 +570,8 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
         vnet_hdr_required = 0;
     }
 
-    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required));
+    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
+                      mq_required));
     if (fd < 0) {
         return -1;
     }
@@ -593,6 +587,8 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
     return fd;
 }
 
+#define MAX_TAP_QUEUES 1024
+
 static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                             const char *model, const char *name,
                             const char *ifname, const char *script,
@@ -611,17 +607,12 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
         return -1;
     }
 
-    if (tap->has_fd) {
+    if (tap->has_fd || tap->has_fds) {
         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
     } else if (tap->has_helper) {
         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
                  tap->helper);
     } else {
-        const char *downscript;
-
-        downscript = tap->has_downscript ? tap->downscript :
-            DEFAULT_NETWORK_DOWN_SCRIPT;
-
         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                  "ifname=%s,script=%s,downscript=%s", ifname, script,
                  downscript);
@@ -652,7 +643,7 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
             error_report("vhost-net requested but could not be initialized");
             return -1;
         }
-    } else if (tap->has_vhostfd) {
+    } else if (tap->has_vhostfd || tap->has_vhostfds) {
         error_report("vhostfd= is not valid without vhost");
         return -1;
     }
@@ -660,27 +651,54 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
     return 0;
 }
 
+static int get_fds(char *str, char *fds[], int max)
+{
+    char *ptr = str, *this;
+    size_t len = strlen(str);
+    int i = 0;
+
+    while (i < max && ptr < str + len) {
+        this = strchr(ptr, ':');
+
+        if (this == NULL) {
+            fds[i] = g_strdup(ptr);
+        } else {
+            fds[i] = g_strndup(ptr, this - ptr);
+        }
+
+        i++;
+        if (this == NULL) {
+            break;
+        } else {
+            ptr = this + 1;
+        }
+    }
+
+    return i;
+}
+
 int net_init_tap(const NetClientOptions *opts, const char *name,
                  NetClientState *peer)
 {
     const NetdevTapOptions *tap;
-
-    int fd, vnet_hdr = 0;
-    const char *model;
-
+    int fd, vnet_hdr = 0, i = 0, queues;
     /* for the no-fd, no-helper case */
     const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
     const char *downscript = NULL;
+    const char *vhostfdname;
     char ifname[128];
 
     assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
     tap = opts->tap;
+    queues = tap->has_queues ? tap->queues : 1;
+    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
 
     if (tap->has_fd) {
         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
-            tap->has_vnet_hdr || tap->has_helper) {
+            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
+            tap->has_fds) {
             error_report("ifname=, script=, downscript=, vnet_hdr=, "
-                         "and helper= are invalid with fd=");
+                         "helper=, queues=, and fds= are invalid with fd=");
             return -1;
         }
 
@@ -693,13 +711,61 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 
         vnet_hdr = tap_probe_vnet_hdr(fd);
 
-        model = "tap";
+        if (net_init_tap_one(tap, peer, "tap", NULL, NULL,
+                             script, downscript,
+                             vhostfdname, vnet_hdr, fd)) {
+            return -1;
+        }
+    } else if (tap->has_fds) {
+        char *fds[MAX_TAP_QUEUES];
+        char *vhost_fds[MAX_TAP_QUEUES];
+        int nfds, nvhosts;
+
+        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
+            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
+            tap->has_fd) {
+            error_report("ifname=, script=, downscript=, vnet_hdr=, "
+                         "helper=, queues=, and fd= are invalid with fds=");
+            return -1;
+        }
+
+        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
+        if (tap->has_vhostfds) {
+            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
+            if (nfds != nvhosts) {
+                error_report("The number of fds passed does not match the "
+                             "number of vhostfds passed");
+                return -1;
+            }
+        }
+
+        for (i = 0; i < nfds; i++) {
+            fd = monitor_handle_fd_param(cur_mon, fds[i]);
+            if (fd == -1) {
+                return -1;
+            }
+
+            fcntl(fd, F_SETFL, O_NONBLOCK);
 
+            if (i == 0) {
+                vnet_hdr = tap_probe_vnet_hdr(fd);
+            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
+                error_report("vnet_hdr not consistent across given tap fds");
+                return -1;
+            }
+
+            if (net_init_tap_one(tap, peer, "tap", name, ifname,
+                                 script, downscript,
+                                 tap->has_vhostfds ? vhost_fds[i] : NULL,
+                                 vnet_hdr, fd)) {
+                return -1;
+            }
+        }
     } else if (tap->has_helper) {
         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
-            tap->has_vnet_hdr) {
+            tap->has_vnet_hdr || tap->has_queues || tap->has_fds) {
             error_report("ifname=, script=, downscript=, and vnet_hdr= "
-                         "are invalid with helper=");
+                         "queues=, and fds= are invalid with helper=");
             return -1;
         }
 
@@ -709,26 +775,48 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
         }
 
         fcntl(fd, F_SETFL, O_NONBLOCK);
-
         vnet_hdr = tap_probe_vnet_hdr(fd);
 
-        model = "bridge";
-
+        if (net_init_tap_one(tap, peer, "bridge", name, ifname,
+                             script, downscript, vhostfdname,
+                             vnet_hdr, fd)) {
+            return -1;
+        }
     } else {
         script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
         downscript = tap->has_downscript ? tap->downscript :
             DEFAULT_NETWORK_DOWN_SCRIPT;
-        fd = net_tap_init(tap, &vnet_hdr, script, ifname, sizeof ifname);
-        if (fd == -1) {
-            return -1;
+
+        if (tap->has_ifname) {
+            pstrcpy(ifname, sizeof ifname, tap->ifname);
+        } else {
+            ifname[0] = '\0';
         }
 
-        model = "tap";
+        for (i = 0; i < queues; i++) {
+            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
+                              ifname, sizeof ifname, queues > 1);
+            if (fd == -1) {
+                return -1;
+            }
+
+            if (queues > 1 && i == 0 && !tap->has_ifname) {
+                if (tap_fd_get_ifname(fd, ifname)) {
+                    error_report("Fail to get ifname");
+                    return -1;
+                }
+            }
+
+            if (net_init_tap_one(tap, peer, "tap", name, ifname,
+                                 i >= 1 ? "no" : script,
+                                 i >= 1 ? "no" : downscript,
+                                 vhostfdname, vnet_hdr, fd)) {
+                return -1;
+            }
+        }
     }
 
-    return net_init_tap_one(tap, peer, model, name, ifname, script,
-                            downscript, tap->has_vhostfd ? tap->vhostfd : NULL,
-                            vnet_hdr, fd);
+    return 0;
 }
 
 VHostNetState *tap_get_vhost_net(NetClientState *nc)
diff --git a/net/tap_int.h b/net/tap_int.h
index 125f83d..86bb224 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -32,7 +32,8 @@
 #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
 #define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required);
 
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 3a4817b..cdd8384 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2533,6 +2533,7 @@
   'data': {
     '*ifname':     'str',
     '*fd':         'str',
+    '*fds':        'str',
     '*script':     'str',
     '*downscript': 'str',
     '*helper':     'str',
@@ -2540,7 +2541,9 @@
     '*vnet_hdr':   'bool',
     '*vhost':      'bool',
     '*vhostfd':    'str',
-    '*vhostforce': 'bool' } }
+    '*vhostfds':   'str',
+    '*vhostforce': 'bool',
+    '*queues':     'uint32'} }
 
 ##
 # @NetdevSocketOptions
-- 
1.7.1


  parent reply	other threads:[~2013-01-30 11:23 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-30 11:12 [PATCH V4 00/22] Multiqueue virtio-net Jason Wang
2013-01-30 11:12 ` [PATCH V4 01/22] net: tap: using bool instead of bitfield Jason Wang
2013-01-30 11:12 ` [PATCH V4 02/22] net: tap: use abort() instead of assert(0) Jason Wang
2013-01-30 11:12 ` [PATCH V4 03/22] net: introduce qemu_get_queue() Jason Wang
2013-01-30 11:12 ` [PATCH V4 04/22] net: introduce qemu_get_nic() Jason Wang
2013-01-30 11:12 ` [PATCH V4 05/22] net: intorduce qemu_del_nic() Jason Wang
2013-01-30 11:12 ` [PATCH V4 06/22] net: introduce qemu_find_net_clients_except() Jason Wang
2013-01-30 11:12 ` [PATCH V4 07/22] net: introduce qemu_net_client_setup() Jason Wang
2013-01-30 11:12 ` [PATCH V4 08/22] net: introduce NetClientState destructor Jason Wang
2013-01-30 11:12 ` [PATCH V4 09/22] net: multiqueue support Jason Wang
2013-01-30 11:12 ` [PATCH V4 10/22] tap: import linux multiqueue constants Jason Wang
2013-01-30 11:12 ` [PATCH V4 11/22] tap: factor out common tap initialization Jason Wang
2013-01-30 11:12 ` [PATCH V4 12/22] tap: add Linux multiqueue support Jason Wang
2013-01-30 11:12 ` [PATCH V4 13/22] tap: support enabling or disabling a queue Jason Wang
2013-01-30 11:12 ` [PATCH V4 14/22] tap: introduce a helper to get the name of an interface Jason Wang
2013-01-30 11:12 ` Jason Wang [this message]
2013-01-30 11:12 ` [PATCH V4 16/22] vhost: multiqueue support Jason Wang
2013-01-30 11:12 ` [PATCH V4 17/22] virtio: introduce virtio_del_queue() Jason Wang
2013-01-30 11:12 ` [PATCH V4 18/22] virtio: add a queue_index to VirtQueue Jason Wang
2013-01-30 11:12 ` [PATCH V4 19/22] virtio-net: separate virtqueue from VirtIONet Jason Wang
2013-01-30 11:12 ` [PATCH V4 20/22] virtio-net: multiqueue support Jason Wang
2013-01-30 11:12 ` [PATCH V4 21/22] virtio-net: migration support for multiqueue Jason Wang
2013-01-30 11:12 ` [PATCH V4 22/22] virtio-net: compat multiqueue support Jason Wang
     [not found] ` <5109669F.5010405@redhat.com>
2013-01-31  7:00   ` [Qemu-devel] [PATCH V4 00/22] Multiqueue virtio-net Jason Wang
2013-01-31 13:44     ` Eric Blake
2013-01-31 13:58       ` [Qemu-devel] " Michael S. Tsirkin
2013-01-31 15:18         ` Eric Blake
2013-01-31 15:04       ` [Qemu-devel] " Jason Wang
2013-01-31 14:21 ` Michael S. Tsirkin
2013-01-31 14:36   ` Michael S. Tsirkin
2013-01-31 15:05     ` Jason Wang
2013-02-04 22:53 ` Anthony Liguori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1359544361-5089-16-git-send-email-jasowang@redhat.com \
    --to=jasowang@redhat.com \
    --cc=aliguori@us.ibm.com \
    --cc=blauwirbel@gmail.com \
    --cc=gaowanlong@cn.fujitsu.com \
    --cc=jwhan@filewood.snu.ac.kr \
    --cc=krkumar2@in.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=mprivozn@redhat.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rusty@rustcorp.com.au \
    --cc=shajnocz@redhat.com \
    --cc=shiyer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.