All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] add to support for virtio-user server mode
@ 2018-02-14 14:53 Zhiyong Yang
  2018-02-14 14:53 ` [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h Zhiyong Yang
                   ` (4 more replies)
  0 siblings, 5 replies; 65+ messages in thread
From: Zhiyong Yang @ 2018-02-14 14:53 UTC (permalink / raw)
  To: dev, yliu, maxime.coquelin, jianfeng.tan, tiwei.bie, zhihong.wang
  Cc: dong1.wang

When vhost-user/OVS-DPDK restarts, virtio-user is expected to stay alive
so that vhost-user can reconnect to it successfully and continue
exchanging packets.

This series adds support for this feature and targets the 18.05 release.

In server mode, virtio-user creates the socket file and then blocks while
waiting for the first connection from a vhost-user backend in client mode.

In server mode, virtio-user supports repeated vhost reconnections with the
same configuration.

Virtio-user supports only one connection at a time, in both server and
client mode.

How to test?
for example:

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 -m 256,0 --no-pci \
--file-prefix=testpmd0 --vdev=net_virtio_user0,mac=00:11:22:33:44:10, \
path=/tmp/sock0,server=1,queues=1 -- -i --rxq=1 --txq=1 --no-numa

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3e000 -n 4 --socket-mem 256,0 \
--vdev 'net_vhost0,iface=/tmp/sock0,client=1,queues=1' -- -i --rxq=1 --txq=1 \
--nb-cores=1 --no-numa

Zhiyong Yang (4):
  vhost: move fdset functions from fd_man.c to fd_man.h
  net/virtio-user: add data members to support server mode
  net/virtio-user: support server mode
  net/vhost: add memory checking to support client mode

 drivers/net/vhost/rte_eth_vhost.c                |   9 +
 drivers/net/virtio/virtio_ethdev.c               |   9 +-
 drivers/net/virtio/virtio_user/vhost_user.c      |  77 ++++++-
 drivers/net/virtio/virtio_user/virtio_user_dev.c |  44 ++--
 drivers/net/virtio/virtio_user/virtio_user_dev.h |   8 +
 drivers/net/virtio/virtio_user_ethdev.c          |  81 ++++++-
 lib/librte_vhost/Makefile                        |   3 +-
 lib/librte_vhost/fd_man.c                        | 274 -----------------------
 lib/librte_vhost/fd_man.h                        | 258 ++++++++++++++++++++-
 9 files changed, 456 insertions(+), 307 deletions(-)
 delete mode 100644 lib/librte_vhost/fd_man.c

-- 
2.13.3

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-02-14 14:53 [PATCH 0/4] add to support for virtio-user server mode Zhiyong Yang
@ 2018-02-14 14:53 ` Zhiyong Yang
  2018-02-27 17:51   ` Maxime Coquelin
  2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
  2018-02-14 14:53 ` [PATCH 2/4] net/virtio-user: add data members to support server mode Zhiyong Yang
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 65+ messages in thread
From: Zhiyong Yang @ 2018-02-14 14:53 UTC (permalink / raw)
  To: dev, yliu, maxime.coquelin, jianfeng.tan, tiwei.bie, zhihong.wang
  Cc: dong1.wang, Zhiyong Yang

This patch moves the fdset-related functions from fd_man.c to fd_man.h so
that PMDs can reuse them.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 lib/librte_vhost/Makefile |   3 +-
 lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
 lib/librte_vhost/fd_man.h | 258 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 253 insertions(+), 282 deletions(-)
 delete mode 100644 lib/librte_vhost/fd_man.c

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 5d6c6abae..e201df79c 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -21,10 +21,11 @@ endif
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := iotlb.c socket.c vhost.c \
 					vhost_user.c virtio_net.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += fd_man.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
deleted file mode 100644
index 181711c2a..000000000
--- a/lib/librte_vhost/fd_man.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <string.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include "fd_man.h"
-
-#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
-
-static int
-get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
-{
-	int i;
-
-	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
-		;
-
-	return i;
-}
-
-static void
-fdset_move(struct fdset *pfdset, int dst, int src)
-{
-	pfdset->fd[dst]    = pfdset->fd[src];
-	pfdset->rwfds[dst] = pfdset->rwfds[src];
-}
-
-static void
-fdset_shrink_nolock(struct fdset *pfdset)
-{
-	int i;
-	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
-
-	for (i = 0; i < last_valid_idx; i++) {
-		if (pfdset->fd[i].fd != -1)
-			continue;
-
-		fdset_move(pfdset, i, last_valid_idx);
-		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
-	}
-	pfdset->num = last_valid_idx + 1;
-}
-
-/*
- * Find deleted fd entries and remove them
- */
-static void
-fdset_shrink(struct fdset *pfdset)
-{
-	pthread_mutex_lock(&pfdset->fd_mutex);
-	fdset_shrink_nolock(pfdset);
-	pthread_mutex_unlock(&pfdset->fd_mutex);
-}
-
-/**
- * Returns the index in the fdset for a given fd.
- * @return
- *   index for the fd, or -1 if fd isn't in the fdset.
- */
-static int
-fdset_find_fd(struct fdset *pfdset, int fd)
-{
-	int i;
-
-	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
-		;
-
-	return i == pfdset->num ? -1 : i;
-}
-
-static void
-fdset_add_fd(struct fdset *pfdset, int idx, int fd,
-	fd_cb rcb, fd_cb wcb, void *dat)
-{
-	struct fdentry *pfdentry = &pfdset->fd[idx];
-	struct pollfd *pfd = &pfdset->rwfds[idx];
-
-	pfdentry->fd  = fd;
-	pfdentry->rcb = rcb;
-	pfdentry->wcb = wcb;
-	pfdentry->dat = dat;
-
-	pfd->fd = fd;
-	pfd->events  = rcb ? POLLIN : 0;
-	pfd->events |= wcb ? POLLOUT : 0;
-	pfd->revents = 0;
-}
-
-void
-fdset_init(struct fdset *pfdset)
-{
-	int i;
-
-	if (pfdset == NULL)
-		return;
-
-	for (i = 0; i < MAX_FDS; i++) {
-		pfdset->fd[i].fd = -1;
-		pfdset->fd[i].dat = NULL;
-	}
-	pfdset->num = 0;
-}
-
-/**
- * Register the fd in the fdset with read/write handler and context.
- */
-int
-fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
-{
-	int i;
-
-	if (pfdset == NULL || fd == -1)
-		return -1;
-
-	pthread_mutex_lock(&pfdset->fd_mutex);
-	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
-	if (i == -1) {
-		fdset_shrink_nolock(pfdset);
-		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
-		if (i == -1) {
-			pthread_mutex_unlock(&pfdset->fd_mutex);
-			return -2;
-		}
-	}
-
-	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
-	pthread_mutex_unlock(&pfdset->fd_mutex);
-
-	return 0;
-}
-
-/**
- *  Unregister the fd from the fdset.
- *  Returns context of a given fd or NULL.
- */
-void *
-fdset_del(struct fdset *pfdset, int fd)
-{
-	int i;
-	void *dat = NULL;
-
-	if (pfdset == NULL || fd == -1)
-		return NULL;
-
-	do {
-		pthread_mutex_lock(&pfdset->fd_mutex);
-
-		i = fdset_find_fd(pfdset, fd);
-		if (i != -1 && pfdset->fd[i].busy == 0) {
-			/* busy indicates r/wcb is executing! */
-			dat = pfdset->fd[i].dat;
-			pfdset->fd[i].fd = -1;
-			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
-			pfdset->fd[i].dat = NULL;
-			i = -1;
-		}
-		pthread_mutex_unlock(&pfdset->fd_mutex);
-	} while (i != -1);
-
-	return dat;
-}
-
-
-/**
- * This functions runs in infinite blocking loop until there is no fd in
- * pfdset. It calls corresponding r/w handler if there is event on the fd.
- *
- * Before the callback is called, we set the flag to busy status; If other
- * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
- * will wait until the flag is reset to zero(which indicates the callback is
- * finished), then it could free the context after fdset_del.
- */
-void *
-fdset_event_dispatch(void *arg)
-{
-	int i;
-	struct pollfd *pfd;
-	struct fdentry *pfdentry;
-	fd_cb rcb, wcb;
-	void *dat;
-	int fd, numfds;
-	int remove1, remove2;
-	int need_shrink;
-	struct fdset *pfdset = arg;
-	int val;
-
-	if (pfdset == NULL)
-		return NULL;
-
-	while (1) {
-
-		/*
-		 * When poll is blocked, other threads might unregister
-		 * listenfds from and register new listenfds into fdset.
-		 * When poll returns, the entries for listenfds in the fdset
-		 * might have been updated. It is ok if there is unwanted call
-		 * for new listenfds.
-		 */
-		pthread_mutex_lock(&pfdset->fd_mutex);
-		numfds = pfdset->num;
-		pthread_mutex_unlock(&pfdset->fd_mutex);
-
-		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
-		if (val < 0)
-			continue;
-
-		need_shrink = 0;
-		for (i = 0; i < numfds; i++) {
-			pthread_mutex_lock(&pfdset->fd_mutex);
-
-			pfdentry = &pfdset->fd[i];
-			fd = pfdentry->fd;
-			pfd = &pfdset->rwfds[i];
-
-			if (fd < 0) {
-				need_shrink = 1;
-				pthread_mutex_unlock(&pfdset->fd_mutex);
-				continue;
-			}
-
-			if (!pfd->revents) {
-				pthread_mutex_unlock(&pfdset->fd_mutex);
-				continue;
-			}
-
-			remove1 = remove2 = 0;
-
-			rcb = pfdentry->rcb;
-			wcb = pfdentry->wcb;
-			dat = pfdentry->dat;
-			pfdentry->busy = 1;
-
-			pthread_mutex_unlock(&pfdset->fd_mutex);
-
-			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
-				rcb(fd, dat, &remove1);
-			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
-				wcb(fd, dat, &remove2);
-			pfdentry->busy = 0;
-			/*
-			 * fdset_del needs to check busy flag.
-			 * We don't allow fdset_del to be called in callback
-			 * directly.
-			 */
-			/*
-			 * When we are to clean up the fd from fdset,
-			 * because the fd is closed in the cb,
-			 * the old fd val could be reused by when creates new
-			 * listen fd in another thread, we couldn't call
-			 * fd_set_del.
-			 */
-			if (remove1 || remove2) {
-				pfdentry->fd = -1;
-				need_shrink = 1;
-			}
-		}
-
-		if (need_shrink)
-			fdset_shrink(pfdset);
-	}
-
-	return NULL;
-}
diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
index 3a9276c3c..b1c628251 100644
--- a/lib/librte_vhost/fd_man.h
+++ b/lib/librte_vhost/fd_man.h
@@ -4,11 +4,11 @@
 
 #ifndef _FD_MAN_H_
 #define _FD_MAN_H_
-#include <stdint.h>
-#include <pthread.h>
 #include <poll.h>
+#include <stdio.h>
 
 #define MAX_FDS 1024
+#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
 
 typedef void (*fd_cb)(int fd, void *dat, int *remove);
 
@@ -27,14 +27,258 @@ struct fdset {
 	int num;	/* current fd number of this fdset */
 };
 
+static inline int
+get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
+{
+	int i;
 
-void fdset_init(struct fdset *pfdset);
+	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
+		;
 
-int fdset_add(struct fdset *pfdset, int fd,
-	fd_cb rcb, fd_cb wcb, void *dat);
+	return i;
+}
 
-void *fdset_del(struct fdset *pfdset, int fd);
+static inline void
+fdset_move(struct fdset *pfdset, int dst, int src)
+{
+	pfdset->fd[dst]    = pfdset->fd[src];
+	pfdset->rwfds[dst] = pfdset->rwfds[src];
+}
 
-void *fdset_event_dispatch(void *arg);
+static inline void
+fdset_shrink_nolock(struct fdset *pfdset)
+{
+	int i;
+	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
+
+	for (i = 0; i < last_valid_idx; i++) {
+		if (pfdset->fd[i].fd != -1)
+			continue;
+
+		fdset_move(pfdset, i, last_valid_idx);
+		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
+	}
+	pfdset->num = last_valid_idx + 1;
+}
+
+/*
+ * Find deleted fd entries and remove them
+ */
+static inline void
+fdset_shrink(struct fdset *pfdset)
+{
+	pthread_mutex_lock(&pfdset->fd_mutex);
+	fdset_shrink_nolock(pfdset);
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+}
+
+/**
+ * Returns the index in the fdset for a given fd.
+ * @return
+ *   index for the fd, or -1 if fd isn't in the fdset.
+ */
+static inline int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+	int i;
+
+	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
+		;
+
+	return i == pfdset->num ? -1 : i;
+}
+
+static inline void
+fdset_add_fd(struct fdset *pfdset, int idx, int fd,
+	fd_cb rcb, fd_cb wcb, void *dat)
+{
+	struct fdentry *pfdentry = &pfdset->fd[idx];
+	struct pollfd *pfd = &pfdset->rwfds[idx];
+
+	pfdentry->fd  = fd;
+	pfdentry->rcb = rcb;
+	pfdentry->wcb = wcb;
+	pfdentry->dat = dat;
+
+	pfd->fd = fd;
+	pfd->events  = rcb ? POLLIN : 0;
+	pfd->events |= wcb ? POLLOUT : 0;
+	pfd->revents = 0;
+}
+
+static inline void
+fdset_init(struct fdset *pfdset)
+{
+	int i;
+
+	if (pfdset == NULL)
+		return;
+
+	for (i = 0; i < MAX_FDS; i++) {
+		pfdset->fd[i].fd = -1;
+		pfdset->fd[i].dat = NULL;
+	}
+	pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+static inline int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
+{
+	int i;
+
+	if (pfdset == NULL || fd == -1)
+		return -1;
+
+	pthread_mutex_lock(&pfdset->fd_mutex);
+	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
+	if (i == -1) {
+		fdset_shrink_nolock(pfdset);
+		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
+		if (i == -1) {
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+			return -2;
+		}
+	}
+
+	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+
+	return 0;
+}
+
+/**
+ *  Unregister the fd from the fdset.
+ *  Returns context of a given fd or NULL.
+ */
+static inline void *
+fdset_del(struct fdset *pfdset, int fd)
+{
+	int i;
+	void *dat = NULL;
+
+	if (pfdset == NULL || fd == -1)
+		return NULL;
+
+	do {
+		pthread_mutex_lock(&pfdset->fd_mutex);
+
+		i = fdset_find_fd(pfdset, fd);
+		if (i != -1 && pfdset->fd[i].busy == 0) {
+			/* busy indicates r/wcb is executing! */
+			dat = pfdset->fd[i].dat;
+			pfdset->fd[i].fd = -1;
+			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+			pfdset->fd[i].dat = NULL;
+			i = -1;
+		}
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+	} while (i != -1);
+
+	return dat;
+}
+
+/**
+ * This functions runs in infinite blocking loop until there is no fd in
+ * pfdset. It calls corresponding r/w handler if there is event on the fd.
+ *
+ * Before the callback is called, we set the flag to busy status; If other
+ * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
+ * will wait until the flag is reset to zero(which indicates the callback is
+ * finished), then it could free the context after fdset_del.
+ */
+static inline void *
+fdset_event_dispatch(void *arg)
+{
+	int i;
+	struct pollfd *pfd;
+	struct fdentry *pfdentry;
+	fd_cb rcb, wcb;
+	void *dat;
+	int fd, numfds;
+	int remove1, remove2;
+	int need_shrink;
+	struct fdset *pfdset = arg;
+	int val;
+
+	if (pfdset == NULL)
+		return NULL;
+
+	while (1) {
+
+		/*
+		 * When poll is blocked, other threads might unregister
+		 * listenfds from and register new listenfds into fdset.
+		 * When poll returns, the entries for listenfds in the fdset
+		 * might have been updated. It is ok if there is unwanted call
+		 * for new listenfds.
+		 */
+		pthread_mutex_lock(&pfdset->fd_mutex);
+		numfds = pfdset->num;
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+
+		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
+		if (val < 0)
+			continue;
+
+		need_shrink = 0;
+		for (i = 0; i < numfds; i++) {
+			pthread_mutex_lock(&pfdset->fd_mutex);
+
+			pfdentry = &pfdset->fd[i];
+			fd = pfdentry->fd;
+			pfd = &pfdset->rwfds[i];
+
+			if (fd < 0) {
+				need_shrink = 1;
+				pthread_mutex_unlock(&pfdset->fd_mutex);
+				continue;
+			}
+
+			if (!pfd->revents) {
+				pthread_mutex_unlock(&pfdset->fd_mutex);
+				continue;
+			}
+
+			remove1 = remove2 = 0;
+
+			rcb = pfdentry->rcb;
+			wcb = pfdentry->wcb;
+			dat = pfdentry->dat;
+			pfdentry->busy = 1;
+
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+
+			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
+				rcb(fd, dat, &remove1);
+			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
+				wcb(fd, dat, &remove2);
+			pfdentry->busy = 0;
+			/*
+			 * fdset_del needs to check busy flag.
+			 * We don't allow fdset_del to be called in callback
+			 * directly.
+			 */
+			/*
+			 * When we are to clean up the fd from fdset,
+			 * because the fd is closed in the cb,
+			 * the old fd val could be reused by when creates new
+			 * listen fd in another thread, we couldn't call
+			 * fd_set_del.
+			 */
+			if (remove1 || remove2) {
+				pfdentry->fd = -1;
+				need_shrink = 1;
+			}
+		}
+
+		if (need_shrink)
+			fdset_shrink(pfdset);
+	}
+
+	return NULL;
+}
 
 #endif
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 2/4] net/virtio-user: add data members to support server mode
  2018-02-14 14:53 [PATCH 0/4] add to support for virtio-user server mode Zhiyong Yang
  2018-02-14 14:53 ` [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h Zhiyong Yang
@ 2018-02-14 14:53 ` Zhiyong Yang
  2018-02-27 17:53   ` Maxime Coquelin
  2018-02-14 14:53 ` [PATCH 3/4] net/virtio-user: " Zhiyong Yang
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 65+ messages in thread
From: Zhiyong Yang @ 2018-02-14 14:53 UTC (permalink / raw)
  To: dev, yliu, maxime.coquelin, jianfeng.tan, tiwei.bie, zhihong.wang
  Cc: dong1.wang, Zhiyong Yang

Add data members so as to support server mode.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_user/virtio_user_dev.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index 64467b4f9..e640a3438 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -6,13 +6,21 @@
 #define _VIRTIO_USER_DEV_H
 
 #include <limits.h>
+#include <stdbool.h>
 #include "../virtio_pci.h"
 #include "../virtio_ring.h"
 #include "vhost.h"
+#include "fd_man.h"
 
 struct virtio_user_dev {
 	/* for vhost_user backend */
 	int		vhostfd;
+	int		listenfd;   /* listening fd  */
+	bool		connected;  /* connection status */
+
+	/* support for server/clinet mode */
+	bool		is_server;
+	struct fdset	fdset;
 
 	/* for vhost_kernel backend */
 	char		*ifname;
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 3/4] net/virtio-user: support server mode
  2018-02-14 14:53 [PATCH 0/4] add to support for virtio-user server mode Zhiyong Yang
  2018-02-14 14:53 ` [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h Zhiyong Yang
  2018-02-14 14:53 ` [PATCH 2/4] net/virtio-user: add data members to support server mode Zhiyong Yang
@ 2018-02-14 14:53 ` Zhiyong Yang
  2018-02-27 18:01   ` Maxime Coquelin
  2018-02-14 14:53 ` [PATCH 4/4] net/vhost: add memory checking to support client mode Zhiyong Yang
  2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
  4 siblings, 1 reply; 65+ messages in thread
From: Zhiyong Yang @ 2018-02-14 14:53 UTC (permalink / raw)
  To: dev, yliu, maxime.coquelin, jianfeng.tan, tiwei.bie, zhihong.wang
  Cc: dong1.wang, Zhiyong Yang

Add server mode support to virtio-user.

In server mode, virtio-user creates the socket file and then blocks while
waiting for the first connection from a vhost-user backend in client mode.

In server mode, virtio-user supports repeated vhost reconnections with the
same configuration.

Only one connection at a time is supported in server mode.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c               |  9 ++-
 drivers/net/virtio/virtio_user/vhost_user.c      | 77 ++++++++++++++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 44 +++++++++----
 drivers/net/virtio/virtio_user_ethdev.c          | 81 ++++++++++++++++++++++--
 4 files changed, 186 insertions(+), 25 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 884f74ad0..44d037d6b 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1273,9 +1273,13 @@ static void
 virtio_notify_peers(struct rte_eth_dev *dev)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct virtnet_rx *rxvq = NULL;
 	struct rte_mbuf *rarp_mbuf;
 
+	if (!dev->data->rx_queues)
+		return;
+
+	rxvq = dev->data->rx_queues[0];
 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
 			(struct ether_addr *)hw->mac_addr);
 	if (rarp_mbuf == NULL) {
@@ -1333,7 +1337,8 @@ virtio_interrupt_handler(void *param)
 
 	if (isr & VIRTIO_NET_S_ANNOUNCE) {
 		virtio_notify_peers(dev);
-		virtio_ack_link_announce(dev);
+		if (hw->cvq)
+			virtio_ack_link_announce(dev);
 	}
 }
 
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..fd806e106 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,55 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	return 0;
 }
 
+static void
+virtio_user_set_block(int fd, bool enabled)
+{
+	int f;
+
+	f = fcntl(fd, F_GETFL);
+	if (enabled)
+		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+	else
+		fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+#define MAX_VIRTIO_USER_BACKLOG 128
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+	int ret;
+	int fd = dev->listenfd;
+	int connectfd;
+
+	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+			    dev->path, strerror(errno));
+		goto err;
+	}
+	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+	if (ret < 0)
+		goto err;
+
+	virtio_user_set_block(fd, true);
+	PMD_DRV_LOG(NOTICE, "virtio user server mode is waiting for connection from vhost user.");
+	while (1) {
+		connectfd = accept(fd, NULL, NULL);
+		if (connectfd >= 0) {
+			dev->connected = true;
+			break;
+		}
+	}
+
+	dev->vhostfd = connectfd;
+	virtio_user_set_block(connectfd, true);
+
+	return 0;
+err:
+	close(fd);
+	return -1;
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -390,6 +439,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
 {
 	int fd;
 	int flag;
+	int ret;
 	struct sockaddr_un un;
 
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -405,13 +455,30 @@ vhost_user_setup(struct virtio_user_dev *dev)
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
 	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-		close(fd);
-		return -1;
+
+	if (dev->is_server) {
+		static pthread_t fdset_tid;
+
+		dev->listenfd = fd;
+		if (fdset_tid == 0) {
+			ret = pthread_create(&fdset_tid, NULL,
+					     fdset_event_dispatch,
+					     &dev->fdset);
+			if (ret < 0)
+				PMD_DRV_LOG(ERR, "failed to create fdset handling thread");
+		}
+		return virtio_user_start_server(dev, &un);
+
+	} else {
+		dev->vhostfd = fd;
+		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
+		dev->connected = true;
 	}
 
-	dev->vhostfd = fd;
 	return 0;
 }
 
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index f90fee9e5..23312344f 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
 {
 	uint32_t i;
 
+	if (!dev->connected)
+		return -1;
+
 	for (i = 0; i < dev->max_queue_pairs; ++i)
 		dev->ops->enable_qp(dev, i, 0);
 
@@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	dev->vhostfds = NULL;
 	dev->tapfds = NULL;
 
-	if (is_vhost_user_by_type(dev->path)) {
-		dev->ops = &ops_user;
+	if (dev->is_server) {
+		dev->ops = &ops_user;/* server mode only supports vhost user */
 	} else {
-		dev->ops = &ops_kernel;
-
-		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
-		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
-		if (!dev->vhostfds || !dev->tapfds) {
-			PMD_INIT_LOG(ERR, "Failed to malloc");
-			return -1;
-		}
-
-		for (q = 0; q < dev->max_queue_pairs; ++q) {
-			dev->vhostfds[q] = -1;
-			dev->tapfds[q] = -1;
+		if (is_vhost_user_by_type(dev->path)) {
+			dev->ops = &ops_user;
+		} else {
+			dev->ops = &ops_kernel;
+
+			dev->vhostfds = malloc(dev->max_queue_pairs *
+					       sizeof(int));
+			dev->tapfds = malloc(dev->max_queue_pairs *
+					     sizeof(int));
+			if (!dev->vhostfds || !dev->tapfds) {
+				PMD_INIT_LOG(ERR, "Failed to malloc");
+				return -1;
+			}
+
+			for (q = 0; q < dev->max_queue_pairs; ++q) {
+				dev->vhostfds[q] = -1;
+				dev->tapfds[q] = -1;
+			}
 		}
 	}
 
@@ -388,6 +397,10 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 
 	close(dev->vhostfd);
 
+	if (dev->is_server && dev->listenfd >= 0)
+		close(dev->listenfd);
+
+	dev->connected = false;
 	if (dev->vhostfds) {
 		for (i = 0; i < dev->max_queue_pairs; ++i)
 			close(dev->vhostfds[i]);
@@ -396,6 +409,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 	}
 
 	free(dev->ifname);
+
+	if (dev->is_server)
+		unlink(dev->path);
 }
 
 static uint8_t
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 263649006..43fde6840 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -65,8 +65,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
 			if (r == 0 || (r < 0 && errno != EAGAIN)) {
 				dev->status &= (~VIRTIO_NET_S_LINK_UP);
-				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
-					    hw->port_id);
+
 				/* Only client mode is available now. Once the
 				 * connection is broken, it can never be up
 				 * again. Besides, this function could be called
@@ -74,9 +73,15 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				 * callback cannot be unregistered here, set an
 				 * alarm to do it.
 				 */
-				rte_eal_alarm_set(1,
+				if (dev->connected) {
+					dev->connected = false;
+					PMD_DRV_LOG(ERR, "virtio-user port %u is down",
+						    hw->port_id);
+					rte_eal_alarm_set(1,
 						  virtio_user_delayed_handler,
 						  (void *)hw);
+					hw->started = 0;
+				}
 			} else {
 				dev->status |= VIRTIO_NET_S_LINK_UP;
 			}
@@ -278,12 +283,15 @@ static const char *valid_args[] = {
 	VIRTIO_USER_ARG_QUEUE_SIZE,
 #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
 	VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+	VIRTIO_USER_ARG_SERVER_MODE,
 	NULL
 };
 
 #define VIRTIO_USER_DEF_CQ_EN	0
 #define VIRTIO_USER_DEF_Q_NUM	1
 #define VIRTIO_USER_DEF_Q_SZ	256
+#define VIRTIO_USER_DEF_SERVER_MODE	0
 
 static int
 get_string_arg(const char *key __rte_unused,
@@ -365,6 +373,49 @@ virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev)
 	rte_eth_dev_release_port(eth_dev);
 }
 
+static void
+virtio_user_server_reconnection(int fd, void *dat, int *remove __rte_unused)
+{
+	int ret;
+	int flag;
+	int connectfd;
+	struct virtio_user_dev *dev = dat;
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+
+	if (dev->connected)
+		return;
+
+	connectfd = accept(fd, NULL, NULL);
+	if (connectfd < 0)
+		return;
+
+	if (dev->vhostfd >= 0)
+		close(dev->vhostfd);
+
+	dev->vhostfd = connectfd;
+	flag = fcntl(connectfd, F_GETFD);
+	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
+
+	ret = virtio_user_start_device(dev);
+	if (ret < 0)
+		return;
+
+	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+		eth_dev->intr_handle->fd = connectfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
+
+	hw->started = 1;
+	dev->connected = true;
+	PMD_INIT_LOG(NOTICE, "virtio_user_server_reconnection succeeds!");
+}
 /* Dev initialization routine. Invoked once for each virtio vdev at
  * EAL init time, see rte_bus_probe().
  * Returns 0 on success.
@@ -378,11 +429,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
 	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
 	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
 	char *path = NULL;
 	char *ifname = NULL;
 	char *mac_addr = NULL;
 	int ret = -1;
-
+	struct virtio_user_dev *vu_dev = NULL;
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
 	if (!kvlist) {
 		PMD_INIT_LOG(ERR, "error when parsing param");
@@ -445,6 +497,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+				       &get_integer_arg, &server_mode) < 0) {
+			PMD_INIT_LOG(ERR, "error to parse %s",
+				     VIRTIO_USER_ARG_SERVER_MODE);
+			goto end;
+		}
+	}
+
 	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
 		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
 				       &get_integer_arg, &cq) < 0) {
@@ -476,6 +537,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 
 		hw = eth_dev->data->dev_private;
+		vu_dev = virtio_user_get_dev(hw);
+		if (server_mode == 1)
+			vu_dev->is_server = true;
+		else
+			vu_dev->is_server = false;
 		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
 				 queue_size, mac_addr, &ifname) < 0) {
 			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
@@ -488,6 +554,13 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 			goto end;
 	}
 
+	if (vu_dev->is_server) {
+		ret = fdset_add(&vu_dev->fdset, vu_dev->listenfd,
+				virtio_user_server_reconnection, NULL, vu_dev);
+		if (ret < 0)
+			goto end;
+	}
+
 	/* previously called by rte_pci_probe() for physical dev */
 	if (eth_virtio_dev_init(eth_dev) < 0) {
 		PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails");
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 4/4] net/vhost: add memory checking to support client mode
  2018-02-14 14:53 [PATCH 0/4] add to support for virtio-user server mode Zhiyong Yang
                   ` (2 preceding siblings ...)
  2018-02-14 14:53 ` [PATCH 3/4] net/virtio-user: " Zhiyong Yang
@ 2018-02-14 14:53 ` Zhiyong Yang
  2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
  4 siblings, 0 replies; 65+ messages in thread
From: Zhiyong Yang @ 2018-02-14 14:53 UTC (permalink / raw)
  To: dev, yliu, maxime.coquelin, jianfeng.tan, tiwei.bie, zhihong.wang
  Cc: dong1.wang, Zhiyong Yang

When the vhost-user PMD works in client mode and connects or reconnects to
virtio-user in server mode, the new thread may sometimes reach new_device
before queue_setup has been done, so it has to wait until memory
allocation is done.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 3aae01c39..cd67bc7c5 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -580,6 +580,15 @@ new_device(int vid)
 		eth_dev->data->numa_node = newnode;
 #endif
 
+	/* The thread may run here before eth_dev->data->rx_queues or
+	 * eth_dev->data->tx_queues have gotten valid memory, so have to
+	 * wait until memory allocation is done.
+	 */
+	while (!eth_dev->data->rx_queues ||
+	       !eth_dev->data->tx_queues) {
+		;
+	}
+
 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
 		vq = eth_dev->data->rx_queues[i];
 		if (vq == NULL)
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-02-14 14:53 ` [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h Zhiyong Yang
@ 2018-02-27 17:51   ` Maxime Coquelin
  2018-02-28  1:36     ` Yang, Zhiyong
  2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
  1 sibling, 1 reply; 65+ messages in thread
From: Maxime Coquelin @ 2018-02-27 17:51 UTC (permalink / raw)
  To: Zhiyong Yang, dev, yliu, jianfeng.tan, tiwei.bie, zhihong.wang; +Cc: dong1.wang

Hi Zhiyong,

On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> The patch moves fdset related functions from fd_man.c to fd_man.h in
> order to reuse these functions from the perspective of PMDs.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   lib/librte_vhost/Makefile |   3 +-
>   lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
>   lib/librte_vhost/fd_man.h | 258 +++++++++++++++++++++++++++++++++++++++++--
>   3 files changed, 253 insertions(+), 282 deletions(-)
>   delete mode 100644 lib/librte_vhost/fd_man.c

I disagree with the patch.
It is a good thing to reuse the code, but to do it, you need to extend
the vhost lib API.

New APIs need to be prefixed with rte_vhost_, and be declared in
rte_vhost.h.

There is also no need to move the functions from the .c to the .h file;
doing so forces you to inline them, which is not necessary here.

> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index 5d6c6abae..e201df79c 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -21,10 +21,11 @@ endif
>   LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
>   
>   # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := iotlb.c socket.c vhost.c \
>   					vhost_user.c virtio_net.c
>   
>   # install includes
>   SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += fd_man.h
>   
>   include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
> deleted file mode 100644
> index 181711c2a..000000000
> --- a/lib/librte_vhost/fd_man.c
> +++ /dev/null
> @@ -1,274 +0,0 @@
> -/* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2010-2014 Intel Corporation
> - */
> -
> -#include <stdint.h>
> -#include <stdio.h>
> -#include <stdlib.h>
> -#include <sys/socket.h>
> -#include <sys/time.h>
> -#include <sys/types.h>
> -#include <unistd.h>
> -#include <string.h>
> -
> -#include <rte_common.h>
> -#include <rte_log.h>
> -
> -#include "fd_man.h"
> -
> -#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
> -
> -static int
> -get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
> -{
> -	int i;
> -
> -	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
> -		;
> -
> -	return i;
> -}
> -
> -static void
> -fdset_move(struct fdset *pfdset, int dst, int src)
> -{
> -	pfdset->fd[dst]    = pfdset->fd[src];
> -	pfdset->rwfds[dst] = pfdset->rwfds[src];
> -}
> -
> -static void
> -fdset_shrink_nolock(struct fdset *pfdset)
> -{
> -	int i;
> -	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
> -
> -	for (i = 0; i < last_valid_idx; i++) {
> -		if (pfdset->fd[i].fd != -1)
> -			continue;
> -
> -		fdset_move(pfdset, i, last_valid_idx);
> -		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
> -	}
> -	pfdset->num = last_valid_idx + 1;
> -}
> -
> -/*
> - * Find deleted fd entries and remove them
> - */
> -static void
> -fdset_shrink(struct fdset *pfdset)
> -{
> -	pthread_mutex_lock(&pfdset->fd_mutex);
> -	fdset_shrink_nolock(pfdset);
> -	pthread_mutex_unlock(&pfdset->fd_mutex);
> -}
> -
> -/**
> - * Returns the index in the fdset for a given fd.
> - * @return
> - *   index for the fd, or -1 if fd isn't in the fdset.
> - */
> -static int
> -fdset_find_fd(struct fdset *pfdset, int fd)
> -{
> -	int i;
> -
> -	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
> -		;
> -
> -	return i == pfdset->num ? -1 : i;
> -}
> -
> -static void
> -fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> -	fd_cb rcb, fd_cb wcb, void *dat)
> -{
> -	struct fdentry *pfdentry = &pfdset->fd[idx];
> -	struct pollfd *pfd = &pfdset->rwfds[idx];
> -
> -	pfdentry->fd  = fd;
> -	pfdentry->rcb = rcb;
> -	pfdentry->wcb = wcb;
> -	pfdentry->dat = dat;
> -
> -	pfd->fd = fd;
> -	pfd->events  = rcb ? POLLIN : 0;
> -	pfd->events |= wcb ? POLLOUT : 0;
> -	pfd->revents = 0;
> -}
> -
> -void
> -fdset_init(struct fdset *pfdset)
> -{
> -	int i;
> -
> -	if (pfdset == NULL)
> -		return;
> -
> -	for (i = 0; i < MAX_FDS; i++) {
> -		pfdset->fd[i].fd = -1;
> -		pfdset->fd[i].dat = NULL;
> -	}
> -	pfdset->num = 0;
> -}
> -
> -/**
> - * Register the fd in the fdset with read/write handler and context.
> - */
> -int
> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> -{
> -	int i;
> -
> -	if (pfdset == NULL || fd == -1)
> -		return -1;
> -
> -	pthread_mutex_lock(&pfdset->fd_mutex);
> -	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> -	if (i == -1) {
> -		fdset_shrink_nolock(pfdset);
> -		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> -		if (i == -1) {
> -			pthread_mutex_unlock(&pfdset->fd_mutex);
> -			return -2;
> -		}
> -	}
> -
> -	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
> -	pthread_mutex_unlock(&pfdset->fd_mutex);
> -
> -	return 0;
> -}
> -
> -/**
> - *  Unregister the fd from the fdset.
> - *  Returns context of a given fd or NULL.
> - */
> -void *
> -fdset_del(struct fdset *pfdset, int fd)
> -{
> -	int i;
> -	void *dat = NULL;
> -
> -	if (pfdset == NULL || fd == -1)
> -		return NULL;
> -
> -	do {
> -		pthread_mutex_lock(&pfdset->fd_mutex);
> -
> -		i = fdset_find_fd(pfdset, fd);
> -		if (i != -1 && pfdset->fd[i].busy == 0) {
> -			/* busy indicates r/wcb is executing! */
> -			dat = pfdset->fd[i].dat;
> -			pfdset->fd[i].fd = -1;
> -			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
> -			pfdset->fd[i].dat = NULL;
> -			i = -1;
> -		}
> -		pthread_mutex_unlock(&pfdset->fd_mutex);
> -	} while (i != -1);
> -
> -	return dat;
> -}
> -
> -
> -/**
> - * This functions runs in infinite blocking loop until there is no fd in
> - * pfdset. It calls corresponding r/w handler if there is event on the fd.
> - *
> - * Before the callback is called, we set the flag to busy status; If other
> - * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
> - * will wait until the flag is reset to zero(which indicates the callback is
> - * finished), then it could free the context after fdset_del.
> - */
> -void *
> -fdset_event_dispatch(void *arg)
> -{
> -	int i;
> -	struct pollfd *pfd;
> -	struct fdentry *pfdentry;
> -	fd_cb rcb, wcb;
> -	void *dat;
> -	int fd, numfds;
> -	int remove1, remove2;
> -	int need_shrink;
> -	struct fdset *pfdset = arg;
> -	int val;
> -
> -	if (pfdset == NULL)
> -		return NULL;
> -
> -	while (1) {
> -
> -		/*
> -		 * When poll is blocked, other threads might unregister
> -		 * listenfds from and register new listenfds into fdset.
> -		 * When poll returns, the entries for listenfds in the fdset
> -		 * might have been updated. It is ok if there is unwanted call
> -		 * for new listenfds.
> -		 */
> -		pthread_mutex_lock(&pfdset->fd_mutex);
> -		numfds = pfdset->num;
> -		pthread_mutex_unlock(&pfdset->fd_mutex);
> -
> -		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
> -		if (val < 0)
> -			continue;
> -
> -		need_shrink = 0;
> -		for (i = 0; i < numfds; i++) {
> -			pthread_mutex_lock(&pfdset->fd_mutex);
> -
> -			pfdentry = &pfdset->fd[i];
> -			fd = pfdentry->fd;
> -			pfd = &pfdset->rwfds[i];
> -
> -			if (fd < 0) {
> -				need_shrink = 1;
> -				pthread_mutex_unlock(&pfdset->fd_mutex);
> -				continue;
> -			}
> -
> -			if (!pfd->revents) {
> -				pthread_mutex_unlock(&pfdset->fd_mutex);
> -				continue;
> -			}
> -
> -			remove1 = remove2 = 0;
> -
> -			rcb = pfdentry->rcb;
> -			wcb = pfdentry->wcb;
> -			dat = pfdentry->dat;
> -			pfdentry->busy = 1;
> -
> -			pthread_mutex_unlock(&pfdset->fd_mutex);
> -
> -			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
> -				rcb(fd, dat, &remove1);
> -			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
> -				wcb(fd, dat, &remove2);
> -			pfdentry->busy = 0;
> -			/*
> -			 * fdset_del needs to check busy flag.
> -			 * We don't allow fdset_del to be called in callback
> -			 * directly.
> -			 */
> -			/*
> -			 * When we are to clean up the fd from fdset,
> -			 * because the fd is closed in the cb,
> -			 * the old fd val could be reused by when creates new
> -			 * listen fd in another thread, we couldn't call
> -			 * fd_set_del.
> -			 */
> -			if (remove1 || remove2) {
> -				pfdentry->fd = -1;
> -				need_shrink = 1;
> -			}
> -		}
> -
> -		if (need_shrink)
> -			fdset_shrink(pfdset);
> -	}
> -
> -	return NULL;
> -}
> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
> index 3a9276c3c..b1c628251 100644
> --- a/lib/librte_vhost/fd_man.h
> +++ b/lib/librte_vhost/fd_man.h
> @@ -4,11 +4,11 @@
>   
>   #ifndef _FD_MAN_H_
>   #define _FD_MAN_H_
> -#include <stdint.h>
> -#include <pthread.h>
>   #include <poll.h>
> +#include <stdio.h>
>   
>   #define MAX_FDS 1024
> +#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
>   
>   typedef void (*fd_cb)(int fd, void *dat, int *remove);
>   
> @@ -27,14 +27,258 @@ struct fdset {
>   	int num;	/* current fd number of this fdset */
>   };
>   
> +static inline int
> +get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
> +{
> +	int i;
>   
> -void fdset_init(struct fdset *pfdset);
> +	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
> +		;
>   
> -int fdset_add(struct fdset *pfdset, int fd,
> -	fd_cb rcb, fd_cb wcb, void *dat);
> +	return i;
> +}
>   
> -void *fdset_del(struct fdset *pfdset, int fd);
> +static inline void
> +fdset_move(struct fdset *pfdset, int dst, int src)
> +{
> +	pfdset->fd[dst]    = pfdset->fd[src];
> +	pfdset->rwfds[dst] = pfdset->rwfds[src];
> +}
>   
> -void *fdset_event_dispatch(void *arg);
> +static inline void
> +fdset_shrink_nolock(struct fdset *pfdset)
> +{
> +	int i;
> +	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
> +
> +	for (i = 0; i < last_valid_idx; i++) {
> +		if (pfdset->fd[i].fd != -1)
> +			continue;
> +
> +		fdset_move(pfdset, i, last_valid_idx);
> +		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
> +	}
> +	pfdset->num = last_valid_idx + 1;
> +}
> +
> +/*
> + * Find deleted fd entries and remove them
> + */
> +static inline void
> +fdset_shrink(struct fdset *pfdset)
> +{
> +	pthread_mutex_lock(&pfdset->fd_mutex);
> +	fdset_shrink_nolock(pfdset);
> +	pthread_mutex_unlock(&pfdset->fd_mutex);
> +}
> +
> +/**
> + * Returns the index in the fdset for a given fd.
> + * @return
> + *   index for the fd, or -1 if fd isn't in the fdset.
> + */
> +static inline int
> +fdset_find_fd(struct fdset *pfdset, int fd)
> +{
> +	int i;
> +
> +	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
> +		;
> +
> +	return i == pfdset->num ? -1 : i;
> +}
> +
> +static inline void
> +fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> +	fd_cb rcb, fd_cb wcb, void *dat)
> +{
> +	struct fdentry *pfdentry = &pfdset->fd[idx];
> +	struct pollfd *pfd = &pfdset->rwfds[idx];
> +
> +	pfdentry->fd  = fd;
> +	pfdentry->rcb = rcb;
> +	pfdentry->wcb = wcb;
> +	pfdentry->dat = dat;
> +
> +	pfd->fd = fd;
> +	pfd->events  = rcb ? POLLIN : 0;
> +	pfd->events |= wcb ? POLLOUT : 0;
> +	pfd->revents = 0;
> +}
> +
> +static inline void
> +fdset_init(struct fdset *pfdset)
> +{
> +	int i;
> +
> +	if (pfdset == NULL)
> +		return;
> +
> +	for (i = 0; i < MAX_FDS; i++) {
> +		pfdset->fd[i].fd = -1;
> +		pfdset->fd[i].dat = NULL;
> +	}
> +	pfdset->num = 0;
> +}
> +
> +/**
> + * Register the fd in the fdset with read/write handler and context.
> + */
> +static inline int
> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> +{
> +	int i;
> +
> +	if (pfdset == NULL || fd == -1)
> +		return -1;
> +
> +	pthread_mutex_lock(&pfdset->fd_mutex);
> +	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> +	if (i == -1) {
> +		fdset_shrink_nolock(pfdset);
> +		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> +		if (i == -1) {
> +			pthread_mutex_unlock(&pfdset->fd_mutex);
> +			return -2;
> +		}
> +	}
> +
> +	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
> +	pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> +	return 0;
> +}
> +
> +/**
> + *  Unregister the fd from the fdset.
> + *  Returns context of a given fd or NULL.
> + */
> +static inline void *
> +fdset_del(struct fdset *pfdset, int fd)
> +{
> +	int i;
> +	void *dat = NULL;
> +
> +	if (pfdset == NULL || fd == -1)
> +		return NULL;
> +
> +	do {
> +		pthread_mutex_lock(&pfdset->fd_mutex);
> +
> +		i = fdset_find_fd(pfdset, fd);
> +		if (i != -1 && pfdset->fd[i].busy == 0) {
> +			/* busy indicates r/wcb is executing! */
> +			dat = pfdset->fd[i].dat;
> +			pfdset->fd[i].fd = -1;
> +			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
> +			pfdset->fd[i].dat = NULL;
> +			i = -1;
> +		}
> +		pthread_mutex_unlock(&pfdset->fd_mutex);
> +	} while (i != -1);
> +
> +	return dat;
> +}
> +
> +/**
> + * This functions runs in infinite blocking loop until there is no fd in
> + * pfdset. It calls corresponding r/w handler if there is event on the fd.
> + *
> + * Before the callback is called, we set the flag to busy status; If other
> + * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
> + * will wait until the flag is reset to zero(which indicates the callback is
> + * finished), then it could free the context after fdset_del.
> + */
> +static inline void *
> +fdset_event_dispatch(void *arg)
> +{
> +	int i;
> +	struct pollfd *pfd;
> +	struct fdentry *pfdentry;
> +	fd_cb rcb, wcb;
> +	void *dat;
> +	int fd, numfds;
> +	int remove1, remove2;
> +	int need_shrink;
> +	struct fdset *pfdset = arg;
> +	int val;
> +
> +	if (pfdset == NULL)
> +		return NULL;
> +
> +	while (1) {
> +
> +		/*
> +		 * When poll is blocked, other threads might unregister
> +		 * listenfds from and register new listenfds into fdset.
> +		 * When poll returns, the entries for listenfds in the fdset
> +		 * might have been updated. It is ok if there is unwanted call
> +		 * for new listenfds.
> +		 */
> +		pthread_mutex_lock(&pfdset->fd_mutex);
> +		numfds = pfdset->num;
> +		pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> +		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
> +		if (val < 0)
> +			continue;
> +
> +		need_shrink = 0;
> +		for (i = 0; i < numfds; i++) {
> +			pthread_mutex_lock(&pfdset->fd_mutex);
> +
> +			pfdentry = &pfdset->fd[i];
> +			fd = pfdentry->fd;
> +			pfd = &pfdset->rwfds[i];
> +
> +			if (fd < 0) {
> +				need_shrink = 1;
> +				pthread_mutex_unlock(&pfdset->fd_mutex);
> +				continue;
> +			}
> +
> +			if (!pfd->revents) {
> +				pthread_mutex_unlock(&pfdset->fd_mutex);
> +				continue;
> +			}
> +
> +			remove1 = remove2 = 0;
> +
> +			rcb = pfdentry->rcb;
> +			wcb = pfdentry->wcb;
> +			dat = pfdentry->dat;
> +			pfdentry->busy = 1;
> +
> +			pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> +			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
> +				rcb(fd, dat, &remove1);
> +			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
> +				wcb(fd, dat, &remove2);
> +			pfdentry->busy = 0;
> +			/*
> +			 * fdset_del needs to check busy flag.
> +			 * We don't allow fdset_del to be called in callback
> +			 * directly.
> +			 */
> +			/*
> +			 * When we are to clean up the fd from fdset,
> +			 * because the fd is closed in the cb,
> +			 * the old fd val could be reused by when creates new
> +			 * listen fd in another thread, we couldn't call
> +			 * fd_set_del.
> +			 */
> +			if (remove1 || remove2) {
> +				pfdentry->fd = -1;
> +				need_shrink = 1;
> +			}
> +		}
> +
> +		if (need_shrink)
> +			fdset_shrink(pfdset);
> +	}
> +
> +	return NULL;
> +}
>   
>   #endif
> 

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 2/4] net/virtio-user: add data members to support server mode
  2018-02-14 14:53 ` [PATCH 2/4] net/virtio-user: add data members to support server mode Zhiyong Yang
@ 2018-02-27 17:53   ` Maxime Coquelin
  2018-02-28  1:38     ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Maxime Coquelin @ 2018-02-27 17:53 UTC (permalink / raw)
  To: Zhiyong Yang, dev, yliu, jianfeng.tan, tiwei.bie, zhihong.wang; +Cc: dong1.wang



On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> Add data members so as to support server mode.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   drivers/net/virtio/virtio_user/virtio_user_dev.h | 8 ++++++++
>   1 file changed, 8 insertions(+)


This patch can be squashed in patch 3.

> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> index 64467b4f9..e640a3438 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> @@ -6,13 +6,21 @@
>   #define _VIRTIO_USER_DEV_H
>   
>   #include <limits.h>
> +#include <stdbool.h>
>   #include "../virtio_pci.h"
>   #include "../virtio_ring.h"
>   #include "vhost.h"
> +#include "fd_man.h"
>   
>   struct virtio_user_dev {
>   	/* for vhost_user backend */
>   	int		vhostfd;
> +	int		listenfd;   /* listening fd  */
> +	bool		connected;  /* connection status */
> +
> +	/* support for server/client mode */
> +	bool		is_server;
> +	struct fdset	fdset;
>   
>   	/* for vhost_kernel backend */
>   	char		*ifname;
> 

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 3/4] net/virtio-user: support server mode
  2018-02-14 14:53 ` [PATCH 3/4] net/virtio-user: " Zhiyong Yang
@ 2018-02-27 18:01   ` Maxime Coquelin
  2018-02-28  1:53     ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Maxime Coquelin @ 2018-02-27 18:01 UTC (permalink / raw)
  To: Zhiyong Yang, dev, yliu, jianfeng.tan, tiwei.bie, zhihong.wang; +Cc: dong1.wang



On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> virtio user adds to support for server mode.
> 
> Virtio user with server mode creates socket file and then starts to wait
> for first connection from vhost user with client mode in blocking mode.
> 
> Server mode virtio user supports many times' vhost reconnections with
> same configurations.
> 
> Support only one connection at the same time in server mode.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   drivers/net/virtio/virtio_ethdev.c               |  9 ++-
>   drivers/net/virtio/virtio_user/vhost_user.c      | 77 ++++++++++++++++++++--
>   drivers/net/virtio/virtio_user/virtio_user_dev.c | 44 +++++++++----
>   drivers/net/virtio/virtio_user_ethdev.c          | 81 ++++++++++++++++++++++--
>   4 files changed, 186 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 884f74ad0..44d037d6b 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1273,9 +1273,13 @@ static void
>   virtio_notify_peers(struct rte_eth_dev *dev)
>   {
>   	struct virtio_hw *hw = dev->data->dev_private;
> -	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +	struct virtnet_rx *rxvq = NULL;
I don't think it is necessary to initialize it to NULL here.

>   	struct rte_mbuf *rarp_mbuf;
>   
> +	if (!dev->data->rx_queues)
> +		return;
> +
> +	rxvq = dev->data->rx_queues[0];
The above change is valid, but I think it should be in a dedicated
patch, as it might be backported to -stable.

>   	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
>   			(struct ether_addr *)hw->mac_addr);
>   	if (rarp_mbuf == NULL) {
> @@ -1333,7 +1337,8 @@ virtio_interrupt_handler(void *param)
>   
>   	if (isr & VIRTIO_NET_S_ANNOUNCE) {
>   		virtio_notify_peers(dev);
> -		virtio_ack_link_announce(dev);
> +		if (hw->cvq)
> +			virtio_ack_link_announce(dev);
Is this change also related to server mode?
It may deserve a dedicated patch too.
>   	}
>   }

Thanks,
Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-02-27 17:51   ` Maxime Coquelin
@ 2018-02-28  1:36     ` Yang, Zhiyong
  2018-02-28  8:45       ` Maxime Coquelin
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-02-28  1:36 UTC (permalink / raw)
  To: Maxime Coquelin, dev, yliu, Tan, Jianfeng, Bie, Tiwei, Wang, Zhihong
  Cc: Wang, Dong1



> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Wednesday, February 28, 2018 1:52 AM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;
> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei
> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> Cc: Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> Hi Zhiyong,
> 
> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > The patch moves fdset related funcitons from fd_man.c to fd_man.h in
> > order to reuse these funcitons from the perspective of PMDs.
> >
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > ---
> >   lib/librte_vhost/Makefile |   3 +-
> >   lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
> >   lib/librte_vhost/fd_man.h | 258
> +++++++++++++++++++++++++++++++++++++++++--
> >   3 files changed, 253 insertions(+), 282 deletions(-)
> >   delete mode 100644 lib/librte_vhost/fd_man.c
> 
> I disagree with the patch.
> It is a good thing to reuse the code, but to do it, you need to extend the
> vhost lib API.
> 
> New API need to be prefixed with rte_vhost_, and be declared in
> rte_vhost.h.
> 
> And no need to move the functions from the .c to the .h file, as it moreover
> makes you inline them, which is not necessary here.

Thanks for reviewing the series, Maxime. :)

I considered doing it as you suggest. However, in the end I still preferred this approach.
Here are my reasons.
1) As far as I know, this set of functions was used only privately within librte_vhost before this feature.
There is no strong request for them from the perspective of DPDK applications. If I understand correctly, it is enough to expose the functions to all PMDs,
and it is better to keep them for internal use within DPDK.

2) These functions help to implement vhost-user, but they are not strongly related to the other vhost-user APIs that are already exposed.
If we want to expose them as APIs at the lib layer, many functions and related data structures have to be exposed in rte_vhost.h, which looks messy.
What is your opinion?

thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 2/4] net/virtio-user: add data members to support server mode
  2018-02-27 17:53   ` Maxime Coquelin
@ 2018-02-28  1:38     ` Yang, Zhiyong
  0 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-02-28  1:38 UTC (permalink / raw)
  To: Maxime Coquelin, dev, yliu, Tan, Jianfeng, Bie, Tiwei, Wang, Zhihong
  Cc: Wang, Dong1



> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Wednesday, February 28, 2018 1:53 AM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;
> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei
> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> Cc: Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 2/4] net/virtio-user: add data members to support
> server mode
> 
> 
> 
> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > Add data members so as to support server mode.
> >
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > ---
> >   drivers/net/virtio/virtio_user/virtio_user_dev.h | 8 ++++++++
> >   1 file changed, 8 insertions(+)
> 
> 
> This patch can be squashed in patch 3.

Ok. 

Thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 3/4] net/virtio-user: support server mode
  2018-02-27 18:01   ` Maxime Coquelin
@ 2018-02-28  1:53     ` Yang, Zhiyong
  2018-02-28  8:33       ` Maxime Coquelin
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-02-28  1:53 UTC (permalink / raw)
  To: Maxime Coquelin, dev, yliu, Tan, Jianfeng, Bie, Tiwei, Wang, Zhihong
  Cc: Wang, Dong1



> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Wednesday, February 28, 2018 2:02 AM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;
> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei
> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> Cc: Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 3/4] net/virtio-user: support server mode
> 
> 
> 
> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > virtio user adds to support for server mode.
> >
> > Virtio user with server mode creates socket file and then starts to
> > wait for first connection from vhost user with client mode in blocking mode.
> >
> > Server mode virtio user supports many times' vhost reconnections with
> > same configurations.
> >
> > Support only one connection at the same time in server mode.
> >
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > ---
> >   drivers/net/virtio/virtio_ethdev.c               |  9 ++-
> >   drivers/net/virtio/virtio_user/vhost_user.c      | 77
> ++++++++++++++++++++--
> >   drivers/net/virtio/virtio_user/virtio_user_dev.c | 44 +++++++++----
> >   drivers/net/virtio/virtio_user_ethdev.c          | 81
> ++++++++++++++++++++++--
> >   4 files changed, 186 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/net/virtio/virtio_ethdev.c
> > b/drivers/net/virtio/virtio_ethdev.c
> > index 884f74ad0..44d037d6b 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -1273,9 +1273,13 @@ static void
> >   virtio_notify_peers(struct rte_eth_dev *dev)
> >   {
> >   	struct virtio_hw *hw = dev->data->dev_private;
> > -	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> > +	struct virtnet_rx *rxvq = NULL;
> I don't think it is needed to assign to NULL here.

Ok.
> 
> >   	struct rte_mbuf *rarp_mbuf;
> >
> > +	if (!dev->data->rx_queues)
> > +		return;
> > +
> > +	rxvq = dev->data->rx_queues[0];
> The above change is valid, but I think it should be in a dedicated patch, as it
> might be backported to -stable.
> 
OK. Without the check it can crash in some cases, for example when the code gets here before the memory for the rx queues has been allocated.

> >   	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
> >   			(struct ether_addr *)hw->mac_addr);
> >   	if (rarp_mbuf == NULL) {
> > @@ -1333,7 +1337,8 @@ virtio_interrupt_handler(void *param)
> >
> >   	if (isr & VIRTIO_NET_S_ANNOUNCE) {
> >   		virtio_notify_peers(dev);
> > -		virtio_ack_link_announce(dev);
> > +		if (hw->cvq) > +
> 	virtio_ack_link_announce(dev);
> Is this change also related to server mode?
> It may deserve a dedicated patch too.

The above changes are not related to server mode. :)
They just look more reasonable.

If I split them out into two extra patches, should I put them in this series or in another series?

Thanks
Zhiyong

> >   	}
> >   }
> 
> Thanks,
> Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 3/4] net/virtio-user: support server mode
  2018-02-28  1:53     ` Yang, Zhiyong
@ 2018-02-28  8:33       ` Maxime Coquelin
  0 siblings, 0 replies; 65+ messages in thread
From: Maxime Coquelin @ 2018-02-28  8:33 UTC (permalink / raw)
  To: Yang, Zhiyong, dev, yliu, Tan, Jianfeng, Bie, Tiwei, Wang, Zhihong
  Cc: Wang, Dong1



On 02/28/2018 02:53 AM, Yang, Zhiyong wrote:
> 
> 
>> -----Original Message-----
>> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
>> Sent: Wednesday, February 28, 2018 2:02 AM
>> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;
>> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei
>> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
>> Cc: Wang, Dong1 <dong1.wang@intel.com>
>> Subject: Re: [PATCH 3/4] net/virtio-user: support server mode
>>
>>
>>
>> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
>>> virtio user adds to support for server mode.
>>>
>>> Virtio user with server mode creates socket file and then starts to
>>> wait for first connection from vhost user with client mode in blocking mode.
>>>
>>> Server mode virtio user supports many times' vhost reconnections with
>>> same configurations.
>>>
>>> Support only one connection at the same time in server mode.
>>>
>>> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
>>> ---
>>>    drivers/net/virtio/virtio_ethdev.c               |  9 ++-
>>>    drivers/net/virtio/virtio_user/vhost_user.c      | 77
>> ++++++++++++++++++++--
>>>    drivers/net/virtio/virtio_user/virtio_user_dev.c | 44 +++++++++----
>>>    drivers/net/virtio/virtio_user_ethdev.c          | 81
>> ++++++++++++++++++++++--
>>>    4 files changed, 186 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/drivers/net/virtio/virtio_ethdev.c
>>> b/drivers/net/virtio/virtio_ethdev.c
>>> index 884f74ad0..44d037d6b 100644
>>> --- a/drivers/net/virtio/virtio_ethdev.c
>>> +++ b/drivers/net/virtio/virtio_ethdev.c
>>> @@ -1273,9 +1273,13 @@ static void
>>>    virtio_notify_peers(struct rte_eth_dev *dev)
>>>    {
>>>    	struct virtio_hw *hw = dev->data->dev_private;
>>> -	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
>>> +	struct virtnet_rx *rxvq = NULL;
>> I don't think it is needed to assign to NULL here.
> 
> Ok.
>>
>>>    	struct rte_mbuf *rarp_mbuf;
>>>
>>> +	if (!dev->data->rx_queues)
>>> +		return;
>>> +
>>> +	rxvq = dev->data->rx_queues[0];
>> The above change is valid, but I think it should be in a dedicated patch, as it
>> might be backported to -stable.
>>
> Ok, it will cause crash in some cases. For example, the code goes here before memory allocation of  rxvq is done.
> 
>>>    	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
>>>    			(struct ether_addr *)hw->mac_addr);
>>>    	if (rarp_mbuf == NULL) {
>>> @@ -1333,7 +1337,8 @@ virtio_interrupt_handler(void *param)
>>>
>>>    	if (isr & VIRTIO_NET_S_ANNOUNCE) {
>>>    		virtio_notify_peers(dev);
>>> -		virtio_ack_link_announce(dev);
>>> +		if (hw->cvq)
>>> +			virtio_ack_link_announce(dev);
>> Is this change also related to server mode?
>> It may deserve a dedicated patch too.
> 
> Above changes are not related to server mode. :).
> Just looks more reasonable.
> 
> If format extra two patches,  Should I put them in this series or in another series?

Thanks. Put them in the same series, with a Fixes: line and Cc:
stable@dpdk.org if the fixed patch was introduced before v17.11.


> Thanks
> Zhiyong
> 
>>>    	}
>>>    }
>>
>> Thanks,
>> Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-02-28  1:36     ` Yang, Zhiyong
@ 2018-02-28  8:45       ` Maxime Coquelin
  2018-03-01  6:02         ` Tan, Jianfeng
  0 siblings, 1 reply; 65+ messages in thread
From: Maxime Coquelin @ 2018-02-28  8:45 UTC (permalink / raw)
  To: Yang, Zhiyong, dev, yliu, Tan, Jianfeng, Bie, Tiwei, Wang, Zhihong
  Cc: Wang, Dong1



On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> 
> 
>> -----Original Message-----
>> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
>> Sent: Wednesday, February 28, 2018 1:52 AM
>> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;
>> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei
>> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
>> Cc: Wang, Dong1 <dong1.wang@intel.com>
>> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
>> fd_man.h
>>
>> Hi Zhiyong,
>>
>> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
>>> The patch moves fdset related functions from fd_man.c to fd_man.h in
>>> order to reuse these functions from the perspective of PMDs.
>>>
>>> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
>>> ---
>>>    lib/librte_vhost/Makefile |   3 +-
>>>    lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
>>>    lib/librte_vhost/fd_man.h | 258
>> +++++++++++++++++++++++++++++++++++++++++--
>>>    3 files changed, 253 insertions(+), 282 deletions(-)
>>>    delete mode 100644 lib/librte_vhost/fd_man.c
>>
>> I disagree with the patch.
>> It is a good thing to reuse the code, but to do it, you need to extend the
>> vhost lib API.
>>
>> New API need to be prefixed with rte_vhost_, and be declared in
>> rte_vhost.h.
>>
>> And no need to move the functions from the .c to the .h file, as it moreover
>> makes you inline them, which is not necessary here.
> 
> Thanks for your reviewing the series firstly, Maxime. :)
> 
> I considered to do it as you said. However I still preferred this one at last.
> Here are my reasons.
> 1) As far as I know, this set of functions was used only privately in librte_vhost before this feature.
> There is no strong request for them from the perspective of DPDK applications. If I understand correctly, it is enough to expose the functions to all PMDs,
> and it is better to keep them for internal use in DPDK.

But what the patch is doing is adding fd_man.h to the API, without doing
it properly. fd_man.h will be installed with other header files, and any
external application can use it.

> 
> 2) These functions help to implement vhost user, but they are not strongly related to the other vhost user APIs which have already been exposed.
> If we want to expose them as APIs at the lib layer, many functions and related data structures have to be exposed in rte_vhost.h, which looks messy.
> What is your opinion?

Yes, it is not really vhost-related; it could be part of a more generic
library. It is maybe better to duplicate these lines, or to move this
code into an existing or new library.

Cheers,
Maxime

> thanks
> Zhiyong
> 

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-02-28  8:45       ` Maxime Coquelin
@ 2018-03-01  6:02         ` Tan, Jianfeng
  2018-03-01 14:13           ` Thomas Monjalon
  0 siblings, 1 reply; 65+ messages in thread
From: Tan, Jianfeng @ 2018-03-01  6:02 UTC (permalink / raw)
  To: Maxime Coquelin, Yang, Zhiyong, dev, yliu, Bie, Tiwei, Wang, Zhihong
  Cc: Wang, Dong1, Thomas Monjalon



> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Wednesday, February 28, 2018 4:45 PM
> To: Yang, Zhiyong; dev@dpdk.org; yliu@fridaylinux.org; Tan, Jianfeng; Bie,
> Tiwei; Wang, Zhihong
> Cc: Wang, Dong1
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 
> 
> On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> >
> >
> >> -----Original Message-----
> >> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> >> Sent: Wednesday, February 28, 2018 1:52 AM
> >> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;
> >> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei
> >> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> >> Cc: Wang, Dong1 <dong1.wang@intel.com>
> >> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> >> fd_man.h
> >>
> >> Hi Zhiyong,
> >>
> >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> >>> The patch moves fdset related funcitons from fd_man.c to fd_man.h in
> >>> order to reuse these funcitons from the perspective of PMDs.
> >>>
> >>> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> >>> ---
> >>>    lib/librte_vhost/Makefile |   3 +-
> >>>    lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
> >>>    lib/librte_vhost/fd_man.h | 258
> >> +++++++++++++++++++++++++++++++++++++++++--
> >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> >>
> >> I disagree with the patch.
> >> It is a good thing to reuse the code, but to do it, you need to extend the
> >> vhost lib API.
> >>
> >> New API need to be prefixed with rte_vhost_, and be declared in
> >> rte_vhost.h.
> >>
> >> And no need to move the functions from the .c to the .h file, as it
> moreover
> >> makes you inline them, which is not necessary here.
> >
> > Thanks for your reviewing the series firstly, Maxime. :)
> >
> > I considered to do it as you said. However I still preferred this one at last.
> > Here are my reasons.
> > 1) As far as I know, this set of functions are used privately in librte_vhost
> before this feature.
> > No strong request from the perspective of DPDK application. If I
> understand well,  It is enough to expose the functions to all PMDs
> > And it is better to keep internal use in DPDK.
> 
> But what the patch is doing is adding fd_man.h to the API, without doing
> it properly. fd_man.h will be installed with other header files, and any
> external application can use it.
> 
> >
> > 2) These functions help to implement vhost user, but they are not strongly
> related to other APIs of vhost user which have already exposed.
> > if we want to expose them as APIs at lib layer, many functions and related
> data structure has to be exposed in rte_vhost.h. it looks messy.
> > Your opinion?
> 
> Yes, it is not really vhost-related, it could be part of a more generic
> library. It is maybe better to duplicate these lines, or to move this
> code in a existing or new library.

I vote to move it to a generic library, maybe EAL. poll() has better compatibility, even though it is not as performant as epoll().

Thomas, what do you think?

Thanks,
Jianfeng 

> 
> Cheers,
> Maxime
> 
> > thanks
> > Zhiyong
> >

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-01  6:02         ` Tan, Jianfeng
@ 2018-03-01 14:13           ` Thomas Monjalon
  2018-03-05  7:43             ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Thomas Monjalon @ 2018-03-01 14:13 UTC (permalink / raw)
  To: Tan, Jianfeng
  Cc: Maxime Coquelin, Yang, Zhiyong, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1

01/03/2018 07:02, Tan, Jianfeng:
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > >>>    lib/librte_vhost/Makefile |   3 +-
> > >>>    lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
> > >>>    lib/librte_vhost/fd_man.h | 258
> > >> +++++++++++++++++++++++++++++++++++++++++--
> > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > >>
> > >> I disagree with the patch.
> > >> It is a good thing to reuse the code, but to do it, you need to extend the
> > >> vhost lib API.
> > >>
> > >> New API need to be prefixed with rte_vhost_, and be declared in
> > >> rte_vhost.h.
> > >>
> > >> And no need to move the functions from the .c to the .h file, as it
> > moreover
> > >> makes you inline them, which is not necessary here.
> > >
> > > Thanks for your reviewing the series firstly, Maxime. :)
> > >
> > > I considered to do it as you said. However I still preferred this one at last.
> > > Here are my reasons.
> > > 1) As far as I know, this set of functions are used privately in librte_vhost
> > before this feature.
> > > No strong request from the perspective of DPDK application. If I
> > understand well,  It is enough to expose the functions to all PMDs
> > > And it is better to keep internal use in DPDK.
> > 
> > But what the patch is doing is adding fd_man.h to the API, without doing
> > it properly. fd_man.h will be installed with other header files, and any
> > external application can use it.
> > 
> > >
> > > 2) These functions help to implement vhost user, but they are not strongly
> > related to other APIs of vhost user which have already exposed.
> > > if we want to expose them as APIs at lib layer, many functions and related
> > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > Your opinion?
> > 
> > Yes, it is not really vhost-related, it could be part of a more generic
> > library. It is maybe better to duplicate these lines, or to move this
> > code in a existing or new library.
> 
> I vote to move it to generic library, maybe eal. Poll() has better compatibility even though poll() is not as performant as epoll().
> 
> Thomas, how do you think?

I don't see why it should be exported outside of DPDK, except for PMDs.
I would tend to keep it internal but I understand that it would mean
duplicating some code, which is not ideal.
Please could you show what would be the content of the .h in EAL?

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-01 14:13           ` Thomas Monjalon
@ 2018-03-05  7:43             ` Yang, Zhiyong
  2018-03-05  8:54               ` Thomas Monjalon
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-05  7:43 UTC (permalink / raw)
  To: Thomas Monjalon, Tan, Jianfeng
  Cc: Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang, Zhihong, Wang, Dong1



> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Thursday, March 1, 2018 10:14 PM
> To: Tan, Jianfeng <jianfeng.tan@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>; Yang, Zhiyong
> <zhiyong.yang@intel.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie, Tiwei
> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>; Wang,
> Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 01/03/2018 07:02, Tan, Jianfeng:
> > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > >>>    lib/librte_vhost/fd_man.c | 274 -------------------------------------------
> ---
> > > >>>    lib/librte_vhost/fd_man.h | 258
> > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > >>
> > > >> I disagree with the patch.
> > > >> It is a good thing to reuse the code, but to do it, you need to
> > > >> extend the vhost lib API.
> > > >>
> > > >> New API need to be prefixed with rte_vhost_, and be declared in
> > > >> rte_vhost.h.
> > > >>
> > > >> And no need to move the functions from the .c to the .h file, as
> > > >> it
> > > moreover
> > > >> makes you inline them, which is not necessary here.
> > > >
> > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > >
> > > > I considered to do it as you said. However I still preferred this one at last.
> > > > Here are my reasons.
> > > > 1) As far as I know, this set of functions are used privately in
> > > > librte_vhost
> > > before this feature.
> > > > No strong request from the perspective of DPDK application. If I
> > > understand well,  It is enough to expose the functions to all PMDs
> > > > And it is better to keep internal use in DPDK.
> > >
> > > But what the patch is doing is adding fd_man.h to the API, without
> > > doing it properly. fd_man.h will be installed with other header
> > > files, and any external application can use it.
> > >
> > > >
> > > > 2) These functions help to implement vhost user, but they are not
> > > > strongly
> > > related to other APIs of vhost user which have already exposed.
> > > > if we want to expose them as APIs at lib layer, many functions and
> > > > related
> > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > Your opinion?
> > >
> > > Yes, it is not really vhost-related, it could be part of a more
> > > generic library. It is maybe better to duplicate these lines, or to
> > > move this code in a existing or new library.
> >
> > I vote to move it to generic library, maybe eal. Poll() has better
> compatibility even though poll() is not as performant as epoll().
> >
> > Thomas, how do you think?
> 
> I don't see why it should be exported outside of DPDK, except for PMDs.
> I would tend to keep it internal but I understand that it would mean
> duplicating some code, which is not ideal.
> Please could you show what would be the content of the .h in EAL?
> 

If they need to be exposed in EAL,
I think it should be the whole fdset mechanism, as follows.

typedef void (*fd_cb)(int fd, void *dat, int *remove);

struct fdentry {
	int fd;		/* -1 indicates this entry is empty */
	fd_cb rcb;	/* callback when this fd is readable. */
	fd_cb wcb;	/* callback when this fd is writeable.*/
	void *dat;	/* fd context */
	int busy;	/* whether this entry is being used in cb. */
};

struct fdset {
	struct pollfd rwfds[MAX_FDS];
	struct fdentry fd[MAX_FDS];
	pthread_mutex_t fd_mutex;
	int num;	/* current fd number of this fdset */
};

void fdset_init(struct fdset *pfdset);    (not used in the patchset)

int fdset_add(struct fdset *pfdset, int fd,
	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)

void *fdset_del(struct fdset *pfdset, int fd); (not used in the patchset)

void *fdset_event_dispatch(void *arg);   (used in this patchset)

It seems that we have 4 options:
1) expose them in librte_vhost
2) expose them in another existing or new lib, for example EAL.
3) duplicate the code lines at the PMD layer.
4) do it as this patch does.

thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-05  7:43             ` Yang, Zhiyong
@ 2018-03-05  8:54               ` Thomas Monjalon
  2018-03-13  8:46                 ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Thomas Monjalon @ 2018-03-05  8:54 UTC (permalink / raw)
  To: Yang, Zhiyong
  Cc: Tan, Jianfeng, Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1

05/03/2018 08:43, Yang, Zhiyong:
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 01/03/2018 07:02, Tan, Jianfeng:
> > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > >>>    lib/librte_vhost/fd_man.c | 274 -------------------------------------------
> > ---
> > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > >>
> > > > >> I disagree with the patch.
> > > > >> It is a good thing to reuse the code, but to do it, you need to
> > > > >> extend the vhost lib API.
> > > > >>
> > > > >> New API need to be prefixed with rte_vhost_, and be declared in
> > > > >> rte_vhost.h.
> > > > >>
> > > > >> And no need to move the functions from the .c to the .h file, as
> > > > >> it
> > > > moreover
> > > > >> makes you inline them, which is not necessary here.
> > > > >
> > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > >
> > > > > I considered to do it as you said. However I still preferred this one at last.
> > > > > Here are my reasons.
> > > > > 1) As far as I know, this set of functions are used privately in
> > > > > librte_vhost
> > > > before this feature.
> > > > > No strong request from the perspective of DPDK application. If I
> > > > understand well,  It is enough to expose the functions to all PMDs
> > > > > And it is better to keep internal use in DPDK.
> > > >
> > > > But what the patch is doing is adding fd_man.h to the API, without
> > > > doing it properly. fd_man.h will be installed with other header
> > > > files, and any external application can use it.
> > > >
> > > > >
> > > > > 2) These functions help to implement vhost user, but they are not
> > > > > strongly
> > > > related to other APIs of vhost user which have already exposed.
> > > > > if we want to expose them as APIs at lib layer, many functions and
> > > > > related
> > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > Your opinion?
> > > >
> > > > Yes, it is not really vhost-related, it could be part of a more
> > > > generic library. It is maybe better to duplicate these lines, or to
> > > > move this code in a existing or new library.
> > >
> > > I vote to move it to generic library, maybe eal. Poll() has better
> > compatibility even though poll() is not as performant as epoll().
> > >
> > > Thomas, how do you think?
> > 
> > I don't see why it should be exported outside of DPDK, except for PMDs.
> > I would tend to keep it internal but I understand that it would mean
> > duplicating some code, which is not ideal.
> > Please could you show what would be the content of the .h in EAL?
> > 
> 
> If needed to expose them in eal.h, 
> I think that they should be the whole fdset mechanism as followings.
> 
> typedef void (*fd_cb)(int fd, void *dat, int *remove);
> 
> struct fdentry {
> 	int fd;		/* -1 indicates this entry is empty */
> 	fd_cb rcb;	/* callback when this fd is readable. */
> 	fd_cb wcb;	/* callback when this fd is writeable.*/
> 	void *dat;	/* fd context */
> 	int busy;	/* whether this entry is being used in cb. */
> };
> 
> struct fdset {
> 	struct pollfd rwfds[MAX_FDS];
> 	struct fdentry fd[MAX_FDS];
> 	pthread_mutex_t fd_mutex;
> 	int num;	/* current fd number of this fdset */
> };
> 
> void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> 
> int fdset_add(struct fdset *pfdset, int fd,
> 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> 
> void *fdset_del(struct fdset *pfdset, int fd); (not used in the patchset)
> 
> void *fdset_event_dispatch(void *arg);   (used in this patchset)
> 
> seems that we have 4 options.
> 1) expose them in librte_vhost
> 2) expose them in other existing or new libs. for example,  eal.
> 3) duplicate the code lines at PMD layer.
> 4) do it as the patch does that.

It looks very close to the interrupt thread.
Can we have it all merged into a single event dispatcher thread?

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-05  8:54               ` Thomas Monjalon
@ 2018-03-13  8:46                 ` Yang, Zhiyong
  2018-03-13  9:43                   ` Thomas Monjalon
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-13  8:46 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Tan, Jianfeng, Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Monday, March 5, 2018 4:55 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 05/03/2018 08:43, Yang, Zhiyong:
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 01/03/2018 07:02, Tan, Jianfeng:
> > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > > >>>    lib/librte_vhost/fd_man.c | 274
> > > > > >>> -------------------------------------------
> > > ---
> > > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > > >>
> > > > > >> I disagree with the patch.
> > > > > >> It is a good thing to reuse the code, but to do it, you need
> > > > > >> to extend the vhost lib API.
> > > > > >>
> > > > > >> New API need to be prefixed with rte_vhost_, and be declared
> > > > > >> in rte_vhost.h.
> > > > > >>
> > > > > >> And no need to move the functions from the .c to the .h file,
> > > > > >> as it
> > > > > moreover
> > > > > >> makes you inline them, which is not necessary here.
> > > > > >
> > > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > > >
> > > > > > I considered to do it as you said. However I still preferred this one at
> last.
> > > > > > Here are my reasons.
> > > > > > 1) As far as I know, this set of functions are used privately
> > > > > > in librte_vhost
> > > > > before this feature.
> > > > > > No strong request from the perspective of DPDK application. If
> > > > > > I
> > > > > understand well,  It is enough to expose the functions to all
> > > > > PMDs
> > > > > > And it is better to keep internal use in DPDK.
> > > > >
> > > > > But what the patch is doing is adding fd_man.h to the API,
> > > > > without doing it properly. fd_man.h will be installed with other
> > > > > header files, and any external application can use it.
> > > > >
> > > > > >
> > > > > > 2) These functions help to implement vhost user, but they are
> > > > > > not strongly
> > > > > related to other APIs of vhost user which have already exposed.
> > > > > > if we want to expose them as APIs at lib layer, many functions
> > > > > > and related
> > > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > > Your opinion?
> > > > >
> > > > > Yes, it is not really vhost-related, it could be part of a more
> > > > > generic library. It is maybe better to duplicate these lines, or
> > > > > to move this code in a existing or new library.
> > > >
> > > > I vote to move it to generic library, maybe eal. Poll() has better
> > > compatibility even though poll() is not as performant as epoll().
> > > >
> > > > Thomas, how do you think?
> > >
> > > I don't see why it should be exported outside of DPDK, except for PMDs.
> > > I would tend to keep it internal but I understand that it would mean
> > > duplicating some code, which is not ideal.
> > > Please could you show what would be the content of the .h in EAL?
> > >
> >
> > If needed to expose them in eal.h,
> > I think that they should be the whole fdset mechanism as followings.
> >
> > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> >
> > struct fdentry {
> > 	int fd;		/* -1 indicates this entry is empty */
> > 	fd_cb rcb;	/* callback when this fd is readable. */
> > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > 	void *dat;	/* fd context */
> > 	int busy;	/* whether this entry is being used in cb. */
> > };
> >
> > struct fdset {
> > 	struct pollfd rwfds[MAX_FDS];
> > 	struct fdentry fd[MAX_FDS];
> > 	pthread_mutex_t fd_mutex;
> > 	int num;	/* current fd number of this fdset */
> > };
> >
> > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> >
> > int fdset_add(struct fdset *pfdset, int fd,
> > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> >
> > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > patchset)
> >
> > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> >
> > seems that we have 4 options.
> > 1) expose them in librte_vhost
> > 2) expose them in other existing or new libs. for example,  eal.
> > 3) duplicate the code lines at PMD layer.
> > 4) do it as the patch does that.
> 
> It looks to be very close of the interrupt thread.
> Can we have all merged in an unique event dispatcher thread?
> 

If I understand correctly, you mean that we can merge them into the EAL library, right?

Thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-13  8:46                 ` Yang, Zhiyong
@ 2018-03-13  9:43                   ` Thomas Monjalon
  2018-03-13  9:50                     ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Thomas Monjalon @ 2018-03-13  9:43 UTC (permalink / raw)
  To: Yang, Zhiyong
  Cc: Tan, Jianfeng, Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1

13/03/2018 09:46, Yang, Zhiyong:
> Hi Thomas,
> 
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 05/03/2018 08:43, Yang, Zhiyong:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 01/03/2018 07:02, Tan, Jianfeng:
> > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > > > >>>    lib/librte_vhost/fd_man.c | 274
> > > > > > >>> -------------------------------------------
> > > > ---
> > > > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > > > >>
> > > > > > >> I disagree with the patch.
> > > > > > >> It is a good thing to reuse the code, but to do it, you need
> > > > > > >> to extend the vhost lib API.
> > > > > > >>
> > > > > > >> New API need to be prefixed with rte_vhost_, and be declared
> > > > > > >> in rte_vhost.h.
> > > > > > >>
> > > > > > >> And no need to move the functions from the .c to the .h file,
> > > > > > >> as it
> > > > > > moreover
> > > > > > >> makes you inline them, which is not necessary here.
> > > > > > >
> > > > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > > > >
> > > > > > > I considered to do it as you said. However I still preferred this one at
> > last.
> > > > > > > Here are my reasons.
> > > > > > > 1) As far as I know, this set of functions are used privately
> > > > > > > in librte_vhost
> > > > > > before this feature.
> > > > > > > No strong request from the perspective of DPDK application. If
> > > > > > > I
> > > > > > understand well,  It is enough to expose the functions to all
> > > > > > PMDs
> > > > > > > And it is better to keep internal use in DPDK.
> > > > > >
> > > > > > But what the patch is doing is adding fd_man.h to the API,
> > > > > > without doing it properly. fd_man.h will be installed with other
> > > > > > header files, and any external application can use it.
> > > > > >
> > > > > > >
> > > > > > > 2) These functions help to implement vhost user, but they are
> > > > > > > not strongly
> > > > > > related to other APIs of vhost user which have already exposed.
> > > > > > > if we want to expose them as APIs at lib layer, many functions
> > > > > > > and related
> > > > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > > > Your opinion?
> > > > > >
> > > > > > Yes, it is not really vhost-related, it could be part of a more
> > > > > > generic library. It is maybe better to duplicate these lines, or
> > > > > > to move this code in a existing or new library.
> > > > >
> > > > > I vote to move it to generic library, maybe eal. Poll() has better
> > > > compatibility even though poll() is not as performant as epoll().
> > > > >
> > > > > Thomas, how do you think?
> > > >
> > > > I don't see why it should be exported outside of DPDK, except for PMDs.
> > > > I would tend to keep it internal but I understand that it would mean
> > > > duplicating some code, which is not ideal.
> > > > Please could you show what would be the content of the .h in EAL?
> > > >
> > >
> > > If needed to expose them in eal.h,
> > > I think that they should be the whole fdset mechanism as followings.
> > >
> > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > >
> > > struct fdentry {
> > > 	int fd;		/* -1 indicates this entry is empty */
> > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > 	void *dat;	/* fd context */
> > > 	int busy;	/* whether this entry is being used in cb. */
> > > };
> > >
> > > struct fdset {
> > > 	struct pollfd rwfds[MAX_FDS];
> > > 	struct fdentry fd[MAX_FDS];
> > > 	pthread_mutex_t fd_mutex;
> > > 	int num;	/* current fd number of this fdset */
> > > };
> > >
> > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > >
> > > int fdset_add(struct fdset *pfdset, int fd,
> > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > >
> > > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > > patchset)
> > >
> > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > >
> > > seems that we have 4 options.
> > > 1) expose them in librte_vhost
> > > 2) expose them in other existing or new libs. for example,  eal.
> > > 3) duplicate the code lines at PMD layer.
> > > 4) do it as the patch does that.
> > 
> > It looks to be very close of the interrupt thread.
> > Can we have all merged in an unique event dispatcher thread?
> > 
> 
> If I understand right, do you mean that we can merge them in lib eal ?  right?

Yes, merge it with the interrupt thread in EAL.
I didn't look at the details, but it seems the right place for such a thing.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-13  9:43                   ` Thomas Monjalon
@ 2018-03-13  9:50                     ` Yang, Zhiyong
  2018-03-15  9:32                       ` Thomas Monjalon
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-13  9:50 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Tan, Jianfeng, Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1



> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Tuesday, March 13, 2018 5:43 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 13/03/2018 09:46, Yang, Zhiyong:
> > Hi Thomas,
> >
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > 01/03/2018 07:02, Tan, Jianfeng:
> > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > > > > >>>    lib/librte_vhost/fd_man.c | 274
> > > > > > > >>> -------------------------------------------
> > > > > ---
> > > > > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > > > > >>
> > > > > > > >> I disagree with the patch.
> > > > > > > >> It is a good thing to reuse the code, but to do it, you
> > > > > > > >> need to extend the vhost lib API.
> > > > > > > >>
> > > > > > > >> New API need to be prefixed with rte_vhost_, and be
> > > > > > > >> declared in rte_vhost.h.
> > > > > > > >>
> > > > > > > >> And no need to move the functions from the .c to the .h
> > > > > > > >> file, as it
> > > > > > > moreover
> > > > > > > >> makes you inline them, which is not necessary here.
> > > > > > > >
> > > > > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > > > > >
> > > > > > > > I considered to do it as you said. However I still
> > > > > > > > preferred this one at
> > > last.
> > > > > > > > Here are my reasons.
> > > > > > > > 1) As far as I know, this set of functions are used
> > > > > > > > privately in librte_vhost
> > > > > > > before this feature.
> > > > > > > > No strong request from the perspective of DPDK
> > > > > > > > application. If I
> > > > > > > understand well,  It is enough to expose the functions to
> > > > > > > all PMDs
> > > > > > > > And it is better to keep internal use in DPDK.
> > > > > > >
> > > > > > > But what the patch is doing is adding fd_man.h to the API,
> > > > > > > without doing it properly. fd_man.h will be installed with
> > > > > > > other header files, and any external application can use it.
> > > > > > >
> > > > > > > >
> > > > > > > > 2) These functions help to implement vhost user, but they
> > > > > > > > are not strongly
> > > > > > > related to other APIs of vhost user which have already exposed.
> > > > > > > > if we want to expose them as APIs at lib layer, many
> > > > > > > > functions and related
> > > > > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > > > > Your opinion?
> > > > > > >
> > > > > > > Yes, it is not really vhost-related, it could be part of a
> > > > > > > more generic library. It is maybe better to duplicate these
> > > > > > > lines, or to move this code in a existing or new library.
> > > > > >
> > > > > > I vote to move it to generic library, maybe eal. Poll() has
> > > > > > better
> > > > > compatibility even though poll() is not as performant as epoll().
> > > > > >
> > > > > > Thomas, how do you think?
> > > > >
> > > > > I don't see why it should be exported outside of DPDK, except for
> PMDs.
> > > > > I would tend to keep it internal but I understand that it would
> > > > > mean duplicating some code, which is not ideal.
> > > > > Please could you show what would be the content of the .h in EAL?
> > > > >
> > > >
> > > > If needed to expose them in eal.h, I think that they should be the
> > > > whole fdset mechanism as followings.
> > > >
> > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > >
> > > > struct fdentry {
> > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > 	void *dat;	/* fd context */
> > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > };
> > > >
> > > > struct fdset {
> > > > 	struct pollfd rwfds[MAX_FDS];
> > > > 	struct fdentry fd[MAX_FDS];
> > > > 	pthread_mutex_t fd_mutex;
> > > > 	int num;	/* current fd number of this fdset */
> > > > };
> > > >
> > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > >
> > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > >
> > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > > > patchset)
> > > >
> > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > >
> > > > seems that we have 4 options.
> > > > 1) expose them in librte_vhost
> > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > 3) duplicate the code lines at PMD layer.
> > > > 4) do it as the patch does that.
> > >
> > > It looks to be very close of the interrupt thread.
> > > Can we have all merged in an unique event dispatcher thread?
> > >
> >
> > If I understand right, do you mean that we can merge them in lib eal ?  right?
> 
> Yes merge with interrupt thread in EAL.
> I didn't look at the details, but it seems the right place for such thing.
> 
Ok,  we have to expose them as new APIs.  Expect that somebody as DPDK users can use and like them as well. :)

Thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-13  9:50                     ` Yang, Zhiyong
@ 2018-03-15  9:32                       ` Thomas Monjalon
  2018-03-16  8:43                         ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Thomas Monjalon @ 2018-03-15  9:32 UTC (permalink / raw)
  To: Yang, Zhiyong
  Cc: Tan, Jianfeng, Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1

13/03/2018 10:50, Yang, Zhiyong:
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 13/03/2018 09:46, Yang, Zhiyong:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > I don't see why it should be exported outside of DPDK, except for
> > PMDs.
> > > > > > I would tend to keep it internal but I understand that it would
> > > > > > mean duplicating some code, which is not ideal.
> > > > > > Please could you show what would be the content of the .h in EAL?
> > > > > >
> > > > >
> > > > > If needed to expose them in eal.h, I think that they should be the
> > > > > whole fdset mechanism as followings.
> > > > >
> > > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > > >
> > > > > struct fdentry {
> > > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > > 	void *dat;	/* fd context */
> > > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > > };
> > > > >
> > > > > struct fdset {
> > > > > 	struct pollfd rwfds[MAX_FDS];
> > > > > 	struct fdentry fd[MAX_FDS];
> > > > > 	pthread_mutex_t fd_mutex;
> > > > > 	int num;	/* current fd number of this fdset */
> > > > > };
> > > > >
> > > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > > >
> > > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > > >
> > > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > > > > patchset)
> > > > >
> > > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > > >
> > > > > seems that we have 4 options.
> > > > > 1) expose them in librte_vhost
> > > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > > 3) duplicate the code lines at PMD layer.
> > > > > 4) do it as the patch does that.
> > > >
> > > > It looks to be very close of the interrupt thread.
> > > > Can we have all merged in an unique event dispatcher thread?
> > >
> > > If I understand right, do you mean that we can merge them in lib eal ?  right?
> > 
> > Yes merge with interrupt thread in EAL.
> > I didn't look at the details, but it seems the right place for such thing.
> > 
> Ok,  we have to expose them as new APIs.  Expect that somebody as DPDK users can use and like them as well. :)

I think you missed my initial question:
Is it possible to merge the vhost events needs in the EAL interrupt thread?

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v2 0/5] add support for virtio-user server mode
  2018-02-14 14:53 ` [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h Zhiyong Yang
  2018-02-27 17:51   ` Maxime Coquelin
@ 2018-03-15  9:45   ` zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 1/5] net/virtio: fix add pointer checking zhiyong.yang
                       ` (4 more replies)
  1 sibling, 5 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-15  9:45 UTC (permalink / raw)
  To: dev; +Cc: maxime.coquelin, jianfeng.tan, dong1.wang, zhihong.wang, thomas

In a container environment if the vhost-user backend restarts, there's no way
for it to reconnect to virtio-user currently. To address this, support for
server mode is added. In this mode the socket file is created by virtio-user,
which the backend then connects to. This means that if the backend restarts,
it can reconnect to virtio-user and continue communications.

This series adds support for the feature and targets the 18.05 release.

Virtio-user in server mode creates the socket file and then blocks, waiting for the
first connection from a vhost-user backend running in client mode.

Virtio-user in server mode supports repeated vhost reconnections with the same configuration.

Virtio-user supports only one connection at the same time in server/client mode.

How to test?
The following commands can be used as a reference.

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 -m 256,0 --no-pci \
--file-prefix=testpmd0 --vdev=net_virtio_user0,mac=00:11:22:33:44:10, \
path=/tmp/sock0,server=1,queues=1 -- -i --rxq=1 --txq=1 --no-numa

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3e000 -n 4 --socket-mem 256,0 \
--vdev 'net_vhost0,iface=/tmp/sock0,client=1,queues=1' -- -i --rxq=1 --txq=1 \
--nb-cores=1 --no-numa

step 1: at the virtio-user side, run "start"
step 2: at the vhost-user side, run "start tx_first 40000"

Changes in V2:
1. split out patches 1/5 and 2/5 from the v1 patchset, following Maxime's comments; they fix
existing issues that are not strongly related to server mode support.
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs according to Thomas' comments.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.

Zhiyong Yang (5):
  net/virtio: fix add pointer checking
  net/virtio: add checking for cvq
  eal: expose fdset related APIs
  net/virtio-user: add support for server mode
  net/vhost: add memory checking

 doc/guides/rel_notes/release_18_05.rst             |   7 +
 drivers/net/vhost/rte_eth_vhost.c                  |   9 +
 drivers/net/virtio/virtio_ethdev.c                 |  10 +-
 drivers/net/virtio/virtio_user/vhost_user.c        |  77 +++++-
 drivers/net/virtio/virtio_user/virtio_user_dev.c   |  44 ++--
 drivers/net/virtio/virtio_user/virtio_user_dev.h   |   8 +
 drivers/net/virtio/virtio_user_ethdev.c            |  82 +++++-
 lib/librte_eal/common/include/rte_eal_interrupts.h |  56 +++++
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 257 +++++++++++++++++++
 lib/librte_eal/rte_eal_version.map                 |  10 +
 lib/librte_vhost/Makefile                          |   2 +-
 lib/librte_vhost/fd_man.c                          | 274 ---------------------
 lib/librte_vhost/fd_man.h                          |  40 ---
 lib/librte_vhost/socket.c                          |  22 +-
 14 files changed, 548 insertions(+), 350 deletions(-)
 delete mode 100644 lib/librte_vhost/fd_man.c
 delete mode 100644 lib/librte_vhost/fd_man.h

-- 
2.14.3

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v2 1/5] net/virtio: fix add pointer checking
  2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
@ 2018-03-15  9:45     ` zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 2/5] net/virtio: add checking for cvq zhiyong.yang
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-15  9:45 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, jianfeng.tan, dong1.wang, zhihong.wang, thomas,
	stable, Zhiyong Yang

Add a NULL check for dev->data->rx_queues, because in some cases the
code would otherwise crash.
For example, virtio_notify_peers() can be reached before the memory for
rxvq has been allocated.

Fixes: 7365504f77e3("net/virtio: support guest announce")
Cc: stable@dpdk.org
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 884f74ad0..b38582c8d 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1273,9 +1273,14 @@ static void
 virtio_notify_peers(struct rte_eth_dev *dev)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct virtnet_rx *rxvq;
 	struct rte_mbuf *rarp_mbuf;
 
+	if (!dev->data->rx_queues)
+		return;
+
+	rxvq = dev->data->rx_queues[0];
+
 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
 			(struct ether_addr *)hw->mac_addr);
 	if (rarp_mbuf == NULL) {
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 2/5] net/virtio: add checking for cvq
  2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 1/5] net/virtio: fix add pointer checking zhiyong.yang
@ 2018-03-15  9:45     ` zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 3/5] eal: expose fdset related APIs zhiyong.yang
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-15  9:45 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, jianfeng.tan, dong1.wang, zhihong.wang, thomas,
	Zhiyong Yang

Check hw->cvq to decide whether virtio_ack_link_announce() should be called.
The original code does not cause any issue; the check is added only to make
the logic more reasonable.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index b38582c8d..74943a5a9 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1338,7 +1338,8 @@ virtio_interrupt_handler(void *param)
 
 	if (isr & VIRTIO_NET_S_ANNOUNCE) {
 		virtio_notify_peers(dev);
-		virtio_ack_link_announce(dev);
+		if (hw->cvq)
+			virtio_ack_link_announce(dev);
 	}
 }
 
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 3/5] eal: expose fdset related APIs
  2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 1/5] net/virtio: fix add pointer checking zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 2/5] net/virtio: add checking for cvq zhiyong.yang
@ 2018-03-15  9:45     ` zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 4/5] net/virtio-user: add support for server mode zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 5/5] net/vhost: add memory checking zhiyong.yang
  4 siblings, 0 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-15  9:45 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, jianfeng.tan, dong1.wang, zhihong.wang, thomas,
	Zhiyong Yang

This patch moves the fdset related functions from librte_vhost to librte_eal
and exposes them as new APIs so that they can be reused.
The code is only moved to its new place, "eal_interrupts.c", and the prefix
"rte_" is added to the function names; no functionality is changed.

librte_vhost is updated to call the functions by their new names.
The files fd_man.h and fd_man.c are removed from librte_vhost, and fd_man.c
is dropped from its Makefile.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 lib/librte_eal/common/include/rte_eal_interrupts.h |  56 +++++
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 257 +++++++++++++++++++
 lib/librte_eal/rte_eal_version.map                 |  10 +
 lib/librte_vhost/Makefile                          |   2 +-
 lib/librte_vhost/fd_man.c                          | 274 ---------------------
 lib/librte_vhost/fd_man.h                          |  40 ---
 lib/librte_vhost/socket.c                          |  22 +-
 7 files changed, 336 insertions(+), 325 deletions(-)
 delete mode 100644 lib/librte_vhost/fd_man.c
 delete mode 100644 lib/librte_vhost/fd_man.h

diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h
index 3f792a97f..1886d0dd0 100644
--- a/lib/librte_eal/common/include/rte_eal_interrupts.h
+++ b/lib/librte_eal/common/include/rte_eal_interrupts.h
@@ -17,6 +17,9 @@
 #ifndef _RTE_EAL_INTERRUPTS_H_
 #define _RTE_EAL_INTERRUPTS_H_
 
+#include <poll.h>
+#include "rte_compat.h"
+
 #define RTE_MAX_RXTX_INTR_VEC_ID     32
 #define RTE_INTR_VEC_ZERO_OFFSET      0
 #define RTE_INTR_VEC_RXTX_OFFSET      1
@@ -82,6 +85,24 @@ struct rte_intr_handle {
 };
 
 #define RTE_EPOLL_PER_THREAD        -1  /**< to hint using per thread epfd */
+#define RTE_MAX_FDS 1024
+
+typedef void (*fd_cb)(int fd, void *dat, int *remove);
+
+struct rte_fdentry {
+	int fd;		/* -1 indicates this entry is empty */
+	fd_cb rcb;	/* callback when this fd is readable. */
+	fd_cb wcb;	/* callback when this fd is writeable.*/
+	void *dat;	/* fd context */
+	int busy;	/* whether this entry is being used in cb. */
+};
+
+struct rte_fdset {
+	struct pollfd rwfds[RTE_MAX_FDS];
+	struct rte_fdentry fd[RTE_MAX_FDS];
+	pthread_mutex_t fd_mutex;
+	int num;	/* current fd number of this fdset */
+};
 
 /**
  * It waits for events on the epoll instance.
@@ -218,4 +239,39 @@ rte_intr_allow_others(struct rte_intr_handle *intr_handle);
 int
 rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
 
+/**
+ * Initialize struct rte_fdset.
+ *
+ * @param pfdset
+ *   Pointer to struct rte_fdset.
+ */
+void __rte_experimental
+rte_fdset_init(struct rte_fdset *pfdset);
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ *
+ * @param pfdset
+ *   Pointer to struct rte_fdset.
+ */
+int __rte_experimental
+rte_fdset_add(struct rte_fdset *pfdset, int fd,
+	fd_cb rcb, fd_cb wcb, void *dat);
+
+/**
+ * Unregister the fd in the fdset.
+ *
+ * @param pfdset
+ *   Pointer to struct rte_fdset.
+ */
+void * __rte_experimental
+rte_fdset_del(struct rte_fdset *pfdset, int fd);
+
+/**
+ * This function runs in an infinite blocking loop, polling the fds in pfdset.
+ * It calls the corresponding r/w handler when there is an event on an fd.
+ */
+void * __rte_experimental
+rte_fdset_event_dispatch(void *arg);
+
 #endif /* _RTE_EAL_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index f86f22f7b..95a382525 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -1230,3 +1230,260 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
 
 	return 0;
 }
+
+#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
+
+static int
+get_last_valid_idx(struct rte_fdset *pfdset, int last_valid_idx)
+{
+	int i;
+
+	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
+		;
+
+	return i;
+}
+
+static void
+fdset_move(struct rte_fdset *pfdset, int dst, int src)
+{
+	pfdset->fd[dst]    = pfdset->fd[src];
+	pfdset->rwfds[dst] = pfdset->rwfds[src];
+}
+
+static void
+fdset_shrink_nolock(struct rte_fdset *pfdset)
+{
+	int i;
+	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
+
+	for (i = 0; i < last_valid_idx; i++) {
+		if (pfdset->fd[i].fd != -1)
+			continue;
+
+		fdset_move(pfdset, i, last_valid_idx);
+		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
+	}
+	pfdset->num = last_valid_idx + 1;
+}
+
+/*
+ * Find deleted fd entries and remove them
+ */
+static void
+fdset_shrink(struct rte_fdset *pfdset)
+{
+	pthread_mutex_lock(&pfdset->fd_mutex);
+	fdset_shrink_nolock(pfdset);
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+}
+
+/**
+ * Returns the index in the fdset for a given fd.
+ * @return
+ *   index for the fd, or -1 if fd isn't in the fdset.
+ */
+static int
+fdset_find_fd(struct rte_fdset *pfdset, int fd)
+{
+	int i;
+
+	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
+		;
+
+	return i == pfdset->num ? -1 : i;
+}
+
+static void
+fdset_add_fd(struct rte_fdset *pfdset, int idx, int fd,
+	fd_cb rcb, fd_cb wcb, void *dat)
+{
+	struct rte_fdentry *pfdentry = &pfdset->fd[idx];
+	struct pollfd *pfd = &pfdset->rwfds[idx];
+
+	pfdentry->fd  = fd;
+	pfdentry->rcb = rcb;
+	pfdentry->wcb = wcb;
+	pfdentry->dat = dat;
+
+	pfd->fd = fd;
+	pfd->events  = rcb ? POLLIN : 0;
+	pfd->events |= wcb ? POLLOUT : 0;
+	pfd->revents = 0;
+}
+
+void
+rte_fdset_init(struct rte_fdset *pfdset)
+{
+	int i;
+
+	if (pfdset == NULL)
+		return;
+
+	for (i = 0; i < RTE_MAX_FDS; i++) {
+		pfdset->fd[i].fd = -1;
+		pfdset->fd[i].dat = NULL;
+	}
+	pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+int
+rte_fdset_add(struct rte_fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
+{
+	int i;
+
+	if (pfdset == NULL || fd == -1)
+		return -1;
+
+	pthread_mutex_lock(&pfdset->fd_mutex);
+	i = pfdset->num < RTE_MAX_FDS ? pfdset->num++ : -1;
+	if (i == -1) {
+		fdset_shrink_nolock(pfdset);
+		i = pfdset->num < RTE_MAX_FDS ? pfdset->num++ : -1;
+		if (i == -1) {
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+			return -2;
+		}
+	}
+
+	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+
+	return 0;
+}
+
+/**
+ *  Unregister the fd from the fdset.
+ *  Returns context of a given fd or NULL.
+ */
+void *
+rte_fdset_del(struct rte_fdset *pfdset, int fd)
+{
+	int i;
+	void *dat = NULL;
+
+	if (pfdset == NULL || fd == -1)
+		return NULL;
+
+	do {
+		pthread_mutex_lock(&pfdset->fd_mutex);
+
+		i = fdset_find_fd(pfdset, fd);
+		if (i != -1 && pfdset->fd[i].busy == 0) {
+			/* busy indicates r/wcb is executing! */
+			dat = pfdset->fd[i].dat;
+			pfdset->fd[i].fd = -1;
+			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+			pfdset->fd[i].dat = NULL;
+			i = -1;
+		}
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+	} while (i != -1);
+
+	return dat;
+}
+
+
+/**
+ * This function runs in an infinite blocking loop, polling the fds in pfdset.
+ * It calls the corresponding r/w handler when there is an event on an fd.
+ *
+ * Before the callback is called, we set the flag to busy status; If other
+ * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
+ * will wait until the flag is reset to zero(which indicates the callback is
+ * finished), then it could free the context after fdset_del.
+ */
+void *
+rte_fdset_event_dispatch(void *arg)
+{
+	int i;
+	struct pollfd *pfd;
+	struct rte_fdentry *pfdentry;
+	fd_cb rcb, wcb;
+	void *dat;
+	int fd, numfds;
+	int remove1, remove2;
+	int need_shrink;
+	struct rte_fdset *pfdset = arg;
+	int val;
+
+	if (pfdset == NULL)
+		return NULL;
+
+	while (1) {
+
+		/*
+		 * When poll is blocked, other threads might unregister
+		 * listenfds from and register new listenfds into fdset.
+		 * When poll returns, the entries for listenfds in the fdset
+		 * might have been updated. It is ok if there is unwanted call
+		 * for new listenfds.
+		 */
+		pthread_mutex_lock(&pfdset->fd_mutex);
+		numfds = pfdset->num;
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+
+		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
+		if (val < 0)
+			continue;
+
+		need_shrink = 0;
+		for (i = 0; i < numfds; i++) {
+			pthread_mutex_lock(&pfdset->fd_mutex);
+
+			pfdentry = &pfdset->fd[i];
+			fd = pfdentry->fd;
+			pfd = &pfdset->rwfds[i];
+
+			if (fd < 0) {
+				need_shrink = 1;
+				pthread_mutex_unlock(&pfdset->fd_mutex);
+				continue;
+			}
+
+			if (!pfd->revents) {
+				pthread_mutex_unlock(&pfdset->fd_mutex);
+				continue;
+			}
+
+			remove1 = remove2 = 0;
+
+			rcb = pfdentry->rcb;
+			wcb = pfdentry->wcb;
+			dat = pfdentry->dat;
+			pfdentry->busy = 1;
+
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+
+			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
+				rcb(fd, dat, &remove1);
+			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
+				wcb(fd, dat, &remove2);
+			pfdentry->busy = 0;
+			/*
+			 * fdset_del needs to check busy flag.
+			 * We don't allow fdset_del to be called in callback
+			 * directly.
+			 */
+			/*
+			 * When we are to clean up the fd from fdset,
+			 * because the fd is closed in the cb,
+			 * the old fd val could be reused by when creates new
+			 * listen fd in another thread, we couldn't call
+			 * fd_set_del.
+			 */
+			if (remove1 || remove2) {
+				pfdentry->fd = -1;
+				need_shrink = 1;
+			}
+		}
+
+		if (need_shrink)
+			fdset_shrink(pfdset);
+	}
+
+	return NULL;
+}
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index d12360235..8369f88bb 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -256,3 +256,13 @@ EXPERIMENTAL {
 	rte_service_start_with_defaults;
 
 } DPDK_18.02;
+
+EXPERIMENTAL {
+	global:
+
+	rte_fdset_init;
+	rte_fdset_add;
+	rte_fdset_del;
+	rte_fdset_event_dispatch;
+
+} DPDK_18.05;
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 5d6c6abae..eee378237 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -21,7 +21,7 @@ endif
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := iotlb.c socket.c vhost.c \
 					vhost_user.c virtio_net.c
 
 # install includes
diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
deleted file mode 100644
index 181711c2a..000000000
--- a/lib/librte_vhost/fd_man.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <string.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include "fd_man.h"
-
-#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
-
-static int
-get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
-{
-	int i;
-
-	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
-		;
-
-	return i;
-}
-
-static void
-fdset_move(struct fdset *pfdset, int dst, int src)
-{
-	pfdset->fd[dst]    = pfdset->fd[src];
-	pfdset->rwfds[dst] = pfdset->rwfds[src];
-}
-
-static void
-fdset_shrink_nolock(struct fdset *pfdset)
-{
-	int i;
-	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
-
-	for (i = 0; i < last_valid_idx; i++) {
-		if (pfdset->fd[i].fd != -1)
-			continue;
-
-		fdset_move(pfdset, i, last_valid_idx);
-		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
-	}
-	pfdset->num = last_valid_idx + 1;
-}
-
-/*
- * Find deleted fd entries and remove them
- */
-static void
-fdset_shrink(struct fdset *pfdset)
-{
-	pthread_mutex_lock(&pfdset->fd_mutex);
-	fdset_shrink_nolock(pfdset);
-	pthread_mutex_unlock(&pfdset->fd_mutex);
-}
-
-/**
- * Returns the index in the fdset for a given fd.
- * @return
- *   index for the fd, or -1 if fd isn't in the fdset.
- */
-static int
-fdset_find_fd(struct fdset *pfdset, int fd)
-{
-	int i;
-
-	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
-		;
-
-	return i == pfdset->num ? -1 : i;
-}
-
-static void
-fdset_add_fd(struct fdset *pfdset, int idx, int fd,
-	fd_cb rcb, fd_cb wcb, void *dat)
-{
-	struct fdentry *pfdentry = &pfdset->fd[idx];
-	struct pollfd *pfd = &pfdset->rwfds[idx];
-
-	pfdentry->fd  = fd;
-	pfdentry->rcb = rcb;
-	pfdentry->wcb = wcb;
-	pfdentry->dat = dat;
-
-	pfd->fd = fd;
-	pfd->events  = rcb ? POLLIN : 0;
-	pfd->events |= wcb ? POLLOUT : 0;
-	pfd->revents = 0;
-}
-
-void
-fdset_init(struct fdset *pfdset)
-{
-	int i;
-
-	if (pfdset == NULL)
-		return;
-
-	for (i = 0; i < MAX_FDS; i++) {
-		pfdset->fd[i].fd = -1;
-		pfdset->fd[i].dat = NULL;
-	}
-	pfdset->num = 0;
-}
-
-/**
- * Register the fd in the fdset with read/write handler and context.
- */
-int
-fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
-{
-	int i;
-
-	if (pfdset == NULL || fd == -1)
-		return -1;
-
-	pthread_mutex_lock(&pfdset->fd_mutex);
-	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
-	if (i == -1) {
-		fdset_shrink_nolock(pfdset);
-		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
-		if (i == -1) {
-			pthread_mutex_unlock(&pfdset->fd_mutex);
-			return -2;
-		}
-	}
-
-	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
-	pthread_mutex_unlock(&pfdset->fd_mutex);
-
-	return 0;
-}
-
-/**
- *  Unregister the fd from the fdset.
- *  Returns context of a given fd or NULL.
- */
-void *
-fdset_del(struct fdset *pfdset, int fd)
-{
-	int i;
-	void *dat = NULL;
-
-	if (pfdset == NULL || fd == -1)
-		return NULL;
-
-	do {
-		pthread_mutex_lock(&pfdset->fd_mutex);
-
-		i = fdset_find_fd(pfdset, fd);
-		if (i != -1 && pfdset->fd[i].busy == 0) {
-			/* busy indicates r/wcb is executing! */
-			dat = pfdset->fd[i].dat;
-			pfdset->fd[i].fd = -1;
-			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
-			pfdset->fd[i].dat = NULL;
-			i = -1;
-		}
-		pthread_mutex_unlock(&pfdset->fd_mutex);
-	} while (i != -1);
-
-	return dat;
-}
-
-
-/**
- * This functions runs in infinite blocking loop until there is no fd in
- * pfdset. It calls corresponding r/w handler if there is event on the fd.
- *
- * Before the callback is called, we set the flag to busy status; If other
- * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
- * will wait until the flag is reset to zero(which indicates the callback is
- * finished), then it could free the context after fdset_del.
- */
-void *
-fdset_event_dispatch(void *arg)
-{
-	int i;
-	struct pollfd *pfd;
-	struct fdentry *pfdentry;
-	fd_cb rcb, wcb;
-	void *dat;
-	int fd, numfds;
-	int remove1, remove2;
-	int need_shrink;
-	struct fdset *pfdset = arg;
-	int val;
-
-	if (pfdset == NULL)
-		return NULL;
-
-	while (1) {
-
-		/*
-		 * When poll is blocked, other threads might unregister
-		 * listenfds from and register new listenfds into fdset.
-		 * When poll returns, the entries for listenfds in the fdset
-		 * might have been updated. It is ok if there is unwanted call
-		 * for new listenfds.
-		 */
-		pthread_mutex_lock(&pfdset->fd_mutex);
-		numfds = pfdset->num;
-		pthread_mutex_unlock(&pfdset->fd_mutex);
-
-		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
-		if (val < 0)
-			continue;
-
-		need_shrink = 0;
-		for (i = 0; i < numfds; i++) {
-			pthread_mutex_lock(&pfdset->fd_mutex);
-
-			pfdentry = &pfdset->fd[i];
-			fd = pfdentry->fd;
-			pfd = &pfdset->rwfds[i];
-
-			if (fd < 0) {
-				need_shrink = 1;
-				pthread_mutex_unlock(&pfdset->fd_mutex);
-				continue;
-			}
-
-			if (!pfd->revents) {
-				pthread_mutex_unlock(&pfdset->fd_mutex);
-				continue;
-			}
-
-			remove1 = remove2 = 0;
-
-			rcb = pfdentry->rcb;
-			wcb = pfdentry->wcb;
-			dat = pfdentry->dat;
-			pfdentry->busy = 1;
-
-			pthread_mutex_unlock(&pfdset->fd_mutex);
-
-			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
-				rcb(fd, dat, &remove1);
-			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
-				wcb(fd, dat, &remove2);
-			pfdentry->busy = 0;
-			/*
-			 * fdset_del needs to check busy flag.
-			 * We don't allow fdset_del to be called in callback
-			 * directly.
-			 */
-			/*
-			 * When we are to clean up the fd from fdset,
-			 * because the fd is closed in the cb,
-			 * the old fd val could be reused by when creates new
-			 * listen fd in another thread, we couldn't call
-			 * fd_set_del.
-			 */
-			if (remove1 || remove2) {
-				pfdentry->fd = -1;
-				need_shrink = 1;
-			}
-		}
-
-		if (need_shrink)
-			fdset_shrink(pfdset);
-	}
-
-	return NULL;
-}
diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
deleted file mode 100644
index 3a9276c3c..000000000
--- a/lib/librte_vhost/fd_man.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-
-#ifndef _FD_MAN_H_
-#define _FD_MAN_H_
-#include <stdint.h>
-#include <pthread.h>
-#include <poll.h>
-
-#define MAX_FDS 1024
-
-typedef void (*fd_cb)(int fd, void *dat, int *remove);
-
-struct fdentry {
-	int fd;		/* -1 indicates this entry is empty */
-	fd_cb rcb;	/* callback when this fd is readable. */
-	fd_cb wcb;	/* callback when this fd is writeable.*/
-	void *dat;	/* fd context */
-	int busy;	/* whether this entry is being used in cb. */
-};
-
-struct fdset {
-	struct pollfd rwfds[MAX_FDS];
-	struct fdentry fd[MAX_FDS];
-	pthread_mutex_t fd_mutex;
-	int num;	/* current fd number of this fdset */
-};
-
-
-void fdset_init(struct fdset *pfdset);
-
-int fdset_add(struct fdset *pfdset, int fd,
-	fd_cb rcb, fd_cb wcb, void *dat);
-
-void *fdset_del(struct fdset *pfdset, int fd);
-
-void *fdset_event_dispatch(void *arg);
-
-#endif
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 83befdced..e05c16a5e 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -19,7 +19,7 @@
 
 #include <rte_log.h>
 
-#include "fd_man.h"
+#include <rte_interrupts.h>
 #include "vhost.h"
 #include "vhost_user.h"
 
@@ -66,7 +66,7 @@ struct vhost_user_connection {
 #define MAX_VHOST_SOCKET 1024
 struct vhost_user {
 	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
-	struct fdset fdset;
+	struct rte_fdset fdset;
 	int vsocket_cnt;
 	pthread_mutex_t mutex;
 };
@@ -80,7 +80,7 @@ static int vhost_user_start_client(struct vhost_user_socket *vsocket);
 
 static struct vhost_user vhost_user = {
 	.fdset = {
-		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
+		.fd = { [0 ... RTE_MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
 		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
 		.num = 0
 	},
@@ -216,7 +216,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	conn->connfd = fd;
 	conn->vsocket = vsocket;
 	conn->vid = vid;
-	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
+	ret = rte_fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
 			NULL, conn);
 	if (ret < 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
@@ -331,8 +331,8 @@ vhost_user_start_server(struct vhost_user_socket *vsocket)
 	if (ret < 0)
 		goto err;
 
-	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
-		  NULL, vsocket);
+	ret = rte_fdset_add(&vhost_user.fdset, fd,
+			    vhost_user_server_new_connection, NULL, vsocket);
 	if (ret < 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"failed to add listen fd %d to vhost server fdset\n",
@@ -744,7 +744,8 @@ rte_vhost_driver_unregister(const char *path)
 
 		if (!strcmp(vsocket->path, path)) {
 			if (vsocket->is_server) {
-				fdset_del(&vhost_user.fdset, vsocket->socket_fd);
+				rte_fdset_del(&vhost_user.fdset,
+					      vsocket->socket_fd);
 				close(vsocket->socket_fd);
 				unlink(path);
 			} else if (vsocket->reconnect) {
@@ -757,7 +758,7 @@ rte_vhost_driver_unregister(const char *path)
 			     conn = next) {
 				next = TAILQ_NEXT(conn, next);
 
-				fdset_del(&vhost_user.fdset, conn->connfd);
+				rte_fdset_del(&vhost_user.fdset, conn->connfd);
 				RTE_LOG(INFO, VHOST_CONFIG,
 					"free connfd = %d for device '%s'\n",
 					conn->connfd, path);
@@ -829,8 +830,9 @@ rte_vhost_driver_start(const char *path)
 		return -1;
 
 	if (fdset_tid == 0) {
-		int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,
-				     &vhost_user.fdset);
+		int ret = pthread_create(&fdset_tid, NULL,
+					 rte_fdset_event_dispatch,
+					 &vhost_user.fdset);
 		if (ret != 0)
 			RTE_LOG(ERR, VHOST_CONFIG,
 				"failed to create fdset handling thread");
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 4/5] net/virtio-user: add support for server mode
  2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
                       ` (2 preceding siblings ...)
  2018-03-15  9:45     ` [PATCH v2 3/5] eal: expose fdset related APIs zhiyong.yang
@ 2018-03-15  9:45     ` zhiyong.yang
  2018-03-15  9:45     ` [PATCH v2 5/5] net/vhost: add memory checking zhiyong.yang
  4 siblings, 0 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-15  9:45 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, jianfeng.tan, dong1.wang, zhihong.wang, thomas,
	Zhiyong Yang

virtio-user adds support for server mode in this patch.

Virtio-user with server mode creates socket file and then starts to wait
for the first connection from vhost user with client mode in blocking mode.

Server mode virtio-user supports repeated vhost reconnections with the
same configuration.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_user/vhost_user.c      | 77 ++++++++++++++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 44 +++++++++----
 drivers/net/virtio/virtio_user/virtio_user_dev.h |  8 +++
 drivers/net/virtio/virtio_user_ethdev.c          | 82 ++++++++++++++++++++++--
 4 files changed, 188 insertions(+), 23 deletions(-)

diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..cd30f713a 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,55 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	return 0;
 }
 
+static void
+virtio_user_set_block(int fd, bool enabled)
+{
+	int f;
+
+	f = fcntl(fd, F_GETFL);
+	if (enabled)
+		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+	else
+		fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+#define MAX_VIRTIO_USER_BACKLOG 128
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+	int ret;
+	int fd = dev->listenfd;
+	int connectfd;
+
+	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+			    dev->path, strerror(errno));
+		goto err;
+	}
+	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+	if (ret < 0)
+		goto err;
+
+	virtio_user_set_block(fd, true);
+	PMD_DRV_LOG(NOTICE, "virtio user server mode is waiting for connection from vhost user.");
+	while (1) {
+		connectfd = accept(fd, NULL, NULL);
+		if (connectfd >= 0) {
+			dev->connected = true;
+			break;
+		}
+	}
+
+	dev->vhostfd = connectfd;
+	virtio_user_set_block(connectfd, true);
+
+	return 0;
+err:
+	close(fd);
+	return -1;
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -390,6 +439,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
 {
 	int fd;
 	int flag;
+	int ret;
 	struct sockaddr_un un;
 
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -405,13 +455,30 @@ vhost_user_setup(struct virtio_user_dev *dev)
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
 	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-		close(fd);
-		return -1;
+
+	if (dev->is_server) {
+		static pthread_t fdset_tid;
+
+		dev->listenfd = fd;
+		if (fdset_tid == 0) {
+			ret = pthread_create(&fdset_tid, NULL,
+					     rte_fdset_event_dispatch,
+					     &dev->fdset);
+			if (ret < 0)
+				PMD_DRV_LOG(ERR, "failed to create fdset handling thread");
+		}
+		return virtio_user_start_server(dev, &un);
+
+	} else {
+		dev->vhostfd = fd;
+		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
+		dev->connected = true;
 	}
 
-	dev->vhostfd = fd;
 	return 0;
 }
 
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index f90fee9e5..23312344f 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
 {
 	uint32_t i;
 
+	if (!dev->connected)
+		return -1;
+
 	for (i = 0; i < dev->max_queue_pairs; ++i)
 		dev->ops->enable_qp(dev, i, 0);
 
@@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	dev->vhostfds = NULL;
 	dev->tapfds = NULL;
 
-	if (is_vhost_user_by_type(dev->path)) {
-		dev->ops = &ops_user;
+	if (dev->is_server) {
+		dev->ops = &ops_user;/* server mode only supports vhost user */
 	} else {
-		dev->ops = &ops_kernel;
-
-		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
-		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
-		if (!dev->vhostfds || !dev->tapfds) {
-			PMD_INIT_LOG(ERR, "Failed to malloc");
-			return -1;
-		}
-
-		for (q = 0; q < dev->max_queue_pairs; ++q) {
-			dev->vhostfds[q] = -1;
-			dev->tapfds[q] = -1;
+		if (is_vhost_user_by_type(dev->path)) {
+			dev->ops = &ops_user;
+		} else {
+			dev->ops = &ops_kernel;
+
+			dev->vhostfds = malloc(dev->max_queue_pairs *
+					       sizeof(int));
+			dev->tapfds = malloc(dev->max_queue_pairs *
+					     sizeof(int));
+			if (!dev->vhostfds || !dev->tapfds) {
+				PMD_INIT_LOG(ERR, "Failed to malloc");
+				return -1;
+			}
+
+			for (q = 0; q < dev->max_queue_pairs; ++q) {
+				dev->vhostfds[q] = -1;
+				dev->tapfds[q] = -1;
+			}
 		}
 	}
 
@@ -388,6 +397,10 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 
 	close(dev->vhostfd);
 
+	if (dev->is_server && dev->listenfd >= 0)
+		close(dev->listenfd);
+
+	dev->connected = false;
 	if (dev->vhostfds) {
 		for (i = 0; i < dev->max_queue_pairs; ++i)
 			close(dev->vhostfds[i]);
@@ -396,6 +409,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 	}
 
 	free(dev->ifname);
+
+	if (dev->is_server)
+		unlink(dev->path);
 }
 
 static uint8_t
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index 64467b4f9..9b73b9531 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -6,6 +6,8 @@
 #define _VIRTIO_USER_DEV_H
 
 #include <limits.h>
+#include <stdbool.h>
+#include <rte_interrupts.h>
 #include "../virtio_pci.h"
 #include "../virtio_ring.h"
 #include "vhost.h"
@@ -13,6 +15,12 @@
 struct virtio_user_dev {
 	/* for vhost_user backend */
 	int		vhostfd;
+	int		listenfd;   /* listening fd  */
+	bool		connected;  /* connection status */
+
+	/* support for server/client mode */
+	bool		is_server;
+	struct rte_fdset	fdset;
 
 	/* for vhost_kernel backend */
 	char		*ifname;
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 263649006..d36c50c5b 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -65,8 +65,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
 			if (r == 0 || (r < 0 && errno != EAGAIN)) {
 				dev->status &= (~VIRTIO_NET_S_LINK_UP);
-				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
-					    hw->port_id);
+
 				/* Only client mode is available now. Once the
 				 * connection is broken, it can never be up
 				 * again. Besides, this function could be called
@@ -74,9 +73,15 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				 * callback cannot be unregistered here, set an
 				 * alarm to do it.
 				 */
-				rte_eal_alarm_set(1,
+				if (dev->connected) {
+					dev->connected = false;
+					PMD_DRV_LOG(ERR, "virtio-user port %u is down",
+						    hw->port_id);
+					rte_eal_alarm_set(1,
 						  virtio_user_delayed_handler,
 						  (void *)hw);
+					hw->started = 0;
+				}
 			} else {
 				dev->status |= VIRTIO_NET_S_LINK_UP;
 			}
@@ -278,12 +283,15 @@ static const char *valid_args[] = {
 	VIRTIO_USER_ARG_QUEUE_SIZE,
 #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
 	VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+	VIRTIO_USER_ARG_SERVER_MODE,
 	NULL
 };
 
 #define VIRTIO_USER_DEF_CQ_EN	0
 #define VIRTIO_USER_DEF_Q_NUM	1
 #define VIRTIO_USER_DEF_Q_SZ	256
+#define VIRTIO_USER_DEF_SERVER_MODE	0
 
 static int
 get_string_arg(const char *key __rte_unused,
@@ -365,6 +373,49 @@ virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev)
 	rte_eth_dev_release_port(eth_dev);
 }
 
+static void
+virtio_user_server_reconnection(int fd, void *dat, int *remove __rte_unused)
+{
+	int ret;
+	int flag;
+	int connectfd;
+	struct virtio_user_dev *dev = dat;
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+
+	if (dev->connected)
+		return;
+
+	connectfd = accept(fd, NULL, NULL);
+	if (connectfd < 0)
+		return;
+
+	if (dev->vhostfd >= 0)
+		close(dev->vhostfd);
+
+	dev->vhostfd = connectfd;
+	flag = fcntl(connectfd, F_GETFD);
+	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
+
+	ret = virtio_user_start_device(dev);
+	if (ret < 0)
+		return;
+
+	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+		eth_dev->intr_handle->fd = connectfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
+
+	hw->started = 1;
+	dev->connected = true;
+	PMD_INIT_LOG(NOTICE, "virtio user server reconnection succeeds!");
+}
 /* Dev initialization routine. Invoked once for each virtio vdev at
  * EAL init time, see rte_bus_probe().
  * Returns 0 on success.
@@ -378,11 +429,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
 	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
 	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
 	char *path = NULL;
 	char *ifname = NULL;
 	char *mac_addr = NULL;
 	int ret = -1;
-
+	struct virtio_user_dev *vu_dev = NULL;
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
 	if (!kvlist) {
 		PMD_INIT_LOG(ERR, "error when parsing param");
@@ -445,6 +497,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+				       &get_integer_arg, &server_mode) < 0) {
+			PMD_INIT_LOG(ERR, "error to parse %s",
+				     VIRTIO_USER_ARG_SERVER_MODE);
+			goto end;
+		}
+	}
+
 	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
 		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
 				       &get_integer_arg, &cq) < 0) {
@@ -476,6 +537,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 
 		hw = eth_dev->data->dev_private;
+		vu_dev = virtio_user_get_dev(hw);
+		if (server_mode == 1)
+			vu_dev->is_server = true;
+		else
+			vu_dev->is_server = false;
 		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
 				 queue_size, mac_addr, &ifname) < 0) {
 			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
@@ -488,6 +554,14 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 			goto end;
 	}
 
+	if (vu_dev->is_server) {
+		ret = rte_fdset_add(&vu_dev->fdset, vu_dev->listenfd,
+				    virtio_user_server_reconnection, NULL,
+				    vu_dev);
+		if (ret < 0)
+			goto end;
+	}
+
 	/* previously called by rte_pci_probe() for physical dev */
 	if (eth_virtio_dev_init(eth_dev) < 0) {
 		PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails");
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 5/5] net/vhost: add memory checking
  2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
                       ` (3 preceding siblings ...)
  2018-03-15  9:45     ` [PATCH v2 4/5] net/virtio-user: add support for server mode zhiyong.yang
@ 2018-03-15  9:45     ` zhiyong.yang
  4 siblings, 0 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-15  9:45 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, jianfeng.tan, dong1.wang, zhihong.wang, thomas,
	Zhiyong Yang

When the vhost-user PMD works in client mode to connect/reconnect to a
virtio-user in server mode, the new thread may sometimes reach new_device
before queue_setup has been done, so it has to wait until memory
allocation is done.

Release note is updated in the patch.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 doc/guides/rel_notes/release_18_05.rst | 7 +++++++
 drivers/net/vhost/rte_eth_vhost.c      | 9 +++++++++
 2 files changed, 16 insertions(+)

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 3923dc253..7b301f021 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -41,6 +41,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+* **Added support for virtio-user server mode.**
+
+  In a container environment if the vhost-user backend restarts, there's no way
+  for it to reconnect to virtio-user. To address this, support for server mode
+  is added. In this mode the socket file is created by virtio-user, which the
+  backend then connects to. This means that if the backend restarts, it can
+  reconnect to virtio-user and continue communications.
 
 API Changes
 -----------
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 3aae01c39..2490bad0b 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -580,6 +580,15 @@ new_device(int vid)
 		eth_dev->data->numa_node = newnode;
 #endif
 
+	/* The thread may run here before eth_dev->data->rx_queues or
+	 * eth_dev->data->tx_queues have gotten valid memory, so have to
+	 * wait until memory allocation is done.
+	 */
+	while (!eth_dev->data->rx_queues ||
+	       !eth_dev->data->tx_queues) {
+		usleep(1);
+	}
+
 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
 		vq = eth_dev->data->rx_queues[i];
 		if (vq == NULL)
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-15  9:32                       ` Thomas Monjalon
@ 2018-03-16  8:43                         ` Yang, Zhiyong
  2018-03-21  6:51                           ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-16  8:43 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Tan, Jianfeng, Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1



> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Thursday, March 15, 2018 5:33 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 13/03/2018 10:50, Yang, Zhiyong:
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 13/03/2018 09:46, Yang, Zhiyong:
> > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > > I don't see why it should be exported outside of DPDK,
> > > > > > > except for
> > > PMDs.
> > > > > > > I would tend to keep it internal but I understand that it
> > > > > > > would mean duplicating some code, which is not ideal.
> > > > > > > Please could you show what would be the content of the .h in EAL?
> > > > > > >
> > > > > >
> > > > > > If needed to expose them in eal.h, I think that they should be
> > > > > > the whole fdset mechanism as followings.
> > > > > >
> > > > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > > > >
> > > > > > struct fdentry {
> > > > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > > > 	void *dat;	/* fd context */
> > > > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > > > };
> > > > > >
> > > > > > struct fdset {
> > > > > > 	struct pollfd rwfds[MAX_FDS];
> > > > > > 	struct fdentry fd[MAX_FDS];
> > > > > > 	pthread_mutex_t fd_mutex;
> > > > > > 	int num;	/* current fd number of this fdset */
> > > > > > };
> > > > > >
> > > > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > > > >
> > > > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > > > >
> > > > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in
> > > > > > the
> > > > > > patchset)
> > > > > >
> > > > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > > > >
> > > > > > seems that we have 4 options.
> > > > > > 1) expose them in librte_vhost
> > > > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > > > 3) duplicate the code lines at PMD layer.
> > > > > > 4) do it as the patch does that.
> > > > >
> > > > > It looks to be very close of the interrupt thread.
> > > > > Can we have all merged in an unique event dispatcher thread?
> > > >
> > > > If I understand right, do you mean that we can merge them in lib eal ?
> right?
> > >
> > > Yes merge with interrupt thread in EAL.
> > > I didn't look at the details, but it seems the right place for such thing.
> > >
> > Ok,  we have to expose them as new APIs.  Expect that somebody as DPDK
> > users can use and like them as well. :)
> 
> I think you missed my initial question:
> Is it possible to merge the vhost events needs in the EAL interrupt thread?
> 

Sorry for missing this question.
Compared to the vhost event mechanism (poll), the EAL interrupt thread uses epoll.
From my basic understanding, Linux and FreeBSD both support poll, while
epoll is supported by Linux only.

Hi Maxime, 
I want to know your opinion about Thomas's question.

This patchset aims to support virtio-user server mode and just needs to call event functions,
so let me try whether the librte_eal epoll mechanism can support server mode.


Thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v3 0/4] add support for virtio-user server mode
  2018-02-14 14:53 [PATCH 0/4] add to support for virtio-user server mode Zhiyong Yang
                   ` (3 preceding siblings ...)
  2018-02-14 14:53 ` [PATCH 4/4] net/vhost: add memory checking to support client mode Zhiyong Yang
@ 2018-03-21  3:03 ` zhiyong.yang
  2018-03-21  3:03   ` [PATCH v3 1/4] net/virtio: fix add pointer checking zhiyong.yang
                     ` (4 more replies)
  4 siblings, 5 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-21  3:03 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, zhihong.wang, maxime.coquelin, thomas, dong1.wang,
	tiwei.bie

In a container environment if the vhost-user backend restarts, there's no way
for it to reconnect to virtio-user currently. To address this, support for
server mode is added. In this mode the socket file is created by virtio-user,
which the backend then connects to. This means that if the backend restarts,
it can reconnect to virtio-user and continue communications.

The series adds support for the feature and targets the 18.05 release.

Virtio-user with server mode creates socket file and then starts to wait for the
first connection from vhost user with client mode in blocking mode.

Virtio-user with server mode supports repeated vhost reconnections with the
same configuration.

Virtio-user supports only one connection at the same time in server/client mode.

How to test?
The following scripts are as reference.

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 -m 256,0 --no-pci \
--file-prefix=testpmd0 --vdev=net_virtio_user0,mac=00:11:22:33:44:10, \
path=/tmp/sock0,server=1,queues=1 -- -i --rxq=1 --txq=1 --no-numa

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3e000 -n 4 --socket-mem 256,0 \
--vdev 'net_vhost0,iface=/tmp/sock0,client=1,queues=1' -- -i --rxq=1 --txq=1 \
--nb-cores=1 --no-numa

step1 : at the virtio-user side, run "start"
step2: at the vhost-user side run "start tx_first 40000"

Then you can get the numbers by running "show port stats all" at both sides.

Changes in V3:
1. use the EAL epoll mechanism instead of vhost events; the vhost event APIs
are no longer exported.
2. rebase the code on top of dpdk-next-virtio

Changes in V2:
1. split two patches, 1/5 and 2/5, out of the v1 patchset to fix some existing issues
that are not strongly related to server mode support, according to Maxime's
comments.
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs according to Thomas' comments.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.

Zhiyong Yang (4):
  net/virtio: fix add pointer checking
  net/virtio: add checking for cvq
  net/virtio-user: add support for server mode
  net/vhost: add NULL pointer checking

 doc/guides/rel_notes/release_18_05.rst           |  7 ++
 drivers/net/vhost/rte_eth_vhost.c                |  9 +++
 drivers/net/virtio/virtio_ethdev.c               |  9 ++-
 drivers/net/virtio/virtio_user/vhost_user.c      | 96 ++++++++++++++++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 47 ++++++++----
 drivers/net/virtio/virtio_user/virtio_user_dev.h | 11 +++
 drivers/net/virtio/virtio_user_ethdev.c          | 83 +++++++++++++++++++-
 7 files changed, 238 insertions(+), 24 deletions(-)

-- 
2.14.3

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v3 1/4] net/virtio: fix add pointer checking
  2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
@ 2018-03-21  3:03   ` zhiyong.yang
  2018-03-28  7:26     ` Tan, Jianfeng
                       ` (2 more replies)
  2018-03-21  3:03   ` [PATCH v3 2/4] net/virtio: add checking for cvq zhiyong.yang
                     ` (3 subsequent siblings)
  4 siblings, 3 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-21  3:03 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, zhihong.wang, maxime.coquelin, thomas, dong1.wang,
	tiwei.bie, stable, Zhiyong Yang

It is necessary to add pointer checking because in some cases the
code will crash. For example, the code may get here before the
memory allocation of rxvq is finished.

Fixes: 7365504f77e3("net/virtio: support guest announce")
Cc: stable@dpdk.org
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 884f74ad0..f377d8aa3 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1273,9 +1273,13 @@ static void
 virtio_notify_peers(struct rte_eth_dev *dev)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
+	struct virtnet_rx *rxvq;
 	struct rte_mbuf *rarp_mbuf;
 
+	if (!dev->data->rx_queues)
+		return;
+
+	rxvq = dev->data->rx_queues[0];
 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
 			(struct ether_addr *)hw->mac_addr);
 	if (rarp_mbuf == NULL) {
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v3 2/4] net/virtio: add checking for cvq
  2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
  2018-03-21  3:03   ` [PATCH v3 1/4] net/virtio: fix add pointer checking zhiyong.yang
@ 2018-03-21  3:03   ` zhiyong.yang
  2018-03-28  8:34     ` Tan, Jianfeng
                       ` (2 more replies)
  2018-03-21  3:03   ` [PATCH v3 3/4] net/virtio-user: add support for server mode zhiyong.yang
                     ` (2 subsequent siblings)
  4 siblings, 3 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-03-21  3:03 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, zhihong.wang, maxime.coquelin, thomas, dong1.wang,
	tiwei.bie, Zhiyong Yang

Add a check on cvq to decide whether virtio_ack_link_announce should be called.
The existing code doesn't cause any issue; the check is added just to make
the code more reasonable.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index f377d8aa3..b567d3cf8 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1337,7 +1337,8 @@ virtio_interrupt_handler(void *param)
 
 	if (isr & VIRTIO_NET_S_ANNOUNCE) {
 		virtio_notify_peers(dev);
-		virtio_ack_link_announce(dev);
+		if (hw->cvq)
+			virtio_ack_link_announce(dev);
 	}
 }
 
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v3 3/4] net/virtio-user: add support for server mode
  2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
  2018-03-21  3:03   ` [PATCH v3 1/4] net/virtio: fix add pointer checking zhiyong.yang
  2018-03-21  3:03   ` [PATCH v3 2/4] net/virtio: add checking for cvq zhiyong.yang
@ 2018-03-21  3:03   ` zhiyong.yang
  2018-03-28 15:14     ` Tan, Jianfeng
  2018-03-21  3:03   ` [PATCH v3 4/4] net/vhost: add NULL pointer checking zhiyong.yang
  2018-04-03 12:20   ` [PATCH v4 0/1] server mode virtio-user zhiyong.yang
  4 siblings, 1 reply; 65+ messages in thread
From: zhiyong.yang @ 2018-03-21  3:03 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, zhihong.wang, maxime.coquelin, thomas, dong1.wang,
	tiwei.bie, Zhiyong Yang

virtio-user adds support for server mode in this patch.

Virtio-user with server mode creates socket file and then starts to wait
for the first connection from vhost user with client mode in blocking mode.

Server mode virtio-user supports repeated vhost reconnections with
the same configuration.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 drivers/net/virtio/virtio_user/vhost_user.c      | 96 ++++++++++++++++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 47 ++++++++----
 drivers/net/virtio/virtio_user/virtio_user_dev.h | 11 +++
 drivers/net/virtio/virtio_user_ethdev.c          | 83 +++++++++++++++++++-
 4 files changed, 215 insertions(+), 22 deletions(-)

diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..90f4fed31 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,65 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	return 0;
 }
 
+static void
+virtio_user_set_block(int fd, bool enabled)
+{
+	int f;
+
+	f = fcntl(fd, F_GETFL);
+	if (enabled)
+		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+	else
+		fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+#define MAX_VIRTIO_USER_BACKLOG 128
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+	int ret;
+	int fd = dev->listenfd;
+	int connectfd;
+
+	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+			    dev->path, strerror(errno));
+		goto err;
+	}
+	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+	if (ret < 0)
+		goto err;
+
+	virtio_user_set_block(fd, true);
+	PMD_DRV_LOG(NOTICE, "virtio user server mode is waiting for connection from vhost user.");
+	while (1) {
+		connectfd = accept(fd, NULL, NULL);
+		if (connectfd >= 0) {
+			dev->connected = true;
+			break;
+		}
+	}
+
+	dev->vhostfd = connectfd;
+	virtio_user_set_block(connectfd, true);
+
+	return 0;
+err:
+	close(dev->epoll_fd);
+	close(dev->listenfd);
+	return -1;
+}
+
+static __attribute__((noreturn)) void *
+event_dispatch(void *arg)
+{
+	struct virtio_user_dev *dev = arg;
+
+	while (1)
+		rte_epoll_wait(dev->epoll_fd, &dev->rte_epoll_ev, 128, -1);
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -390,6 +449,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
 {
 	int fd;
 	int flag;
+	int ret;
 	struct sockaddr_un un;
 
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -405,13 +465,39 @@ vhost_user_setup(struct virtio_user_dev *dev)
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
 	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-		close(fd);
-		return -1;
+
+	if (dev->is_server) {
+		static pthread_t fdset_tid;
+
+		dev->listenfd = fd;
+		dev->epoll_fd = rte_intr_tls_epfd();
+		if (dev->epoll_fd < 0) {
+			PMD_DRV_LOG(ERR, "Can't create epoll file descriptor");
+			return -1;
+		}
+		virtio_user_set_block(dev->epoll_fd, true);
+		if (fdset_tid == 0) {
+			ret = pthread_create(&fdset_tid, NULL, event_dispatch,
+					     dev);
+			if (ret < 0) {
+				PMD_DRV_LOG(ERR, "failed to create fdset handling thread");
+				close(fd);
+				close(dev->epoll_fd);
+				return -1;
+			}
+		}
+		return virtio_user_start_server(dev, &un);
+
+	} else {
+		dev->vhostfd = fd;
+		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
+		dev->connected = true;
 	}
 
-	dev->vhostfd = fd;
 	return 0;
 }
 
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index f90fee9e5..1430b7cbd 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
 {
 	uint32_t i;
 
+	if (!dev->connected)
+		return -1;
+
 	for (i = 0; i < dev->max_queue_pairs; ++i)
 		dev->ops->enable_qp(dev, i, 0);
 
@@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	dev->vhostfds = NULL;
 	dev->tapfds = NULL;
 
-	if (is_vhost_user_by_type(dev->path)) {
-		dev->ops = &ops_user;
+	if (dev->is_server) {
+		dev->ops = &ops_user;/* server mode only supports vhost user */
 	} else {
-		dev->ops = &ops_kernel;
-
-		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
-		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
-		if (!dev->vhostfds || !dev->tapfds) {
-			PMD_INIT_LOG(ERR, "Failed to malloc");
-			return -1;
-		}
-
-		for (q = 0; q < dev->max_queue_pairs; ++q) {
-			dev->vhostfds[q] = -1;
-			dev->tapfds[q] = -1;
+		if (is_vhost_user_by_type(dev->path)) {
+			dev->ops = &ops_user;
+		} else {
+			dev->ops = &ops_kernel;
+
+			dev->vhostfds = malloc(dev->max_queue_pairs *
+					       sizeof(int));
+			dev->tapfds = malloc(dev->max_queue_pairs *
+					     sizeof(int));
+			if (!dev->vhostfds || !dev->tapfds) {
+				PMD_INIT_LOG(ERR, "Failed to malloc");
+				return -1;
+			}
+
+			for (q = 0; q < dev->max_queue_pairs; ++q) {
+				dev->vhostfds[q] = -1;
+				dev->tapfds[q] = -1;
+			}
 		}
 	}
 
@@ -388,6 +397,13 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 
 	close(dev->vhostfd);
 
+	if (dev->is_server && dev->listenfd >= 0)
+		close(dev->listenfd);
+
+	if (dev->is_server && dev->epoll_fd >= 0)
+		close(dev->epoll_fd);
+
+	dev->connected = false;
 	if (dev->vhostfds) {
 		for (i = 0; i < dev->max_queue_pairs; ++i)
 			close(dev->vhostfds[i]);
@@ -396,6 +412,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 	}
 
 	free(dev->ifname);
+
+	if (dev->is_server)
+		unlink(dev->path);
 }
 
 static uint8_t
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index 64467b4f9..a8e16d172 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -6,6 +6,9 @@
 #define _VIRTIO_USER_DEV_H
 
 #include <limits.h>
+#include <stdbool.h>
+#include <sys/epoll.h>
+#include <rte_interrupts.h>
 #include "../virtio_pci.h"
 #include "../virtio_ring.h"
 #include "vhost.h"
@@ -13,6 +16,14 @@
 struct virtio_user_dev {
 	/* for vhost_user backend */
 	int		vhostfd;
+	int		listenfd;   /* listening fd  */
+	bool		connected;  /* connection status */
+
+	int		epoll_fd;
+	struct rte_epoll_event rte_epoll_ev;
+
+	/* support for server/client mode */
+	bool		is_server;
 
 	/* for vhost_kernel backend */
 	char		*ifname;
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 263649006..e60542de5 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -65,8 +65,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
 			if (r == 0 || (r < 0 && errno != EAGAIN)) {
 				dev->status &= (~VIRTIO_NET_S_LINK_UP);
-				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
-					    hw->port_id);
+
 				/* Only client mode is available now. Once the
 				 * connection is broken, it can never be up
 				 * again. Besides, this function could be called
@@ -74,9 +73,15 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				 * callback cannot be unregistered here, set an
 				 * alarm to do it.
 				 */
-				rte_eal_alarm_set(1,
+				if (dev->connected) {
+					dev->connected = false;
+					PMD_DRV_LOG(ERR, "virtio-user port %u is down",
+						    hw->port_id);
+					rte_eal_alarm_set(1,
 						  virtio_user_delayed_handler,
 						  (void *)hw);
+					hw->started = 0;
+				}
 			} else {
 				dev->status |= VIRTIO_NET_S_LINK_UP;
 			}
@@ -278,12 +283,15 @@ static const char *valid_args[] = {
 	VIRTIO_USER_ARG_QUEUE_SIZE,
 #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
 	VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+	VIRTIO_USER_ARG_SERVER_MODE,
 	NULL
 };
 
 #define VIRTIO_USER_DEF_CQ_EN	0
 #define VIRTIO_USER_DEF_Q_NUM	1
 #define VIRTIO_USER_DEF_Q_SZ	256
+#define VIRTIO_USER_DEF_SERVER_MODE	0
 
 static int
 get_string_arg(const char *key __rte_unused,
@@ -365,6 +373,49 @@ virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev)
 	rte_eth_dev_release_port(eth_dev);
 }
 
+static void
+virtio_user_server_reconnection(int fd, void *dat)
+{
+	int ret;
+	int flag;
+	int connectfd;
+	struct virtio_user_dev *dev = dat;
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+	struct virtio_hw *hw = eth_dev->data->dev_private;
+
+	if (dev->connected)
+		return;
+
+	connectfd = accept(fd, NULL, NULL);
+	if (connectfd < 0)
+		return;
+
+	if (dev->vhostfd >= 0)
+		close(dev->vhostfd);
+
+	dev->vhostfd = connectfd;
+	flag = fcntl(connectfd, F_GETFD);
+	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
+
+	ret = virtio_user_start_device(dev);
+	if (ret < 0)
+		return;
+
+	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+		eth_dev->intr_handle->fd = connectfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
+
+	hw->started = 1;
+	dev->connected = true;
+	PMD_INIT_LOG(NOTICE, "virtio user server reconnection succeeds!");
+}
 /* Dev initialization routine. Invoked once for each virtio vdev at
  * EAL init time, see rte_bus_probe().
  * Returns 0 on success.
@@ -378,10 +429,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
 	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
 	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
 	char *path = NULL;
 	char *ifname = NULL;
 	char *mac_addr = NULL;
 	int ret = -1;
+	struct virtio_user_dev *vu_dev = NULL;
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
 	if (!kvlist) {
@@ -445,6 +498,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+				       &get_integer_arg, &server_mode) < 0) {
+			PMD_INIT_LOG(ERR, "error to parse %s",
+				     VIRTIO_USER_ARG_SERVER_MODE);
+			goto end;
+		}
+	}
+
 	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
 		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
 				       &get_integer_arg, &cq) < 0) {
@@ -476,6 +538,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 
 		hw = eth_dev->data->dev_private;
+		vu_dev = virtio_user_get_dev(hw);
+		if (server_mode == 1)
+			vu_dev->is_server = true;
+		else
+			vu_dev->is_server = false;
 		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
 				 queue_size, mac_addr, &ifname) < 0) {
 			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
@@ -488,6 +555,16 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 			goto end;
 	}
 
+	if (vu_dev->is_server) {
+		vu_dev->rte_epoll_ev.epdata.event = EPOLLIN | EPOLLET;
+		vu_dev->rte_epoll_ev.epdata.cb_fun = virtio_user_server_reconnection;
+		vu_dev->rte_epoll_ev.epdata.cb_arg = vu_dev;
+		ret = rte_epoll_ctl(vu_dev->epoll_fd, EPOLL_CTL_ADD,
+				    vu_dev->listenfd, &vu_dev->rte_epoll_ev);
+		if (ret < 0)
+			goto end;
+	}
+
 	/* previously called by rte_pci_probe() for physical dev */
 	if (eth_virtio_dev_init(eth_dev) < 0) {
 		PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails");
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v3 4/4] net/vhost: add NULL pointer checking
  2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
                     ` (2 preceding siblings ...)
  2018-03-21  3:03   ` [PATCH v3 3/4] net/virtio-user: add support for server mode zhiyong.yang
@ 2018-03-21  3:03   ` zhiyong.yang
  2018-03-29 13:19     ` Maxime Coquelin
  2018-04-03 12:20   ` [PATCH v4 0/1] server mode virtio-user zhiyong.yang
  4 siblings, 1 reply; 65+ messages in thread
From: zhiyong.yang @ 2018-03-21  3:03 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, zhihong.wang, maxime.coquelin, thomas, dong1.wang,
	tiwei.bie, Zhiyong Yang

When the vhost-user PMD works in client mode to connect/reconnect to a
virtio-user device in server mode, the new thread may sometimes reach
new_device before queue_setup has finished, so it has to wait until memory
allocation is done.

Release note is updated in the patch.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 doc/guides/rel_notes/release_18_05.rst | 7 +++++++
 drivers/net/vhost/rte_eth_vhost.c      | 9 +++++++++
 2 files changed, 16 insertions(+)

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 3923dc253..7b301f021 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -41,6 +41,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+* **Added support for virtio-user server mode.**
+
+  In a container environment if the vhost-user backend restarts, there's no way
+  for it to reconnect to virtio-user. To address this, support for server mode
+  is added. In this mode the socket file is created by virtio-user, which the
+  backend then connects to. This means that if the backend restarts, it can
+  reconnect to virtio-user and continue communications.
 
 API Changes
 -----------
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 3aae01c39..2490bad0b 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -580,6 +580,15 @@ new_device(int vid)
 		eth_dev->data->numa_node = newnode;
 #endif
 
+	/* The thread may run here before eth_dev->data->rx_queues or
+	 * eth_dev->data->tx_queues have gotten valid memory, so have to
+	 * wait until memory allocation is done.
+	 */
+	while (!eth_dev->data->rx_queues ||
+	       !eth_dev->data->tx_queues) {
+		usleep(1);
+	}
+
 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
 		vq = eth_dev->data->rx_queues[i];
 		if (vq == NULL)
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
  2018-03-16  8:43                         ` Yang, Zhiyong
@ 2018-03-21  6:51                           ` Yang, Zhiyong
  0 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-21  6:51 UTC (permalink / raw)
  To: Yang, Zhiyong, Thomas Monjalon
  Cc: Tan, Jianfeng, Maxime Coquelin, dev, yliu, Bie, Tiwei, Wang,
	Zhihong, Wang, Dong1

Hi Thomas, Maxime, Jianfeng,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yang, Zhiyong
> Sent: Friday, March 16, 2018 4:44 PM
> To: Thomas Monjalon <thomas@monjalon.net>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [dpdk-dev] [PATCH 1/4] vhost: move fdset functions from
> fd_man.c to fd_man.h
> 
> 
> 
> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > Sent: Thursday, March 15, 2018 5:33 PM
> > To: Yang, Zhiyong <zhiyong.yang@intel.com>
> > Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> > <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> > Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> > Wang, Dong1 <dong1.wang@intel.com>
> > Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> > fd_man.h
> >
> > 13/03/2018 10:50, Yang, Zhiyong:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 13/03/2018 09:46, Yang, Zhiyong:
> > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > > > I don't see why it should be exported outside of DPDK,
> > > > > > > > except for
> > > > PMDs.
> > > > > > > > I would tend to keep it internal but I understand that it
> > > > > > > > would mean duplicating some code, which is not ideal.
> > > > > > > > Please could you show what would be the content of the .h in
> EAL?
> > > > > > > >
> > > > > > >
> > > > > > > If needed to expose them in eal.h, I think that they should
> > > > > > > be the whole fdset mechanism as followings.
> > > > > > >
> > > > > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > > > > >
> > > > > > > struct fdentry {
> > > > > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > > > > 	void *dat;	/* fd context */
> > > > > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > > > > };
> > > > > > >
> > > > > > > struct fdset {
> > > > > > > 	struct pollfd rwfds[MAX_FDS];
> > > > > > > 	struct fdentry fd[MAX_FDS];
> > > > > > > 	pthread_mutex_t fd_mutex;
> > > > > > > 	int num;	/* current fd number of this fdset */
> > > > > > > };
> > > > > > >
> > > > > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > > > > >
> > > > > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > > > > >
> > > > > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in
> > > > > > > the
> > > > > > > patchset)
> > > > > > >
> > > > > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > > > > >
> > > > > > > seems that we have 4 options.
> > > > > > > 1) expose them in librte_vhost
> > > > > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > > > > 3) duplicate the code lines at PMD layer.
> > > > > > > 4) do it as the patch does that.
> > > > > >
> > > > > > It looks to be very close of the interrupt thread.
> > > > > > Can we have all merged in an unique event dispatcher thread?
> > > > >
> > > > > If I understand right, do you mean that we can merge them in lib eal ?
> > right?
> > > >
> > > > Yes merge with interrupt thread in EAL.
> > > > I didn't look at the details, but it seems the right place for such thing.
> > > >
> > > Ok,  we have to expose them as new APIs.  Expect that somebody as
> > > DPDK users can use and like them as well. :)
> >
> > I think you missed my initial question:
> > Is it possible to merge the vhost events needs in the EAL interrupt thread?
> >
> 
> Sorry to miss this question.
> Compared to vhost event mechanism(poll), Eal interrupt uses epoll, From my
> basic understanding,  linux and freeBSD both support poll, Epoll is supported
> by Linux only.
> 
> Hi Maxime,
> I want to know your opinion about Thomas's question.
> 
> This patchset aim to support for virtio-user server mode and just need to call
> event functions, So, Let me try librte_eal epoll mechanism if support for
> server mode.
> 

I have implemented the same functionality using the librte_eal epoll mechanism instead of
the vhost event mechanism, and v3 has been sent out. Please help review it; any comments are welcome.

Thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 1/4] net/virtio: fix add pointer checking
  2018-03-21  3:03   ` [PATCH v3 1/4] net/virtio: fix add pointer checking zhiyong.yang
@ 2018-03-28  7:26     ` Tan, Jianfeng
  2018-03-28  7:48       ` Yang, Zhiyong
  2018-03-29 11:59     ` Maxime Coquelin
  2018-03-29 12:01     ` Maxime Coquelin
  2 siblings, 1 reply; 65+ messages in thread
From: Tan, Jianfeng @ 2018-03-28  7:26 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: zhihong.wang, maxime.coquelin, thomas, dong1.wang, tiwei.bie, stable



On 3/21/2018 11:03 AM, zhiyong.yang@intel.com wrote:
> It is necessary to add pointer checking because in some case the
> code will cause crash. For example, the code goes here before
> memory allocation of rxvq is finished.
>
> Fixes: 7365504f77e3("net/virtio: support guest announce")
> Cc: stable@dpdk.org
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>

This fix is not related to the topic of this patch series, correct? 
Maybe next time, we can put it out of the patch series.

Reviewed-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks,
Jianfeng

> ---
>   drivers/net/virtio/virtio_ethdev.c | 6 +++++-
>   1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 884f74ad0..f377d8aa3 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1273,9 +1273,13 @@ static void
>   virtio_notify_peers(struct rte_eth_dev *dev)
>   {
>   	struct virtio_hw *hw = dev->data->dev_private;
> -	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +	struct virtnet_rx *rxvq;
>   	struct rte_mbuf *rarp_mbuf;
>   
> +	if (!dev->data->rx_queues)
> +		return;
> +
> +	rxvq = dev->data->rx_queues[0];
>   	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
>   			(struct ether_addr *)hw->mac_addr);
>   	if (rarp_mbuf == NULL) {

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 1/4] net/virtio: fix add pointer checking
  2018-03-28  7:26     ` Tan, Jianfeng
@ 2018-03-28  7:48       ` Yang, Zhiyong
  0 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-28  7:48 UTC (permalink / raw)
  To: Tan, Jianfeng, dev
  Cc: Wang, Zhihong, maxime.coquelin, thomas, Wang, Dong1, Bie, Tiwei, stable



> -----Original Message-----
> From: Tan, Jianfeng
> Sent: Wednesday, March 28, 2018 3:26 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> Cc: Wang, Zhihong <zhihong.wang@intel.com>;
> maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Dong1
> <dong1.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>;
> stable@dpdk.org
> Subject: Re: [PATCH v3 1/4] net/virtio: fix add pointer checking
> 
> 
> 
> On 3/21/2018 11:03 AM, zhiyong.yang@intel.com wrote:
> > It is necessary to add pointer checking because in some case the code
> > will cause crash. For example, the code goes here before memory
> > allocation of rxvq is finished.
> >
> > Fixes: 7365504f77e3("net/virtio: support guest announce")
> > Cc: stable@dpdk.org
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> 
> This fix is not related to the topic of this patch series, correct?
> Maybe next time, we can put it out of the patch series.
> 
> Reviewed-by: Jianfeng Tan <jianfeng.tan@intel.com>
> 
> Thanks,
> Jianfeng
> 
Thanks for your review, Jianfeng. This bug must be fixed first so that
the code can work correctly for the virtio-user server mode patch. :)

Thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 2/4] net/virtio: add checking for cvq
  2018-03-21  3:03   ` [PATCH v3 2/4] net/virtio: add checking for cvq zhiyong.yang
@ 2018-03-28  8:34     ` Tan, Jianfeng
  2018-03-29 11:59     ` Maxime Coquelin
  2018-03-29 12:06     ` Maxime Coquelin
  2 siblings, 0 replies; 65+ messages in thread
From: Tan, Jianfeng @ 2018-03-28  8:34 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: zhihong.wang, maxime.coquelin, thomas, dong1.wang, tiwei.bie



On 3/21/2018 11:03 AM, zhiyong.yang@intel.com wrote:
> Add checking for cvq to judge if virtio_ack_link_announce should be called.
> The existing code doesn't cause issue, and add the checking just to look
> more reasonable.
>
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>

Reviewed-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks,
Jianfeng

> ---
>   drivers/net/virtio/virtio_ethdev.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index f377d8aa3..b567d3cf8 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1337,7 +1337,8 @@ virtio_interrupt_handler(void *param)
>   
>   	if (isr & VIRTIO_NET_S_ANNOUNCE) {
>   		virtio_notify_peers(dev);
> -		virtio_ack_link_announce(dev);
> +		if (hw->cvq)
> +			virtio_ack_link_announce(dev);
>   	}
>   }
>   

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 3/4] net/virtio-user: add support for server mode
  2018-03-21  3:03   ` [PATCH v3 3/4] net/virtio-user: add support for server mode zhiyong.yang
@ 2018-03-28 15:14     ` Tan, Jianfeng
  2018-03-30  2:08       ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Tan, Jianfeng @ 2018-03-28 15:14 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: zhihong.wang, maxime.coquelin, thomas, dong1.wang, tiwei.bie

Hi Zhiyong,

Triggered by the community discussion of thread creation in a library 
(http://dpdk.org/dev/patchwork/patch/36579/), we could consider whether it is 
possible to avoid the pthread creation here.

After a quick think, it seems feasible. The key idea is to properly 
set up the LSC interrupt for a server mode virtio-user.
- If the virtio-user device has just been probed, we can set up the LSC interrupt 
by registering the listen fd with the interrupt thread. Whenever a vhost-user 
connection comes in, we get a chance to read the status; at that time, 
we can accept the connection and report that the status is up.
- After the connection is set up, we set up the LSC interrupt by 
registering the connection fd with the interrupt thread; if the connection is 
broken, we also get a chance to read the status, at which point we can switch 
back to the above setting.

What do you think?

Thanks,
Jianfeng

On 3/21/2018 11:03 AM, zhiyong.yang@intel.com wrote:
> virtio-user adds support for server mode in this patch.
>
> Virtio-user with server mode creates socket file and then starts to wait
> for the first connection from vhost user with client mode in blocking mode.
>
> Server mode virtio-user supports many times' vhost reconnections with
> the same configurations.
>
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   drivers/net/virtio/virtio_user/vhost_user.c      | 96 ++++++++++++++++++++++--
>   drivers/net/virtio/virtio_user/virtio_user_dev.c | 47 ++++++++----
>   drivers/net/virtio/virtio_user/virtio_user_dev.h | 11 +++
>   drivers/net/virtio/virtio_user_ethdev.c          | 83 +++++++++++++++++++-
>   4 files changed, 215 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
> index 91c6449bb..90f4fed31 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -378,6 +378,65 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   	return 0;
>   }
>   
> +static void
> +virtio_user_set_block(int fd, bool enabled)
> +{
> +	int f;
> +
> +	f = fcntl(fd, F_GETFL);
> +	if (enabled)
> +		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
> +	else
> +		fcntl(fd, F_SETFL, f | O_NONBLOCK);
> +}
> +
> +#define MAX_VIRTIO_USER_BACKLOG 128
> +static int
> +virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
> +{
> +	int ret;
> +	int fd = dev->listenfd;
> +	int connectfd;
> +
> +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
> +			    dev->path, strerror(errno));
> +		goto err;
> +	}
> +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> +	if (ret < 0)
> +		goto err;
> +
> +	virtio_user_set_block(fd, true);
> +	PMD_DRV_LOG(NOTICE, "virtio user server mode is waiting for connection from vhost user.");
> +	while (1) {
> +		connectfd = accept(fd, NULL, NULL);
> +		if (connectfd >= 0) {
> +			dev->connected = true;
> +			break;
> +		}
> +	}
> +
> +	dev->vhostfd = connectfd;
> +	virtio_user_set_block(connectfd, true);
> +
> +	return 0;
> +err:
> +	close(dev->epoll_fd);
> +	close(dev->listenfd);
> +	return -1;
> +}
> +
> +static __attribute__((noreturn)) void *
> +event_dispatch(void *arg)
> +{
> +	struct virtio_user_dev *dev = arg;
> +
> +	while (1)
> +		rte_epoll_wait(dev->epoll_fd, &dev->rte_epoll_ev, 128, -1);
> +}
> +
>   /**
>    * Set up environment to talk with a vhost user backend.
>    *
> @@ -390,6 +449,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
>   {
>   	int fd;
>   	int flag;
> +	int ret;
>   	struct sockaddr_un un;
>   
>   	fd = socket(AF_UNIX, SOCK_STREAM, 0);
> @@ -405,13 +465,39 @@ vhost_user_setup(struct virtio_user_dev *dev)
>   	memset(&un, 0, sizeof(un));
>   	un.sun_family = AF_UNIX;
>   	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> -		close(fd);
> -		return -1;
> +
> +	if (dev->is_server) {
> +		static pthread_t fdset_tid;
> +
> +		dev->listenfd = fd;
> +		dev->epoll_fd = rte_intr_tls_epfd();
> +		if (dev->epoll_fd < 0) {
> +			PMD_DRV_LOG(ERR, "Can't create epoll file descriptor");
> +			return -1;
> +		}
> +		virtio_user_set_block(dev->epoll_fd, true);
> +		if (fdset_tid == 0) {
> +			ret = pthread_create(&fdset_tid, NULL, event_dispatch,
> +					     dev);
> +			if (ret < 0) {
> +				PMD_DRV_LOG(ERR, "failed to create fdset handling thread");
> +				close(fd);
> +				close(dev->epoll_fd);
> +				return -1;
> +			}
> +		}
> +		return virtio_user_start_server(dev, &un);
> +
> +	} else {
> +		dev->vhostfd = fd;
> +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> +			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> +			close(fd);
> +			return -1;
> +		}
> +		dev->connected = true;
>   	}
>   
> -	dev->vhostfd = fd;
>   	return 0;
>   }
>   
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index f90fee9e5..1430b7cbd 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
>   {
>   	uint32_t i;
>   
> +	if (!dev->connected)
> +		return -1;
> +
>   	for (i = 0; i < dev->max_queue_pairs; ++i)
>   		dev->ops->enable_qp(dev, i, 0);
>   
> @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>   	dev->vhostfds = NULL;
>   	dev->tapfds = NULL;
>   
> -	if (is_vhost_user_by_type(dev->path)) {
> -		dev->ops = &ops_user;
> +	if (dev->is_server) {
> +		dev->ops = &ops_user;/* server mode only supports vhost user */
>   	} else {
> -		dev->ops = &ops_kernel;
> -
> -		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
> -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> -		if (!dev->vhostfds || !dev->tapfds) {
> -			PMD_INIT_LOG(ERR, "Failed to malloc");
> -			return -1;
> -		}
> -
> -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> -			dev->vhostfds[q] = -1;
> -			dev->tapfds[q] = -1;
> +		if (is_vhost_user_by_type(dev->path)) {
> +			dev->ops = &ops_user;
> +		} else {
> +			dev->ops = &ops_kernel;
> +
> +			dev->vhostfds = malloc(dev->max_queue_pairs *
> +					       sizeof(int));
> +			dev->tapfds = malloc(dev->max_queue_pairs *
> +					     sizeof(int));
> +			if (!dev->vhostfds || !dev->tapfds) {
> +				PMD_INIT_LOG(ERR, "Failed to malloc");
> +				return -1;
> +			}
> +
> +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> +				dev->vhostfds[q] = -1;
> +				dev->tapfds[q] = -1;
> +			}
>   		}
>   	}
>   
> @@ -388,6 +397,13 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>   
>   	close(dev->vhostfd);
>   
> +	if (dev->is_server && dev->listenfd >= 0)
> +		close(dev->listenfd);
> +
> +	if (dev->is_server && dev->epoll_fd >= 0)
> +		close(dev->epoll_fd);
> +
> +	dev->connected = false;
>   	if (dev->vhostfds) {
>   		for (i = 0; i < dev->max_queue_pairs; ++i)
>   			close(dev->vhostfds[i]);
> @@ -396,6 +412,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>   	}
>   
>   	free(dev->ifname);
> +
> +	if (dev->is_server)
> +		unlink(dev->path);
>   }
>   
>   static uint8_t
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> index 64467b4f9..a8e16d172 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> @@ -6,6 +6,9 @@
>   #define _VIRTIO_USER_DEV_H
>   
>   #include <limits.h>
> +#include <stdbool.h>
> +#include <sys/epoll.h>
> +#include <rte_interrupts.h>
>   #include "../virtio_pci.h"
>   #include "../virtio_ring.h"
>   #include "vhost.h"
> @@ -13,6 +16,14 @@
>   struct virtio_user_dev {
>   	/* for vhost_user backend */
>   	int		vhostfd;
> +	int		listenfd;   /* listening fd  */
> +	bool		connected;  /* connection status */
> +
> +	int		epoll_fd;
> +	struct rte_epoll_event rte_epoll_ev;
> +
> +	/* support for server/client mode */
> +	bool		is_server;
>   
>   	/* for vhost_kernel backend */
>   	char		*ifname;
> diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
> index 263649006..e60542de5 100644
> --- a/drivers/net/virtio/virtio_user_ethdev.c
> +++ b/drivers/net/virtio/virtio_user_ethdev.c
> @@ -65,8 +65,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
>   			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
>   			if (r == 0 || (r < 0 && errno != EAGAIN)) {
>   				dev->status &= (~VIRTIO_NET_S_LINK_UP);
> -				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
> -					    hw->port_id);
> +
>   				/* Only client mode is available now. Once the
>   				 * connection is broken, it can never be up
>   				 * again. Besides, this function could be called
> @@ -74,9 +73,15 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
>   				 * callback cannot be unregistered here, set an
>   				 * alarm to do it.
>   				 */
> -				rte_eal_alarm_set(1,
> +				if (dev->connected) {
> +					dev->connected = false;
> +					PMD_DRV_LOG(ERR, "virtio-user port %u is down",
> +						    hw->port_id);
> +					rte_eal_alarm_set(1,
>   						  virtio_user_delayed_handler,
>   						  (void *)hw);
> +					hw->started = 0;
> +				}
>   			} else {
>   				dev->status |= VIRTIO_NET_S_LINK_UP;
>   			}
> @@ -278,12 +283,15 @@ static const char *valid_args[] = {
>   	VIRTIO_USER_ARG_QUEUE_SIZE,
>   #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
>   	VIRTIO_USER_ARG_INTERFACE_NAME,
> +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> +	VIRTIO_USER_ARG_SERVER_MODE,
>   	NULL
>   };
>   
>   #define VIRTIO_USER_DEF_CQ_EN	0
>   #define VIRTIO_USER_DEF_Q_NUM	1
>   #define VIRTIO_USER_DEF_Q_SZ	256
> +#define VIRTIO_USER_DEF_SERVER_MODE	0
>   
>   static int
>   get_string_arg(const char *key __rte_unused,
> @@ -365,6 +373,49 @@ virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev)
>   	rte_eth_dev_release_port(eth_dev);
>   }
>   
> +static void
> +virtio_user_server_reconnection(int fd, void *dat)
> +{
> +	int ret;
> +	int flag;
> +	int connectfd;
> +	struct virtio_user_dev *dev = dat;
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> +	struct virtio_hw *hw = eth_dev->data->dev_private;
> +
> +	if (dev->connected)
> +		return;
> +
> +	connectfd = accept(fd, NULL, NULL);
> +	if (connectfd < 0)
> +		return;
> +
> +	if (dev->vhostfd >= 0)
> +		close(dev->vhostfd);
> +
> +	dev->vhostfd = connectfd;
> +	flag = fcntl(connectfd, F_GETFD);
> +	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
> +
> +	ret = virtio_user_start_device(dev);
> +	if (ret < 0)
> +		return;
> +
> +	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> +		eth_dev->intr_handle->fd = connectfd;
> +		rte_intr_callback_register(eth_dev->intr_handle,
> +					   virtio_interrupt_handler, eth_dev);
> +
> +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +			return;
> +		}
> +	}
> +
> +	hw->started = 1;
> +	dev->connected = true;
> +	PMD_INIT_LOG(NOTICE, "virtio user server reconnection succeeds!");
> +}
>   /* Dev initialization routine. Invoked once for each virtio vdev at
>    * EAL init time, see rte_bus_probe().
>    * Returns 0 on success.
> @@ -378,10 +429,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
>   	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
>   	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
>   	char *path = NULL;
>   	char *ifname = NULL;
>   	char *mac_addr = NULL;
>   	int ret = -1;
> +	struct virtio_user_dev *vu_dev = NULL;
>   
>   	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
>   	if (!kvlist) {
> @@ -445,6 +498,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   		}
>   	}
>   
> +	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
> +		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
> +				       &get_integer_arg, &server_mode) < 0) {
> +			PMD_INIT_LOG(ERR, "error to parse %s",
> +				     VIRTIO_USER_ARG_SERVER_MODE);
> +			goto end;
> +		}
> +	}
> +
>   	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
>   		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
>   				       &get_integer_arg, &cq) < 0) {
> @@ -476,6 +538,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   		}
>   
>   		hw = eth_dev->data->dev_private;
> +		vu_dev = virtio_user_get_dev(hw);
> +		if (server_mode == 1)
> +			vu_dev->is_server = true;
> +		else
> +			vu_dev->is_server = false;
>   		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
>   				 queue_size, mac_addr, &ifname) < 0) {
>   			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
> @@ -488,6 +555,16 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   			goto end;
>   	}
>   
> +	if (vu_dev->is_server) {
> +		vu_dev->rte_epoll_ev.epdata.event = EPOLLIN | EPOLLET;
> +		vu_dev->rte_epoll_ev.epdata.cb_fun = virtio_user_server_reconnection;
> +		vu_dev->rte_epoll_ev.epdata.cb_arg = vu_dev;
> +		ret = rte_epoll_ctl(vu_dev->epoll_fd, EPOLL_CTL_ADD,
> +				    vu_dev->listenfd, &vu_dev->rte_epoll_ev);
> +		if (ret < 0)
> +			goto end;
> +	}
> +
>   	/* previously called by rte_pci_probe() for physical dev */
>   	if (eth_virtio_dev_init(eth_dev) < 0) {
>   		PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails");

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 1/4] net/virtio: fix add pointer checking
  2018-03-21  3:03   ` [PATCH v3 1/4] net/virtio: fix add pointer checking zhiyong.yang
  2018-03-28  7:26     ` Tan, Jianfeng
@ 2018-03-29 11:59     ` Maxime Coquelin
  2018-03-29 12:01     ` Maxime Coquelin
  2 siblings, 0 replies; 65+ messages in thread
From: Maxime Coquelin @ 2018-03-29 11:59 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: jianfeng.tan, zhihong.wang, thomas, dong1.wang, tiwei.bie, stable



On 03/21/2018 04:03 AM, zhiyong.yang@intel.com wrote:
> It is necessary to add pointer checking because in some case the
> code will cause crash. For example, the code goes here before
> memory allocation of rxvq is finished.
> 
> Fixes: 7365504f77e3("net/virtio: support guest announce")
> Cc: stable@dpdk.org
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   drivers/net/virtio/virtio_ethdev.c | 6 +++++-
>   1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 884f74ad0..f377d8aa3 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1273,9 +1273,13 @@ static void
>   virtio_notify_peers(struct rte_eth_dev *dev)
>   {
>   	struct virtio_hw *hw = dev->data->dev_private;
> -	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +	struct virtnet_rx *rxvq;
>   	struct rte_mbuf *rarp_mbuf;
>   
> +	if (!dev->data->rx_queues)
> +		return;
> +
> +	rxvq = dev->data->rx_queues[0];
>   	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
>   			(struct ether_addr *)hw->mac_addr);
>   	if (rarp_mbuf == NULL) {
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 2/4] net/virtio: add checking for cvq
  2018-03-21  3:03   ` [PATCH v3 2/4] net/virtio: add checking for cvq zhiyong.yang
  2018-03-28  8:34     ` Tan, Jianfeng
@ 2018-03-29 11:59     ` Maxime Coquelin
  2018-03-29 12:06     ` Maxime Coquelin
  2 siblings, 0 replies; 65+ messages in thread
From: Maxime Coquelin @ 2018-03-29 11:59 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: jianfeng.tan, zhihong.wang, thomas, dong1.wang, tiwei.bie



On 03/21/2018 04:03 AM, zhiyong.yang@intel.com wrote:
> Add checking for cvq to judge if virtio_ack_link_announce should be called.
> The existing code doesn't cause issue, and add the checking just to look
> more reasonable.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   drivers/net/virtio/virtio_ethdev.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index f377d8aa3..b567d3cf8 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1337,7 +1337,8 @@ virtio_interrupt_handler(void *param)
>   
>   	if (isr & VIRTIO_NET_S_ANNOUNCE) {
>   		virtio_notify_peers(dev);
> -		virtio_ack_link_announce(dev);
> +		if (hw->cvq)
> +			virtio_ack_link_announce(dev);
>   	}
>   }
>   
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 1/4] net/virtio: fix add pointer checking
  2018-03-21  3:03   ` [PATCH v3 1/4] net/virtio: fix add pointer checking zhiyong.yang
  2018-03-28  7:26     ` Tan, Jianfeng
  2018-03-29 11:59     ` Maxime Coquelin
@ 2018-03-29 12:01     ` Maxime Coquelin
  2 siblings, 0 replies; 65+ messages in thread
From: Maxime Coquelin @ 2018-03-29 12:01 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: jianfeng.tan, zhihong.wang, thomas, dong1.wang, tiwei.bie, stable



On 03/21/2018 04:03 AM, zhiyong.yang@intel.com wrote:
> It is necessary to add pointer checking because in some case the
> code will cause crash. For example, the code goes here before
> memory allocation of rxvq is finished.
> 
> Fixes: 7365504f77e3("net/virtio: support guest announce")
> Cc: stable@dpdk.org
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   drivers/net/virtio/virtio_ethdev.c | 6 +++++-
>   1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 884f74ad0..f377d8aa3 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1273,9 +1273,13 @@ static void
>   virtio_notify_peers(struct rte_eth_dev *dev)
>   {
>   	struct virtio_hw *hw = dev->data->dev_private;
> -	struct virtnet_rx *rxvq = dev->data->rx_queues[0];
> +	struct virtnet_rx *rxvq;
>   	struct rte_mbuf *rarp_mbuf;
>   
> +	if (!dev->data->rx_queues)
> +		return;
> +
> +	rxvq = dev->data->rx_queues[0];
>   	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
>   			(struct ether_addr *)hw->mac_addr);
>   	if (rarp_mbuf == NULL) {
> 

Applied to dpdk-next-virtio/master.

Thanks,
Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 2/4] net/virtio: add checking for cvq
  2018-03-21  3:03   ` [PATCH v3 2/4] net/virtio: add checking for cvq zhiyong.yang
  2018-03-28  8:34     ` Tan, Jianfeng
  2018-03-29 11:59     ` Maxime Coquelin
@ 2018-03-29 12:06     ` Maxime Coquelin
  2 siblings, 0 replies; 65+ messages in thread
From: Maxime Coquelin @ 2018-03-29 12:06 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: jianfeng.tan, zhihong.wang, thomas, dong1.wang, tiwei.bie



On 03/21/2018 04:03 AM, zhiyong.yang@intel.com wrote:
> Add checking for cvq to judge if virtio_ack_link_announce should be called.
> The existing code doesn't cause issue, and add the checking just to look
> more reasonable.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   drivers/net/virtio/virtio_ethdev.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index f377d8aa3..b567d3cf8 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1337,7 +1337,8 @@ virtio_interrupt_handler(void *param)
>   
>   	if (isr & VIRTIO_NET_S_ANNOUNCE) {
>   		virtio_notify_peers(dev);
> -		virtio_ack_link_announce(dev);
> +		if (hw->cvq)
> +			virtio_ack_link_announce(dev);
>   	}
>   }
>   
> 

Applied to dpdk-next-virtio/master.

Thanks,
Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 4/4] net/vhost: add NULL pointer checking
  2018-03-21  3:03   ` [PATCH v3 4/4] net/vhost: add NULL pointer checking zhiyong.yang
@ 2018-03-29 13:19     ` Maxime Coquelin
  2018-03-30  2:00       ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Maxime Coquelin @ 2018-03-29 13:19 UTC (permalink / raw)
  To: zhiyong.yang, dev
  Cc: jianfeng.tan, zhihong.wang, thomas, dong1.wang, tiwei.bie

Hi,

On 03/21/2018 04:03 AM, zhiyong.yang@intel.com wrote:
> When vhost user PMD works in client mode to connect/reconnect virtio-user
> with server mode, new thread sometimes may run to new_device before
> queue_setup has been done, So have to wait until memory allocation is
> done.
> 
> Release note is updated in the patch.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   doc/guides/rel_notes/release_18_05.rst | 7 +++++++
>   drivers/net/vhost/rte_eth_vhost.c      | 9 +++++++++
>   2 files changed, 16 insertions(+)
> 
> diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
> index 3923dc253..7b301f021 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -41,6 +41,13 @@ New Features
>        Also, make sure to start the actual text at the margin.
>        =========================================================
>   
> +* **Added support for virtio-user server mode.**
> +
> +  In a container environment if the vhost-user backend restarts, there's no way
> +  for it to reconnect to virtio-user. To address this, support for server mode
> +  is added. In this mode the socket file is created by virtio-user, which the
> +  backend then connects to. This means that if the backend restarts, it can
> +  reconnect to virtio-user and continue communications.

I think this shouldn't be part of this patch.

>   
>   API Changes
>   -----------
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index 3aae01c39..2490bad0b 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -580,6 +580,15 @@ new_device(int vid)
>   		eth_dev->data->numa_node = newnode;
>   #endif
>   
> +	/* The thread may run here before eth_dev->data->rx_queues or
> +	 * eth_dev->data->tx_queues have gotten valid memory, so have to
> +	 * wait until memory allocation is done.
> +	 */
> +	while (!eth_dev->data->rx_queues ||
> +	       !eth_dev->data->tx_queues) {
> +		usleep(1);
> +	}
> +
>   	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
>   		vq = eth_dev->data->rx_queues[i];
>   		if (vq == NULL)
> 

I don't like the idea of polling here.
It looks like Junjie is addressing the problem in a different way [0],
do you confirm it would work in your case?

Thanks,
Maxime

[0]: http://dpdk.org/dev/patchwork/patch/36643/

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 4/4] net/vhost: add NULL pointer checking
  2018-03-29 13:19     ` Maxime Coquelin
@ 2018-03-30  2:00       ` Yang, Zhiyong
  2018-03-30  7:41         ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-30  2:00 UTC (permalink / raw)
  To: Maxime Coquelin, dev
  Cc: Tan, Jianfeng, Wang, Zhihong, thomas, Wang, Dong1, Bie, Tiwei

Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Thursday, March 29, 2018 9:20 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>; thomas@monjalon.net; Wang, Dong1
> <dong1.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> Subject: Re: [PATCH v3 4/4] net/vhost: add NULL pointer checking
> 
> Hi,
> 
> On 03/21/2018 04:03 AM, zhiyong.yang@intel.com wrote:
> > When vhost user PMD works in client mode to connect/reconnect
> > virtio-user with server mode, new thread sometimes may run to
> > new_device before queue_setup has been done, So have to wait until
> > memory allocation is done.
> >
> > Release note is updated in the patch.
> >
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > ---
> >   doc/guides/rel_notes/release_18_05.rst | 7 +++++++
> >   drivers/net/vhost/rte_eth_vhost.c      | 9 +++++++++
> >   2 files changed, 16 insertions(+)
> >
> > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > b/doc/guides/rel_notes/release_18_05.rst
> > index 3923dc253..7b301f021 100644
> > --- a/doc/guides/rel_notes/release_18_05.rst
> > +++ b/doc/guides/rel_notes/release_18_05.rst
> > @@ -41,6 +41,13 @@ New Features
> >        Also, make sure to start the actual text at the margin.
> >
> =========================================================
> >
> > +* **Added support for virtio-user server mode.**
> > +
> > +  In a container environment if the vhost-user backend restarts,
> > + there's no way  for it to reconnect to virtio-user. To address this,
> > + support for server mode  is added. In this mode the socket file is
> > + created by virtio-user, which the  backend then connects to. This
> > + means that if the backend restarts, it can  reconnect to virtio-user and
> continue communications.
> 
> I think this shouldn't be part of this patch.
> 
Ok, I can merge it with the previous patch 3/4.

> >
> >   API Changes
> >   -----------
> > diff --git a/drivers/net/vhost/rte_eth_vhost.c
> > b/drivers/net/vhost/rte_eth_vhost.c
> > index 3aae01c39..2490bad0b 100644
> > --- a/drivers/net/vhost/rte_eth_vhost.c
> > +++ b/drivers/net/vhost/rte_eth_vhost.c
> > @@ -580,6 +580,15 @@ new_device(int vid)
> >   		eth_dev->data->numa_node = newnode;
> >   #endif
> >
> > +	/* The thread may run here before eth_dev->data->rx_queues or
> > +	 * eth_dev->data->tx_queues have gotten valid memory, so have to
> > +	 * wait until memory allocation is done.
> > +	 */
> > +	while (!eth_dev->data->rx_queues ||
> > +	       !eth_dev->data->tx_queues) {
> > +		usleep(1);
> > +	}
> > +
> >   	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
> >   		vq = eth_dev->data->rx_queues[i];
> >   		if (vq == NULL)
> >
> 
> I don't like the idea of polling here.
> It looks like Junjie is addressing the problem in a different way [0], do you
> confirm it would work in your case?
> 

Great to hear that.  I have to fix it when the issue is found.
It's better to have another solution. I will test it later.

Thanks
Zhiyong

> Thanks,
> Maxime
> 
> [0]: http://dpdk.org/dev/patchwork/patch/36643/

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 3/4] net/virtio-user: add support for server mode
  2018-03-28 15:14     ` Tan, Jianfeng
@ 2018-03-30  2:08       ` Yang, Zhiyong
  0 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-30  2:08 UTC (permalink / raw)
  To: Tan, Jianfeng, dev
  Cc: Wang, Zhihong, maxime.coquelin, thomas, Wang, Dong1, Bie, Tiwei

Hi Jianfeng,

> -----Original Message-----
> From: Tan, Jianfeng
> Sent: Wednesday, March 28, 2018 11:15 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> Cc: Wang, Zhihong <zhihong.wang@intel.com>;
> maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Dong1
> <dong1.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> Subject: Re: [PATCH v3 3/4] net/virtio-user: add support for server mode
> 
> Hi Zhiyong,
> 
> Triggered by the community discussion of thread creation in a library
> (http://dpdk.org/dev/patchwork/patch/36579/), we can think about if
> possible to avoid the pthread creation here.
> 
> At a quick glance, it could be feasible. The key idea is to properly set up the
> LSC interrupt for a server mode virtio-user.
> - If the virtio-user device is just probed, we can setup LSC interrupt by
> registering the listen fd to interrupt thread. Whenever a vhost-user
> connection is coming, we will get a chance to read status, at that time, we
> could accept the connection, and report the status is up.
> - And after the connection is set up, we set up the LSC interrupt by registering
> the connection fd with the interrupt thread; if the connection is broken, we also
> get a chance to read status, at which point we can switch back to the above setting.
> 
> What do you think?
> 

Very nice idea,  which looks reasonable. Let me try it. 

Thanks
Zhiyong

> Thanks,
> Jianfeng
> 

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v3 4/4] net/vhost: add NULL pointer checking
  2018-03-30  2:00       ` Yang, Zhiyong
@ 2018-03-30  7:41         ` Yang, Zhiyong
  0 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-03-30  7:41 UTC (permalink / raw)
  To: Yang, Zhiyong, Maxime Coquelin, dev
  Cc: Tan, Jianfeng, Wang, Zhihong, thomas, Wang, Dong1, Bie, Tiwei



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yang, Zhiyong
> Sent: Friday, March 30, 2018 10:01 AM
> To: Maxime Coquelin <maxime.coquelin@redhat.com>; dev@dpdk.org
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>; thomas@monjalon.net; Wang, Dong1
> <dong1.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3 4/4] net/vhost: add NULL pointer
> checking
> 
> Hi Maxime,
> 
> > -----Original Message-----
> > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > Sent: Thursday, March 29, 2018 9:20 PM
> > To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> > Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Wang, Zhihong
> > <zhihong.wang@intel.com>; thomas@monjalon.net; Wang, Dong1
> > <dong1.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> > Subject: Re: [PATCH v3 4/4] net/vhost: add NULL pointer checking
> >
> > Hi,
> >
> > On 03/21/2018 04:03 AM, zhiyong.yang@intel.com wrote:
> > > When vhost user PMD works in client mode to connect/reconnect
> > > virtio-user with server mode, new thread sometimes may run to
> > > new_device before queue_setup has been done, So have to wait until
> > > memory allocation is done.
> > >
> > > Release note is updated in the patch.
> > >
> > > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > > ---
> > >   doc/guides/rel_notes/release_18_05.rst | 7 +++++++
> > >   drivers/net/vhost/rte_eth_vhost.c      | 9 +++++++++
> > >   2 files changed, 16 insertions(+)
> > >
> > > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > > b/doc/guides/rel_notes/release_18_05.rst
> > > index 3923dc253..7b301f021 100644
> > > --- a/doc/guides/rel_notes/release_18_05.rst
> > > +++ b/doc/guides/rel_notes/release_18_05.rst
> > > @@ -41,6 +41,13 @@ New Features
> > >        Also, make sure to start the actual text at the margin.
> > >
> > =========================================================
> > >
> > > +* **Added support for virtio-user server mode.**
> > > +
> > > +  In a container environment if the vhost-user backend restarts,
> > > + there's no way  for it to reconnect to virtio-user. To address
> > > + this, support for server mode  is added. In this mode the socket
> > > + file is created by virtio-user, which the  backend then connects
> > > + to. This means that if the backend restarts, it can  reconnect to
> > > + virtio-user and
> > continue communications.
> >
> > I think this shouldn't be part of this patch.
> >
> Ok, I can merge it with the previous patch 3/4.
> 
> > >
> > >   API Changes
> > >   -----------
> > > diff --git a/drivers/net/vhost/rte_eth_vhost.c
> > > b/drivers/net/vhost/rte_eth_vhost.c
> > > index 3aae01c39..2490bad0b 100644
> > > --- a/drivers/net/vhost/rte_eth_vhost.c
> > > +++ b/drivers/net/vhost/rte_eth_vhost.c
> > > @@ -580,6 +580,15 @@ new_device(int vid)
> > >   		eth_dev->data->numa_node = newnode;
> > >   #endif
> > >
> > > +	/* The thread may run here before eth_dev->data->rx_queues or
> > > +	 * eth_dev->data->tx_queues have gotten valid memory, so have to
> > > +	 * wait until memory allocation is done.
> > > +	 */
> > > +	while (!eth_dev->data->rx_queues ||
> > > +	       !eth_dev->data->tx_queues) {
> > > +		usleep(1);
> > > +	}
> > > +
> > >   	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
> > >   		vq = eth_dev->data->rx_queues[i];
> > >   		if (vq == NULL)
> > >
> >
> > I don't like the idea of polling here.
> > It looks like Junjie is addressing the problem in a different way [0],
> > do you confirm it would work in your case?
> >
> 
> Great to hear that.  I have to fix it when the issue is found.
> It's better to have another solution. I will test it later.
> 

Junjie's patch fixes the existing issue, so this patch can be dropped.

Here is the link.
http://www.dpdk.org/dev/patchwork/patch/36766/

thanks
Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v4 0/1] server mode virtio-user
  2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
                     ` (3 preceding siblings ...)
  2018-03-21  3:03   ` [PATCH v3 4/4] net/vhost: add NULL pointer checking zhiyong.yang
@ 2018-04-03 12:20   ` zhiyong.yang
  2018-04-03 12:20     ` [PATCH v4 1/1] net/virtio-user: add support for server mode zhiyong.yang
  4 siblings, 1 reply; 65+ messages in thread
From: zhiyong.yang @ 2018-04-03 12:20 UTC (permalink / raw)
  To: dev; +Cc: maxime.coquelin, jianfeng.tan, thomas, zhihong.wang, tiwei.bie

In a container environment if the vhost-user backend restarts, there's no way
for it to reconnect to virtio-user currently. To address this, support for
server mode is added. In this mode the socket file is created by virtio-user,
which the backend connects to. This means that if the backend restarts, it can
reconnect to virtio-user and continue communications.

The series add support for the feature and target for 18.05 release.

virtio-user adds support for server mode in this patch.

Client mode vhost-user starts up first; server mode virtio-user then starts
up and creates the socket file used to exchange vhost messages.

If the connection is broken, client mode vhost-user can reconnect to
virtio-user.

Server mode virtio-user supports repeated vhost-user reconnections with
the same parameter configuration.
 
Virtio-user supports only one connection at a time in server/client mode.

How to test?
The following commands are provided for reference.

step1:
./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3e000 -n 4 --socket-mem 256,0 \
--vdev 'net_vhost0,iface=/tmp/sock0,client=1,queues=1' -- -i --rxq=1 --txq=1 \
--nb-cores=1 --no-numa

step2:
./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 -m 256,0 --no-pci \
--file-prefix=testpmd0 --vdev=net_virtio_user0,mac=00:11:22:33:44:10, \
path=/tmp/sock0,server=1,queues=1 -- -i --rxq=1 --txq=1 --no-numa

step3: at the virtio-user side, run "start"
step4: at the vhost-user side, run "start tx_first 40000"

Then you can get the numbers by running "show port stats all" at both sides.

Vhost-user can then be restarted (quit and started again); it reconnects to
virtio-user successfully and communication continues.

Changes in V4:
1. No longer create a new pthread; use the librte_eal interrupt thread instead.
2. virtio-user no longer works in blocking mode for the first connection.
Client mode vhost-user starts up first, then server mode virtio-user creates the
socket file and starts up. This keeps it consistent with client mode virtio-user.

Changes in V3:
1. Use the EAL epoll mechanism instead of vhost events; the vhost event APIs
are no longer exported.
2. rebase the code on top of dpdk-next-virtio

Changes in V2:
1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing issues
which is not strongly related to support for server mode
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.


Zhiyong Yang (1):
  net/virtio-user: add support for server mode

 doc/guides/rel_notes/release_18_05.rst           |   6 ++
 drivers/net/virtio/virtio_user/vhost_user.c      |  64 ++++++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c |  45 +++++++---
 drivers/net/virtio/virtio_user/virtio_user_dev.h |   4 +
 drivers/net/virtio/virtio_user_ethdev.c          | 103 +++++++++++++++++++++--
 5 files changed, 194 insertions(+), 28 deletions(-)

-- 
2.14.3

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v4 1/1] net/virtio-user: add support for server mode
  2018-04-03 12:20   ` [PATCH v4 0/1] server mode virtio-user zhiyong.yang
@ 2018-04-03 12:20     ` zhiyong.yang
  2018-04-03 15:16       ` Tan, Jianfeng
  2018-04-04 17:17       ` [PATCH v5] " zhiyong.yang
  0 siblings, 2 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-04-03 12:20 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, jianfeng.tan, thomas, zhihong.wang, tiwei.bie,
	Zhiyong Yang

virtio-user adds support for server mode in this patch.

Client mode vhost-user starts up first; server mode virtio-user then starts
up and creates the socket file used to exchange vhost messages.

If the connection is broken, client mode vhost-user can reconnect to
virtio-user.

Server mode virtio-user supports repeated vhost-user reconnections with
the same parameter configuration.

Release note is updated in the patch.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 doc/guides/rel_notes/release_18_05.rst           |   6 ++
 drivers/net/virtio/virtio_user/vhost_user.c      |  64 ++++++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c |  45 +++++++---
 drivers/net/virtio/virtio_user/virtio_user_dev.h |   4 +
 drivers/net/virtio/virtio_user_ethdev.c          | 103 +++++++++++++++++++++--
 5 files changed, 194 insertions(+), 28 deletions(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 9cc77f893..f8897b2e9 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -58,6 +58,12 @@ New Features
   * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
   * Added support for DROP action in flow API.
 
+* **Added support for virtio-user server mode.**
+  In a container environment if the vhost-user backend restarts, there's no way
+  for it to reconnect to virtio-user. To address this, support for server mode
+  is added. In this mode the socket file is created by virtio-user, which the
+  backend connects to. This means that if the backend restarts, it can reconnect
+  to virtio-user and continue communications.
 
 API Changes
 -----------
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..1b3401d4f 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	return 0;
 }
 
+static void
+virtio_user_set_block(int fd, bool enabled)
+{
+	int f;
+
+	f = fcntl(fd, F_GETFL);
+	if (enabled)
+		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+	else
+		fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+#define MAX_VIRTIO_USER_BACKLOG 128
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+	int ret;
+	int fd = dev->listenfd;
+	int connectfd;
+
+	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+			    dev->path, strerror(errno));
+		goto err;
+	}
+	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+	if (ret < 0)
+		goto err;
+
+	connectfd = accept(fd, NULL, NULL);
+	if (connectfd >= 0)
+		dev->connected = true;
+	else
+		goto err;
+
+	dev->vhostfd = connectfd;
+	virtio_user_set_block(connectfd, true);
+	return 0;
+err:
+	close(dev->listenfd);
+	return -1;
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
 {
 	int fd;
 	int flag;
+	int ret = 0;
 	struct sockaddr_un un;
 
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -405,14 +450,21 @@ vhost_user_setup(struct virtio_user_dev *dev)
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
 	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-		close(fd);
-		return -1;
+
+	if (dev->is_server) {
+		dev->listenfd = fd;
+		ret = virtio_user_start_server(dev, &un);
+	} else {
+		dev->vhostfd = fd;
+		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
+		dev->connected = true;
 	}
 
-	dev->vhostfd = fd;
-	return 0;
+	return ret;
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index f90fee9e5..dd9fa9bdf 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
 {
 	uint32_t i;
 
+	if (!dev->connected)
+		return -1;
+
 	for (i = 0; i < dev->max_queue_pairs; ++i)
 		dev->ops->enable_qp(dev, i, 0);
 
@@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	dev->vhostfds = NULL;
 	dev->tapfds = NULL;
 
-	if (is_vhost_user_by_type(dev->path)) {
-		dev->ops = &ops_user;
+	if (dev->is_server) {
+		dev->ops = &ops_user;/* server mode only supports vhost user*/
 	} else {
-		dev->ops = &ops_kernel;
-
-		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
-		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
-		if (!dev->vhostfds || !dev->tapfds) {
-			PMD_INIT_LOG(ERR, "Failed to malloc");
-			return -1;
-		}
-
-		for (q = 0; q < dev->max_queue_pairs; ++q) {
-			dev->vhostfds[q] = -1;
-			dev->tapfds[q] = -1;
+		if (is_vhost_user_by_type(dev->path)) {
+			dev->ops = &ops_user;
+		} else {
+			dev->ops = &ops_kernel;
+
+			dev->vhostfds = malloc(dev->max_queue_pairs *
+					       sizeof(int));
+			dev->tapfds = malloc(dev->max_queue_pairs *
+					     sizeof(int));
+			if (!dev->vhostfds || !dev->tapfds) {
+				PMD_INIT_LOG(ERR, "Failed to malloc");
+				return -1;
+			}
+
+			for (q = 0; q < dev->max_queue_pairs; ++q) {
+				dev->vhostfds[q] = -1;
+				dev->tapfds[q] = -1;
+			}
 		}
 	}
 
@@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 
 	close(dev->vhostfd);
 
+	if (dev->is_server && dev->listenfd >= 0) {
+		close(dev->listenfd);
+		dev->listenfd = -1;
+	}
+	dev->connected = false;
 	if (dev->vhostfds) {
 		for (i = 0; i < dev->max_queue_pairs; ++i)
 			close(dev->vhostfds[i]);
@@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 	}
 
 	free(dev->ifname);
+
+	if (dev->is_server)
+		unlink(dev->path);
 }
 
 static uint8_t
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index 64467b4f9..68056720d 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -6,6 +6,7 @@
 #define _VIRTIO_USER_DEV_H
 
 #include <limits.h>
+#include <stdbool.h>
 #include "../virtio_pci.h"
 #include "../virtio_ring.h"
 #include "vhost.h"
@@ -13,6 +14,9 @@
 struct virtio_user_dev {
 	/* for vhost_user backend */
 	int		vhostfd;
+	int		listenfd;   /* listening fd */
+	bool		connected;  /* connection status */
+	bool		is_server;  /* server or client mode */
 
 	/* for vhost_kernel backend */
 	char		*ifname;
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 263649006..5b8c8e291 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -24,15 +24,76 @@
 #define virtio_user_get_dev(hw) \
 	((struct virtio_user_dev *)(hw)->virtio_user_dev)
 
+static void
+virtio_user_server_reconnection(struct virtio_user_dev *dev)
+{
+	int ret;
+	int flag;
+	int connectfd;
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+
+	if (dev->connected)
+		return;
+
+	connectfd = accept(dev->listenfd, NULL, NULL);
+	if (connectfd < 0)
+		return;
+
+	dev->vhostfd = connectfd;
+	flag = fcntl(connectfd, F_GETFD);
+	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
+
+	ret = virtio_user_start_device(dev);
+	if (ret < 0)
+		return;
+
+	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt disable failed");
+			return;
+		}
+		rte_intr_callback_unregister(eth_dev->intr_handle,
+					     virtio_interrupt_handler,
+					     eth_dev);
+		eth_dev->intr_handle->fd = connectfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
+	dev->connected = true;
+	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
+}
+
 static void
 virtio_user_delayed_handler(void *param)
 {
 	struct virtio_hw *hw = (struct virtio_hw *)param;
-	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
+	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-	rte_intr_callback_unregister(dev->intr_handle,
-				     virtio_interrupt_handler,
-				     dev);
+	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+		PMD_DRV_LOG(ERR, "interrupt disable failed");
+		return;
+	}
+	rte_intr_callback_unregister(eth_dev->intr_handle,
+				     virtio_interrupt_handler, eth_dev);
+	if (dev->is_server) {
+		if (dev->vhostfd >= 0) {
+			close(dev->vhostfd);
+			dev->vhostfd = -1;
+		}
+		eth_dev->intr_handle->fd = dev->listenfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
 }
 
 static void
@@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
 			if (r == 0 || (r < 0 && errno != EAGAIN)) {
 				dev->status &= (~VIRTIO_NET_S_LINK_UP);
-				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
-					    hw->port_id);
+
 				/* Only client mode is available now. Once the
 				 * connection is broken, it can never be up
 				 * again. Besides, this function could be called
@@ -74,9 +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				 * callback cannot be unregistered here, set an
 				 * alarm to do it.
 				 */
-				rte_eal_alarm_set(1,
+				if (dev->connected) {
+					dev->connected = false;
+					PMD_DRV_LOG(ERR, "virtio-user port %u is down",
+						    hw->port_id);
+					rte_eal_alarm_set(1,
 						  virtio_user_delayed_handler,
 						  (void *)hw);
+				}
 			} else {
 				dev->status |= VIRTIO_NET_S_LINK_UP;
 			}
@@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
 				return;
 			}
-		}
+
+		} else if (dev->is_server)
+			virtio_user_server_reconnection(dev);
+
 		*(uint16_t *)dst = dev->status;
 	}
 
@@ -278,12 +346,15 @@ static const char *valid_args[] = {
 	VIRTIO_USER_ARG_QUEUE_SIZE,
 #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
 	VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+	VIRTIO_USER_ARG_SERVER_MODE,
 	NULL
 };
 
 #define VIRTIO_USER_DEF_CQ_EN	0
 #define VIRTIO_USER_DEF_Q_NUM	1
 #define VIRTIO_USER_DEF_Q_SZ	256
+#define VIRTIO_USER_DEF_SERVER_MODE	0
 
 static int
 get_string_arg(const char *key __rte_unused,
@@ -378,10 +449,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
 	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
 	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
 	char *path = NULL;
 	char *ifname = NULL;
 	char *mac_addr = NULL;
 	int ret = -1;
+	struct virtio_user_dev *vu_dev = NULL;
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
 	if (!kvlist) {
@@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+				       &get_integer_arg, &server_mode) < 0) {
+			PMD_INIT_LOG(ERR, "error to parse %s",
+				     VIRTIO_USER_ARG_SERVER_MODE);
+			goto end;
+		}
+	}
+
 	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
 		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
 				       &get_integer_arg, &cq) < 0) {
@@ -476,6 +558,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 
 		hw = eth_dev->data->dev_private;
+		vu_dev = virtio_user_get_dev(hw);
+		if (server_mode == 1)
+			vu_dev->is_server = true;
+		else
+			vu_dev->is_server = false;
 		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
 				 queue_size, mac_addr, &ifname) < 0) {
 			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
  2018-04-03 12:20     ` [PATCH v4 1/1] net/virtio-user: add support for server mode zhiyong.yang
@ 2018-04-03 15:16       ` Tan, Jianfeng
  2018-04-04  3:31         ` Yang, Zhiyong
  2018-04-04  5:37         ` Tiwei Bie
  2018-04-04 17:17       ` [PATCH v5] " zhiyong.yang
  1 sibling, 2 replies; 65+ messages in thread
From: Tan, Jianfeng @ 2018-04-03 15:16 UTC (permalink / raw)
  To: zhiyong.yang, dev; +Cc: maxime.coquelin, thomas, zhihong.wang, tiwei.bie



On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> virtio-user adds support for server mode in this patch.
>
> Client mode vhost-user startup firstly, server mode virtio-user startups
> and creates the socket file to exchange vhost messages.
>
> If the connection is broken, client mode vhost-user can support to
> reconnect virtio-user.
>
> Server mode virtio-user supports many times' vhost-user reconnections with
> the same parameter configurations.
>
> Release note is updated in the patch.

With the current implementation, LSC has to be enabled; otherwise there is
no chance to accept the incoming connection. We should point this out.

And if possible, split this patch into multiple patches.

>
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   doc/guides/rel_notes/release_18_05.rst           |   6 ++
>   drivers/net/virtio/virtio_user/vhost_user.c      |  64 ++++++++++++--
>   drivers/net/virtio/virtio_user/virtio_user_dev.c |  45 +++++++---
>   drivers/net/virtio/virtio_user/virtio_user_dev.h |   4 +
>   drivers/net/virtio/virtio_user_ethdev.c          | 103 +++++++++++++++++++++--
>   5 files changed, 194 insertions(+), 28 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
> index 9cc77f893..f8897b2e9 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -58,6 +58,12 @@ New Features
>     * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
>     * Added support for DROP action in flow API.
>   
> +* **Added support for virtio-user server mode.**
> +  In a container environment if the vhost-user backend restarts, there's no way
> +  for it to reconnect to virtio-user. To address this, support for server mode
> +  is added. In this mode the socket file is created by virtio-user, which the
> +  backend connects to. This means that if the backend restarts, it can reconnect
> +  to virtio-user and continue communications.
>   
>   API Changes
>   -----------
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
> index 91c6449bb..1b3401d4f 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   	return 0;
>   }
>   
> +static void
> +virtio_user_set_block(int fd, bool enabled)

This is only used once, no need to abstract it into a function?

> +{
> +	int f;
> +
> +	f = fcntl(fd, F_GETFL);
> +	if (enabled)
> +		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
> +	else
> +		fcntl(fd, F_SETFL, f | O_NONBLOCK);
> +}
> +
> +#define MAX_VIRTIO_USER_BACKLOG 128

We only allow one connection from vhost-user, so how about just making the
backlog queue length 1?

> +static int
> +virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
> +{
> +	int ret;
> +	int fd = dev->listenfd;
> +	int connectfd;
> +
> +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
> +			    dev->path, strerror(errno));
> +		goto err;
> +	}
> +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> +	if (ret < 0)
> +		goto err;
> +
> +	connectfd = accept(fd, NULL, NULL);
> +	if (connectfd >= 0)
> +		dev->connected = true;
> +	else
> +		goto err;

if (connectfd < 0)
         goto err;

dev->connected = true;
dev->vhostfd = connectfd;
...

> +
> +	dev->vhostfd = connectfd;
> +	virtio_user_set_block(connectfd, true);
> +	return 0;
> +err:
> +	close(dev->listenfd);
> +	return -1;
> +}
> +
>   /**
>    * Set up environment to talk with a vhost user backend.
>    *
> @@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
>   {
>   	int fd;
>   	int flag;
> +	int ret = 0;
>   	struct sockaddr_un un;
>   
>   	fd = socket(AF_UNIX, SOCK_STREAM, 0);
> @@ -405,14 +450,21 @@ vhost_user_setup(struct virtio_user_dev *dev)
>   	memset(&un, 0, sizeof(un));
>   	un.sun_family = AF_UNIX;
>   	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> -		close(fd);
> -		return -1;
> +
> +	if (dev->is_server) {
> +		dev->listenfd = fd;
> +		ret = virtio_user_start_server(dev, &un);
> +	} else {
> +		dev->vhostfd = fd;
> +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> +			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> +			close(fd);
> +			return -1;
> +		}
> +		dev->connected = true;
>   	}
>   
> -	dev->vhostfd = fd;
> -	return 0;
> +	return ret;
>   }
>   
>   static int
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index f90fee9e5..dd9fa9bdf 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
>   {
>   	uint32_t i;
>   
> +	if (!dev->connected)
> +		return -1;
> +
>   	for (i = 0; i < dev->max_queue_pairs; ++i)
>   		dev->ops->enable_qp(dev, i, 0);
>   
> @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>   	dev->vhostfds = NULL;
>   	dev->tapfds = NULL;

Add a check here:
if (dev->is_server && !is_vhost_user_by_type(dev->path))
         return error;

>   
> -	if (is_vhost_user_by_type(dev->path)) {
> -		dev->ops = &ops_user;
> +	if (dev->is_server) {
> +		dev->ops = &ops_user;/* server mode only supports vhost user*/
>   	} else {
> -		dev->ops = &ops_kernel;
> -
> -		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
> -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> -		if (!dev->vhostfds || !dev->tapfds) {
> -			PMD_INIT_LOG(ERR, "Failed to malloc");
> -			return -1;
> -		}
> -
> -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> -			dev->vhostfds[q] = -1;
> -			dev->tapfds[q] = -1;
> +		if (is_vhost_user_by_type(dev->path)) {
> +			dev->ops = &ops_user;
> +		} else {
> +			dev->ops = &ops_kernel;
> +
> +			dev->vhostfds = malloc(dev->max_queue_pairs *
> +					       sizeof(int));
> +			dev->tapfds = malloc(dev->max_queue_pairs *
> +					     sizeof(int));
> +			if (!dev->vhostfds || !dev->tapfds) {
> +				PMD_INIT_LOG(ERR, "Failed to malloc");
> +				return -1;
> +			}
> +
> +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> +				dev->vhostfds[q] = -1;
> +				dev->tapfds[q] = -1;
> +			}
>   		}
>   	}
>   
> @@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>   
>   	close(dev->vhostfd);
>   
> +	if (dev->is_server && dev->listenfd >= 0) {
> +		close(dev->listenfd);
> +		dev->listenfd = -1;
> +	}
> +	dev->connected = false;
>   	if (dev->vhostfds) {
>   		for (i = 0; i < dev->max_queue_pairs; ++i)
>   			close(dev->vhostfds[i]);
> @@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>   	}
>   
>   	free(dev->ifname);
> +
> +	if (dev->is_server)
> +		unlink(dev->path);
>   }
>   
>   static uint8_t
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> index 64467b4f9..68056720d 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> @@ -6,6 +6,7 @@
>   #define _VIRTIO_USER_DEV_H
>   
>   #include <limits.h>
> +#include <stdbool.h>
>   #include "../virtio_pci.h"
>   #include "../virtio_ring.h"
>   #include "vhost.h"
> @@ -13,6 +14,9 @@
>   struct virtio_user_dev {
>   	/* for vhost_user backend */
>   	int		vhostfd;
> +	int		listenfd;   /* listening fd */
> +	bool		connected;  /* connection status */

It does not seem necessary to add this field, as the connection status can be
deduced from (vhostfd >= 0).

> +	bool		is_server;  /* server or client mode */
>   
>   	/* for vhost_kernel backend */
>   	char		*ifname;
> diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
> index 263649006..5b8c8e291 100644
> --- a/drivers/net/virtio/virtio_user_ethdev.c
> +++ b/drivers/net/virtio/virtio_user_ethdev.c
> @@ -24,15 +24,76 @@
>   #define virtio_user_get_dev(hw) \
>   	((struct virtio_user_dev *)(hw)->virtio_user_dev)
>   
> +static void
> +virtio_user_server_reconnection(struct virtio_user_dev *dev)

s/reconnection/reconnect?

> +{
> +	int ret;
> +	int flag;
> +	int connectfd;
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> +
> +	if (dev->connected)
> +		return;
> +
> +	connectfd = accept(dev->listenfd, NULL, NULL);
> +	if (connectfd < 0)
> +		return;
> +
> +	dev->vhostfd = connectfd;
> +	flag = fcntl(connectfd, F_GETFD);
> +	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
> +
> +	ret = virtio_user_start_device(dev);
> +	if (ret < 0)
> +		return;
> +
> +	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> +		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt disable failed");
> +			return;
> +		}
> +		rte_intr_callback_unregister(eth_dev->intr_handle,
> +					     virtio_interrupt_handler,
> +					     eth_dev);
> +		eth_dev->intr_handle->fd = connectfd;
> +		rte_intr_callback_register(eth_dev->intr_handle,
> +					   virtio_interrupt_handler, eth_dev);
> +
> +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +			return;
> +		}
> +	}
> +	dev->connected = true;
> +	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
> +}
> +
>   static void
>   virtio_user_delayed_handler(void *param)
>   {
>   	struct virtio_hw *hw = (struct virtio_hw *)param;
> -	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> +	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
>   
> -	rte_intr_callback_unregister(dev->intr_handle,
> -				     virtio_interrupt_handler,
> -				     dev);
> +	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> +		PMD_DRV_LOG(ERR, "interrupt disable failed");
> +		return;
> +	}
> +	rte_intr_callback_unregister(eth_dev->intr_handle,
> +				     virtio_interrupt_handler, eth_dev);
> +	if (dev->is_server) {
> +		if (dev->vhostfd >= 0) {
> +			close(dev->vhostfd);
> +			dev->vhostfd = -1;
> +		}
> +		eth_dev->intr_handle->fd = dev->listenfd;
> +		rte_intr_callback_register(eth_dev->intr_handle,
> +					   virtio_interrupt_handler, eth_dev);
> +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +			return;
> +		}
> +	}
>   }
>   
>   static void
> @@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
>   			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);

In server mode, when the connection is not established, vhostfd is -1, so
r < 0 and errno is EBADF. How could it reach the server handling in
the "else if" block?

>   			if (r == 0 || (r < 0 && errno != EAGAIN)) {
>   				dev->status &= (~VIRTIO_NET_S_LINK_UP);
> -				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
> -					    hw->port_id);
> +
>   				/* Only client mode is available now. Once the

Can you also correct this note as we support server mode now?

>   				 * connection is broken, it can never be up
>   				 * again. Besides, this function could be called
> @@ -74,9 +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
>   				 * callback cannot be unregistered here, set an
>   				 * alarm to do it.
>   				 */
> -				rte_eal_alarm_set(1,
> +				if (dev->connected) {
> +					dev->connected = false;
> +					PMD_DRV_LOG(ERR, "virtio-user port %u is down",
> +						    hw->port_id);
> +					rte_eal_alarm_set(1,
>   						  virtio_user_delayed_handler,
>   						  (void *)hw);
> +				}
>   			} else {
>   				dev->status |= VIRTIO_NET_S_LINK_UP;
>   			}
> @@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
>   				PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
>   				return;
>   			}
> -		}
> +
> +		} else if (dev->is_server)
> +			virtio_user_server_reconnection(dev);
> +
>   		*(uint16_t *)dst = dev->status;
>   	}
>   
> @@ -278,12 +346,15 @@ static const char *valid_args[] = {
>   	VIRTIO_USER_ARG_QUEUE_SIZE,
>   #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
>   	VIRTIO_USER_ARG_INTERFACE_NAME,
> +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> +	VIRTIO_USER_ARG_SERVER_MODE,
>   	NULL
>   };
>   
>   #define VIRTIO_USER_DEF_CQ_EN	0
>   #define VIRTIO_USER_DEF_Q_NUM	1
>   #define VIRTIO_USER_DEF_Q_SZ	256
> +#define VIRTIO_USER_DEF_SERVER_MODE	0
>   
>   static int
>   get_string_arg(const char *key __rte_unused,
> @@ -378,10 +449,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
>   	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
>   	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
>   	char *path = NULL;
>   	char *ifname = NULL;
>   	char *mac_addr = NULL;
>   	int ret = -1;
> +	struct virtio_user_dev *vu_dev = NULL;
>   
>   	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
>   	if (!kvlist) {
> @@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   		}
>   	}
>   
> +	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
> +		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
> +				       &get_integer_arg, &server_mode) < 0) {
> +			PMD_INIT_LOG(ERR, "error to parse %s",
> +				     VIRTIO_USER_ARG_SERVER_MODE);
> +			goto end;
> +		}
> +	}
> +
>   	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
>   		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
>   				       &get_integer_arg, &cq) < 0) {
> @@ -476,6 +558,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   		}
>   
>   		hw = eth_dev->data->dev_private;
> +		vu_dev = virtio_user_get_dev(hw);
> +		if (server_mode == 1)
> +			vu_dev->is_server = true;
> +		else
> +			vu_dev->is_server = false;
>   		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
>   				 queue_size, mac_addr, &ifname) < 0) {
>   			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
  2018-04-03 15:16       ` Tan, Jianfeng
@ 2018-04-04  3:31         ` Yang, Zhiyong
  2018-04-04  3:47           ` Tan, Jianfeng
  2018-04-04  5:37         ` Tiwei Bie
  1 sibling, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-04-04  3:31 UTC (permalink / raw)
  To: Tan, Jianfeng, dev; +Cc: maxime.coquelin, thomas, Wang, Zhihong, Bie, Tiwei

Hi Jianfeng,

First of all, thanks for your review and comments.
Replies inline.

> -----Original Message-----
> From: Tan, Jianfeng
> Sent: Tuesday, April 3, 2018 11:16 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong
> <zhihong.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> Subject: Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
> 
> 
> 
> On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> > virtio-user adds support for server mode in this patch.
> >
> > Client mode vhost-user startup firstly, server mode virtio-user
> > startups and creates the socket file to exchange vhost messages.
> >
> > If the connection is broken, client mode vhost-user can support to
> > reconnect virtio-user.
> >
> > Server mode virtio-user supports many times' vhost-user reconnections
> > with the same parameter configurations.
> >
> > Release note is updated in the patch.
> 
> With current implementation, we have to enable LSC; or no chance to accept
> the coming connection. We shall point this out.
> 
Ok.

> And if possible, split this patch into multiple patches.
>
How to split?

> >
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > ---
> >   doc/guides/rel_notes/release_18_05.rst           |   6 ++
> >   drivers/net/virtio/virtio_user/vhost_user.c      |  64 ++++++++++++--
> >   drivers/net/virtio/virtio_user/virtio_user_dev.c |  45 +++++++---
> >   drivers/net/virtio/virtio_user/virtio_user_dev.h |   4 +
> >   drivers/net/virtio/virtio_user_ethdev.c          | 103
> +++++++++++++++++++++--
> >   5 files changed, 194 insertions(+), 28 deletions(-)
> >
> > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > b/doc/guides/rel_notes/release_18_05.rst
> > index 9cc77f893..f8897b2e9 100644
> > --- a/doc/guides/rel_notes/release_18_05.rst
> > +++ b/doc/guides/rel_notes/release_18_05.rst
> > @@ -58,6 +58,12 @@ New Features
> >     * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
> >     * Added support for DROP action in flow API.
> >
> > +* **Added support for virtio-user server mode.**
> > +  In a container environment if the vhost-user backend restarts,
> > +there's no way
> > +  for it to reconnect to virtio-user. To address this, support for
> > +server mode
> > +  is added. In this mode the socket file is created by virtio-user,
> > +which the
> > +  backend connects to. This means that if the backend restarts, it
> > +can reconnect
> > +  to virtio-user and continue communications.
> >
> >   API Changes
> >   -----------
> > diff --git a/drivers/net/virtio/virtio_user/vhost_user.c
> > b/drivers/net/virtio/virtio_user/vhost_user.c
> > index 91c6449bb..1b3401d4f 100644
> > --- a/drivers/net/virtio/virtio_user/vhost_user.c
> > +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> > @@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
> >   	return 0;
> >   }
> >
> > +static void
> > +virtio_user_set_block(int fd, bool enabled)
> 
> This is only used once, no need to abstract it into a function?

Ok.

> 
> > +{
> > +	int f;
> > +
> > +	f = fcntl(fd, F_GETFL);
> > +	if (enabled)
> > +		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
> > +	else
> > +		fcntl(fd, F_SETFL, f | O_NONBLOCK); }
> > +
> > +#define MAX_VIRTIO_USER_BACKLOG 128
> 
> We only allow one connection from vhost-user, so how about just make the
> backlog queue length as 1?
> 
> > +static int
> > +virtio_user_start_server(struct virtio_user_dev *dev, struct
> > +sockaddr_un *un) {
> > +	int ret;
> > +	int fd = dev->listenfd;
> > +	int connectfd;
> > +
> > +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> > +	if (ret < 0) {
> > +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> try again\n",
> > +			    dev->path, strerror(errno));
> > +		goto err;
> > +	}
> > +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> > +	if (ret < 0)
> > +		goto err;
> > +
> > +	connectfd = accept(fd, NULL, NULL);
> > +	if (connectfd >= 0)
> > +		dev->connected = true;
> > +	else
> > +		goto err;
> 
> if (connectfd < 0)
>          goto err;
> 
> dev->connected = true;
> dev->vhostfd = connectfd;
> ...

Ok.

> 
> > +
> > +	dev->vhostfd = connectfd;
> > +	virtio_user_set_block(connectfd, true);
> > +	return 0;
> > +err:
> > +	close(dev->listenfd);
> > +	return -1;
> > +}
> > +
> >   /**
> >    * Set up environment to talk with a vhost user backend.
> >    *
> > @@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
> >   {
> >   	int fd;
> >   	int flag;
> > +	int ret = 0;
> >   	struct sockaddr_un un;
> >
> >   	fd = socket(AF_UNIX, SOCK_STREAM, 0); @@ -405,14 +450,21 @@
> > vhost_user_setup(struct virtio_user_dev *dev)
> >   	memset(&un, 0, sizeof(un));
> >   	un.sun_family = AF_UNIX;
> >   	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> > -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> > -		close(fd);
> > -		return -1;
> > +
> > +	if (dev->is_server) {
> > +		dev->listenfd = fd;
> > +		ret = virtio_user_start_server(dev, &un);
> > +	} else {
> > +		dev->vhostfd = fd;
> > +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > +			PMD_DRV_LOG(ERR, "connect error, %s",
> strerror(errno));
> > +			close(fd);
> > +			return -1;
> > +		}
> > +		dev->connected = true;
> >   	}
> >
> > -	dev->vhostfd = fd;
> > -	return 0;
> > +	return ret;
> >   }
> >
> >   static int
> > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > index f90fee9e5..dd9fa9bdf 100644
> > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > @@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev
> *dev)
> >   {
> >   	uint32_t i;
> >
> > +	if (!dev->connected)
> > +		return -1;
> > +
> >   	for (i = 0; i < dev->max_queue_pairs; ++i)
> >   		dev->ops->enable_qp(dev, i, 0);
> >
> > @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev
> *dev)
> >   	dev->vhostfds = NULL;
> >   	dev->tapfds = NULL;
> 
> Add a check here:
> if (dev->is_server && !is_vhost_user_by_type(dev->path))
>          return error;

Ok.

> 
> >
> > -	if (is_vhost_user_by_type(dev->path)) {
> > -		dev->ops = &ops_user;
> > +	if (dev->is_server) {
> > +		dev->ops = &ops_user;/* server mode only supports vhost
> user*/
> >   	} else {
> > -		dev->ops = &ops_kernel;
> > -
> > -		dev->vhostfds = malloc(dev->max_queue_pairs *
> sizeof(int));
> > -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > -		if (!dev->vhostfds || !dev->tapfds) {
> > -			PMD_INIT_LOG(ERR, "Failed to malloc");
> > -			return -1;
> > -		}
> > -
> > -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> > -			dev->vhostfds[q] = -1;
> > -			dev->tapfds[q] = -1;
> > +		if (is_vhost_user_by_type(dev->path)) {
> > +			dev->ops = &ops_user;
> > +		} else {
> > +			dev->ops = &ops_kernel;
> > +
> > +			dev->vhostfds = malloc(dev->max_queue_pairs *
> > +					       sizeof(int));
> > +			dev->tapfds = malloc(dev->max_queue_pairs *
> > +					     sizeof(int));
> > +			if (!dev->vhostfds || !dev->tapfds) {
> > +				PMD_INIT_LOG(ERR, "Failed to malloc");
> > +				return -1;
> > +			}
> > +
> > +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> > +				dev->vhostfds[q] = -1;
> > +				dev->tapfds[q] = -1;
> > +			}
> >   		}
> >   	}
> >
> > @@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev
> > *dev)
> >
> >   	close(dev->vhostfd);
> >
> > +	if (dev->is_server && dev->listenfd >= 0) {
> > +		close(dev->listenfd);
> > +		dev->listenfd = -1;
> > +	}
> > +	dev->connected = false;
> >   	if (dev->vhostfds) {
> >   		for (i = 0; i < dev->max_queue_pairs; ++i)
> >   			close(dev->vhostfds[i]);
> > @@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
> >   	}
> >
> >   	free(dev->ifname);
> > +
> > +	if (dev->is_server)
> > +		unlink(dev->path);
> >   }
> >
> >   static uint8_t
> > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > index 64467b4f9..68056720d 100644
> > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > @@ -6,6 +6,7 @@
> >   #define _VIRTIO_USER_DEV_H
> >
> >   #include <limits.h>
> > +#include <stdbool.h>
> >   #include "../virtio_pci.h"
> >   #include "../virtio_ring.h"
> >   #include "vhost.h"
> > @@ -13,6 +14,9 @@
> >   struct virtio_user_dev {
> >   	/* for vhost_user backend */
> >   	int		vhostfd;
> > +	int		listenfd;   /* listening fd */
> > +	bool		connected;  /* connection status */
> 
> Seems not necessary to add this field; as the connection status can be
> deduced from (vhostfd>=0)

Ok, remove it.

> 
> > +	bool		is_server;  /* server or client mode */
> >
> >   	/* for vhost_kernel backend */
> >   	char		*ifname;
> > diff --git a/drivers/net/virtio/virtio_user_ethdev.c
> > b/drivers/net/virtio/virtio_user_ethdev.c
> > index 263649006..5b8c8e291 100644
> > --- a/drivers/net/virtio/virtio_user_ethdev.c
> > +++ b/drivers/net/virtio/virtio_user_ethdev.c
> > @@ -24,15 +24,76 @@
> >   #define virtio_user_get_dev(hw) \
> >   	((struct virtio_user_dev *)(hw)->virtio_user_dev)
> >
> > +static void
> > +virtio_user_server_reconnection(struct virtio_user_dev *dev)
> 
> s/reconnection/reconnect?
>

Ok, Good Suggestion.
 
> > +{
> > +	int ret;
> > +	int flag;
> > +	int connectfd;
> > +	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> > +
> > +	if (dev->connected)
> > +		return;
> > +
> > +	connectfd = accept(dev->listenfd, NULL, NULL);
> > +	if (connectfd < 0)
> > +		return;
> > +
> > +	dev->vhostfd = connectfd;
> > +	flag = fcntl(connectfd, F_GETFD);
> > +	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
> > +
> > +	ret = virtio_user_start_device(dev);
> > +	if (ret < 0)
> > +		return;
> > +
> > +	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> > +		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > +			PMD_DRV_LOG(ERR, "interrupt disable failed");
> > +			return;
> > +		}
> > +		rte_intr_callback_unregister(eth_dev->intr_handle,
> > +					     virtio_interrupt_handler,
> > +					     eth_dev);
> > +		eth_dev->intr_handle->fd = connectfd;
> > +		rte_intr_callback_register(eth_dev->intr_handle,
> > +					   virtio_interrupt_handler, eth_dev);
> > +
> > +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> > +			return;
> > +		}
> > +	}
> > +	dev->connected = true;
> > +	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection
> > +succeeds!"); }
> > +
> >   static void
> >   virtio_user_delayed_handler(void *param)
> >   {
> >   	struct virtio_hw *hw = (struct virtio_hw *)param;
> > -	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> > +	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> > +	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
> >
> > -	rte_intr_callback_unregister(dev->intr_handle,
> > -				     virtio_interrupt_handler,
> > -				     dev);
> > +	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > +		PMD_DRV_LOG(ERR, "interrupt disable failed");
> > +		return;
> > +	}
> > +	rte_intr_callback_unregister(eth_dev->intr_handle,
> > +				     virtio_interrupt_handler, eth_dev);
> > +	if (dev->is_server) {
> > +		if (dev->vhostfd >= 0) {
> > +			close(dev->vhostfd);
> > +			dev->vhostfd = -1;
> > +		}
> > +		eth_dev->intr_handle->fd = dev->listenfd;
> > +		rte_intr_callback_register(eth_dev->intr_handle,
> > +					   virtio_interrupt_handler, eth_dev);
> > +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> > +			return;
> > +		}
> > +	}
> >   }
> >
> >   static void
> > @@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> size_t offset,
> >   			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
> 
> As server mode and the connection is not connected, vhostfd is -1 now, then
> r < 0 and errno is EBADF, how could it go into server handling in the "else if"
> block?
> 

I don't think I follow you. In server mode, if vhostfd is -1, then
"if (dev->vhostfd >= 0)" is false, so the code goes on to check "else if (dev->is_server)"
to handle server mode.

> >   			if (r == 0 || (r < 0 && errno != EAGAIN)) {
> >   				dev->status &= (~VIRTIO_NET_S_LINK_UP);
> > -				PMD_DRV_LOG(ERR, "virtio-user port %u is
> down",
> > -					    hw->port_id);
> > +
> >   				/* Only client mode is available now. Once
> the
> 
> Can you also correct this note as we support server mode now?

Ok.
> 
> >   				 * connection is broken, it can never be up
> >   				 * again. Besides, this function could be called
> @@ -74,9
> > +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t
> offset,
> >   				 * callback cannot be unregistered here, set
> an
> >   				 * alarm to do it.
> >   				 */
> > -				rte_eal_alarm_set(1,
> > +				if (dev->connected) {
> > +					dev->connected = false;
> > +					PMD_DRV_LOG(ERR, "virtio-user
> port %u is down",
> > +						    hw->port_id);
> > +					rte_eal_alarm_set(1,
> >
> virtio_user_delayed_handler,
> >   						  (void *)hw);
> > +				}
> >   			} else {
> >   				dev->status |= VIRTIO_NET_S_LINK_UP;
> >   			}
> > @@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> size_t offset,
> >   				PMD_DRV_LOG(ERR, "error clearing
> O_NONBLOCK flag");
> >   				return;
> >   			}
> > -		}
> > +
> > +		} else if (dev->is_server)
> > +			virtio_user_server_reconnection(dev);
> > +
> >   		*(uint16_t *)dst = dev->status;
> >   	}
> >
> > @@ -278,12 +346,15 @@ static const char *valid_args[] = {
> >   	VIRTIO_USER_ARG_QUEUE_SIZE,
> >   #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
> >   	VIRTIO_USER_ARG_INTERFACE_NAME,
> > +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> > +	VIRTIO_USER_ARG_SERVER_MODE,
> >   	NULL
> >   };
> >
> >   #define VIRTIO_USER_DEF_CQ_EN	0
> >   #define VIRTIO_USER_DEF_Q_NUM	1
> >   #define VIRTIO_USER_DEF_Q_SZ	256
> > +#define VIRTIO_USER_DEF_SERVER_MODE	0
> >
> >   static int
> >   get_string_arg(const char *key __rte_unused, @@ -378,10 +449,12 @@
> > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> >   	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
> >   	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
> >   	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> > +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
> >   	char *path = NULL;
> >   	char *ifname = NULL;
> >   	char *mac_addr = NULL;
> >   	int ret = -1;
> > +	struct virtio_user_dev *vu_dev = NULL;
> >
> >   	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
> >   	if (!kvlist) {
> > @@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device
> *dev)
> >   		}
> >   	}
> >
> > +	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1)
> {
> > +		if (rte_kvargs_process(kvlist,
> VIRTIO_USER_ARG_SERVER_MODE,
> > +				       &get_integer_arg, &server_mode) < 0) {
> > +			PMD_INIT_LOG(ERR, "error to parse %s",
> > +				     VIRTIO_USER_ARG_SERVER_MODE);
> > +			goto end;
> > +		}
> > +	}
> > +
> >   	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
> >   		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
> >   				       &get_integer_arg, &cq) < 0) { @@ -476,6
> +558,11 @@
> > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> >   		}
> >
> >   		hw = eth_dev->data->dev_private;
> > +		vu_dev = virtio_user_get_dev(hw);
> > +		if (server_mode == 1)
> > +			vu_dev->is_server = true;
> > +		else
> > +			vu_dev->is_server = false;
> >   		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues,
> cq,
> >   				 queue_size, mac_addr, &ifname) < 0) {
> >   			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
  2018-04-04  3:31         ` Yang, Zhiyong
@ 2018-04-04  3:47           ` Tan, Jianfeng
  0 siblings, 0 replies; 65+ messages in thread
From: Tan, Jianfeng @ 2018-04-04  3:47 UTC (permalink / raw)
  To: Yang, Zhiyong, dev; +Cc: maxime.coquelin, thomas, Wang, Zhihong, Bie, Tiwei



> -----Original Message-----
> From: Yang, Zhiyong
> Sent: Wednesday, April 4, 2018 11:32 AM
> To: Tan, Jianfeng; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong;
> Bie, Tiwei
> Subject: RE: [PATCH v4 1/1] net/virtio-user: add support for server mode
> 
> Hi Jiafeng,
> 
> Thanks for your review and comments firstly.
> Reply inline.
> 
> > -----Original Message-----
> > From: Tan, Jianfeng
> > Sent: Tuesday, April 3, 2018 11:16 PM
> > To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong
> > <zhihong.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> > Subject: Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
> >
> >
> >
> > On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> > > virtio-user adds support for server mode in this patch.
> > >
> > > Client mode vhost-user startup firstly, server mode virtio-user
> > > startups and creates the socket file to exchange vhost messages.
> > >
> > > If the connection is broken, client mode vhost-user can support to
> > > reconnect virtio-user.
> > >
> > > Server mode virtio-user supports many times' vhost-user reconnections
> > > with the same parameter configurations.
> > >
> > > Release note is updated in the patch.
> >
> > With current implementation, we have to enable LSC; or no chance to
> accept
> > the coming connection. We shall point this out.
> >
> Ok.
> 
> > And if possible, split this patch into multiple patches.
> >
> How to split?

I think it can be split into three:
- One for new dev parameter.
- One for server socket setup.
- One for LSC handling.

But it's not a big patch anyway, so I'm OK if you think it is better to keep it as one patch.

> 
> > >
> > > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > > ---
> > >   doc/guides/rel_notes/release_18_05.rst           |   6 ++
> > >   drivers/net/virtio/virtio_user/vhost_user.c      |  64 ++++++++++++--
> > >   drivers/net/virtio/virtio_user/virtio_user_dev.c |  45 +++++++---
> > >   drivers/net/virtio/virtio_user/virtio_user_dev.h |   4 +
> > >   drivers/net/virtio/virtio_user_ethdev.c          | 103
> > +++++++++++++++++++++--
> > >   5 files changed, 194 insertions(+), 28 deletions(-)
> > >
> > > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > > b/doc/guides/rel_notes/release_18_05.rst
> > > index 9cc77f893..f8897b2e9 100644
> > > --- a/doc/guides/rel_notes/release_18_05.rst
> > > +++ b/doc/guides/rel_notes/release_18_05.rst
> > > @@ -58,6 +58,12 @@ New Features
> > >     * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
> > >     * Added support for DROP action in flow API.
> > >
> > > +* **Added support for virtio-user server mode.**
> > > +  In a container environment if the vhost-user backend restarts,
> > > +there's no way
> > > +  for it to reconnect to virtio-user. To address this, support for
> > > +server mode
> > > +  is added. In this mode the socket file is created by virtio-user,
> > > +which the
> > > +  backend connects to. This means that if the backend restarts, it
> > > +can reconnect
> > > +  to virtio-user and continue communications.
> > >
> > >   API Changes
> > >   -----------
> > > diff --git a/drivers/net/virtio/virtio_user/vhost_user.c
> > > b/drivers/net/virtio/virtio_user/vhost_user.c
> > > index 91c6449bb..1b3401d4f 100644
> > > --- a/drivers/net/virtio/virtio_user/vhost_user.c
> > > +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> > > @@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
> > >   	return 0;
> > >   }
> > >
> > > +static void
> > > +virtio_user_set_block(int fd, bool enabled)
> >
> > This is only used once, no need to abstract it into a function?
> 
> Ok.
> 
> >
> > > +{
> > > +	int f;
> > > +
> > > +	f = fcntl(fd, F_GETFL);
> > > +	if (enabled)
> > > +		fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
> > > +	else
> > > +		fcntl(fd, F_SETFL, f | O_NONBLOCK); }
> > > +
> > > +#define MAX_VIRTIO_USER_BACKLOG 128
> >
> > We only allow one connection from vhost-user, so how about just make
> the
> > backlog queue length as 1?
> >
> > > +static int
> > > +virtio_user_start_server(struct virtio_user_dev *dev, struct
> > > +sockaddr_un *un) {
> > > +	int ret;
> > > +	int fd = dev->listenfd;
> > > +	int connectfd;
> > > +
> > > +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> > > +	if (ret < 0) {
> > > +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> > try again\n",
> > > +			    dev->path, strerror(errno));
> > > +		goto err;
> > > +	}
> > > +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> > > +	if (ret < 0)
> > > +		goto err;
> > > +
> > > +	connectfd = accept(fd, NULL, NULL);
> > > +	if (connectfd >= 0)
> > > +		dev->connected = true;
> > > +	else
> > > +		goto err;
> >
> > if (connectfd < 0)
> >          goto err;
> >
> > dev->connected = true;
> > dev->vhostfd = connectfd;
> > ...
> 
> Ok.
> 
> >
> > > +
> > > +	dev->vhostfd = connectfd;
> > > +	virtio_user_set_block(connectfd, true);
> > > +	return 0;
> > > +err:
> > > +	close(dev->listenfd);
> > > +	return -1;
> > > +}
> > > +
> > >   /**
> > >    * Set up environment to talk with a vhost user backend.
> > >    *
> > > @@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
> > >   {
> > >   	int fd;
> > >   	int flag;
> > > +	int ret = 0;
> > >   	struct sockaddr_un un;
> > >
> > >   	fd = socket(AF_UNIX, SOCK_STREAM, 0); @@ -405,14 +450,21 @@
> > > vhost_user_setup(struct virtio_user_dev *dev)
> > >   	memset(&un, 0, sizeof(un));
> > >   	un.sun_family = AF_UNIX;
> > >   	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> > > -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > > -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> > > -		close(fd);
> > > -		return -1;
> > > +
> > > +	if (dev->is_server) {
> > > +		dev->listenfd = fd;
> > > +		ret = virtio_user_start_server(dev, &un);
> > > +	} else {
> > > +		dev->vhostfd = fd;
> > > +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > > +			PMD_DRV_LOG(ERR, "connect error, %s",
> > strerror(errno));
> > > +			close(fd);
> > > +			return -1;
> > > +		}
> > > +		dev->connected = true;
> > >   	}
> > >
> > > -	dev->vhostfd = fd;
> > > -	return 0;
> > > +	return ret;
> > >   }
> > >
> > >   static int
> > > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > index f90fee9e5..dd9fa9bdf 100644
> > > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > @@ -142,6 +142,9 @@ int virtio_user_stop_device(struct
> virtio_user_dev
> > *dev)
> > >   {
> > >   	uint32_t i;
> > >
> > > +	if (!dev->connected)
> > > +		return -1;
> > > +
> > >   	for (i = 0; i < dev->max_queue_pairs; ++i)
> > >   		dev->ops->enable_qp(dev, i, 0);
> > >
> > > @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev
> > *dev)
> > >   	dev->vhostfds = NULL;
> > >   	dev->tapfds = NULL;
> >
> > Add a check here:
> > if (dev->is_server && !is_vhost_user_by_type(dev->path))
> >          return error;
> 
> Ok.
> 
> >
> > >
> > > -	if (is_vhost_user_by_type(dev->path)) {
> > > -		dev->ops = &ops_user;
> > > +	if (dev->is_server) {
> > > +		dev->ops = &ops_user;/* server mode only supports vhost
> > user*/
> > >   	} else {
> > > -		dev->ops = &ops_kernel;
> > > -
> > > -		dev->vhostfds = malloc(dev->max_queue_pairs *
> > sizeof(int));
> > > -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > > -		if (!dev->vhostfds || !dev->tapfds) {
> > > -			PMD_INIT_LOG(ERR, "Failed to malloc");
> > > -			return -1;
> > > -		}
> > > -
> > > -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > -			dev->vhostfds[q] = -1;
> > > -			dev->tapfds[q] = -1;
> > > +		if (is_vhost_user_by_type(dev->path)) {
> > > +			dev->ops = &ops_user;
> > > +		} else {
> > > +			dev->ops = &ops_kernel;
> > > +
> > > +			dev->vhostfds = malloc(dev->max_queue_pairs *
> > > +					       sizeof(int));
> > > +			dev->tapfds = malloc(dev->max_queue_pairs *
> > > +					     sizeof(int));
> > > +			if (!dev->vhostfds || !dev->tapfds) {
> > > +				PMD_INIT_LOG(ERR, "Failed to malloc");
> > > +				return -1;
> > > +			}
> > > +
> > > +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > +				dev->vhostfds[q] = -1;
> > > +				dev->tapfds[q] = -1;
> > > +			}
> > >   		}
> > >   	}
> > >
> > > @@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev
> > > *dev)
> > >
> > >   	close(dev->vhostfd);
> > >
> > > +	if (dev->is_server && dev->listenfd >= 0) {
> > > +		close(dev->listenfd);
> > > +		dev->listenfd = -1;
> > > +	}
> > > +	dev->connected = false;
> > >   	if (dev->vhostfds) {
> > >   		for (i = 0; i < dev->max_queue_pairs; ++i)
> > >   			close(dev->vhostfds[i]);
> > > @@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev
> *dev)
> > >   	}
> > >
> > >   	free(dev->ifname);
> > > +
> > > +	if (dev->is_server)
> > > +		unlink(dev->path);
> > >   }
> > >
> > >   static uint8_t
> > > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > index 64467b4f9..68056720d 100644
> > > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > @@ -6,6 +6,7 @@
> > >   #define _VIRTIO_USER_DEV_H
> > >
> > >   #include <limits.h>
> > > +#include <stdbool.h>
> > >   #include "../virtio_pci.h"
> > >   #include "../virtio_ring.h"
> > >   #include "vhost.h"
> > > @@ -13,6 +14,9 @@
> > >   struct virtio_user_dev {
> > >   	/* for vhost_user backend */
> > >   	int		vhostfd;
> > > +	int		listenfd;   /* listening fd */
> > > +	bool		connected;  /* connection status */
> >
> > Seems not necessary to add this field; as the connection status can be
> > deduced from (vhostfd>=0)
> 
> Ok, remove it.
> 
> >
> > > +	bool		is_server;  /* server or client mode */
> > >
> > >   	/* for vhost_kernel backend */
> > >   	char		*ifname;
> > > diff --git a/drivers/net/virtio/virtio_user_ethdev.c
> > > b/drivers/net/virtio/virtio_user_ethdev.c
> > > index 263649006..5b8c8e291 100644
> > > --- a/drivers/net/virtio/virtio_user_ethdev.c
> > > +++ b/drivers/net/virtio/virtio_user_ethdev.c
> > > @@ -24,15 +24,76 @@
> > >   #define virtio_user_get_dev(hw) \
> > >   	((struct virtio_user_dev *)(hw)->virtio_user_dev)
> > >
> > > +static void
> > > +virtio_user_server_reconnection(struct virtio_user_dev *dev)
> >
> > s/reconnection/reconnect?
> >
> 
> Ok, Good Suggestion.
> 
> > > +{
> > > +	int ret;
> > > +	int flag;
> > > +	int connectfd;
> > > +	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> > > +
> > > +	if (dev->connected)
> > > +		return;
> > > +
> > > +	connectfd = accept(dev->listenfd, NULL, NULL);
> > > +	if (connectfd < 0)
> > > +		return;
> > > +
> > > +	dev->vhostfd = connectfd;
> > > +	flag = fcntl(connectfd, F_GETFD);
> > > +	fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
> > > +
> > > +	ret = virtio_user_start_device(dev);
> > > +	if (ret < 0)
> > > +		return;
> > > +
> > > +	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> > > +		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > > +			PMD_DRV_LOG(ERR, "interrupt disable failed");
> > > +			return;
> > > +		}
> > > +		rte_intr_callback_unregister(eth_dev->intr_handle,
> > > +					     virtio_interrupt_handler,
> > > +					     eth_dev);
> > > +		eth_dev->intr_handle->fd = connectfd;
> > > +		rte_intr_callback_register(eth_dev->intr_handle,
> > > +					   virtio_interrupt_handler, eth_dev);
> > > +
> > > +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > > +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> > > +			return;
> > > +		}
> > > +	}
> > > +	dev->connected = true;
> > > +	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection
> > > +succeeds!"); }
> > > +
> > >   static void
> > >   virtio_user_delayed_handler(void *param)
> > >   {
> > >   	struct virtio_hw *hw = (struct virtio_hw *)param;
> > > -	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> > > +	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> > > +	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
> > >
> > > -	rte_intr_callback_unregister(dev->intr_handle,
> > > -				     virtio_interrupt_handler,
> > > -				     dev);
> > > +	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > > +		PMD_DRV_LOG(ERR, "interrupt disable failed");
> > > +		return;
> > > +	}
> > > +	rte_intr_callback_unregister(eth_dev->intr_handle,
> > > +				     virtio_interrupt_handler, eth_dev);
> > > +	if (dev->is_server) {
> > > +		if (dev->vhostfd >= 0) {
> > > +			close(dev->vhostfd);
> > > +			dev->vhostfd = -1;
> > > +		}
> > > +		eth_dev->intr_handle->fd = dev->listenfd;
> > > +		rte_intr_callback_register(eth_dev->intr_handle,
> > > +					   virtio_interrupt_handler, eth_dev);
> > > +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > > +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> > > +			return;
> > > +		}
> > > +	}
> > >   }
> > >
> > >   static void
> > > @@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> > size_t offset,
> > >   			r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
> >
> > As server mode and the connection is not connected, vhostfd is -1 now,
> then
> > r < 0 and errno is EBADF, how could it go into server handling in the "else if"
> > block?
> >
> 
> I think I don't catch you.  When server mode, if vhostfd = -1,
> if (dev->vhostfd >= 0)  is false, so, the code will goto to check else if (dev->is_server)
> to handle server mode.

I'm sorry, I overlooked the "if (dev->vhostfd >= 0)" check; you are right!

Thanks,
Jianfeng

> 
> > >   			if (r == 0 || (r < 0 && errno != EAGAIN)) {
> > >   				dev->status &= (~VIRTIO_NET_S_LINK_UP);
> > > -				PMD_DRV_LOG(ERR, "virtio-user port %u is
> > down",
> > > -					    hw->port_id);
> > > +
> > >   				/* Only client mode is available now. Once
> > the
> >
> > Can you also correct this note as we support server mode now?
> 
> Ok.
> >
> > >   				 * connection is broken, it can never be up
> > >   				 * again. Besides, this function could be called
> > @@ -74,9
> > > +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t
> > offset,
> > >   				 * callback cannot be unregistered here, set
> > an
> > >   				 * alarm to do it.
> > >   				 */
> > > -				rte_eal_alarm_set(1,
> > > +				if (dev->connected) {
> > > +					dev->connected = false;
> > > +					PMD_DRV_LOG(ERR, "virtio-user
> > port %u is down",
> > > +						    hw->port_id);
> > > +					rte_eal_alarm_set(1,
> > >
> > virtio_user_delayed_handler,
> > >   						  (void *)hw);
> > > +				}
> > >   			} else {
> > >   				dev->status |= VIRTIO_NET_S_LINK_UP;
> > >   			}
> > > @@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw
> *hw,
> > size_t offset,
> > >   				PMD_DRV_LOG(ERR, "error clearing
> > O_NONBLOCK flag");
> > >   				return;
> > >   			}
> > > -		}
> > > +
> > > +		} else if (dev->is_server)
> > > +			virtio_user_server_reconnection(dev);
> > > +
> > >   		*(uint16_t *)dst = dev->status;
> > >   	}
> > >
> > > @@ -278,12 +346,15 @@ static const char *valid_args[] = {
> > >   	VIRTIO_USER_ARG_QUEUE_SIZE,
> > >   #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
> > >   	VIRTIO_USER_ARG_INTERFACE_NAME,
> > > +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> > > +	VIRTIO_USER_ARG_SERVER_MODE,
> > >   	NULL
> > >   };
> > >
> > >   #define VIRTIO_USER_DEF_CQ_EN	0
> > >   #define VIRTIO_USER_DEF_Q_NUM	1
> > >   #define VIRTIO_USER_DEF_Q_SZ	256
> > > +#define VIRTIO_USER_DEF_SERVER_MODE	0
> > >
> > >   static int
> > >   get_string_arg(const char *key __rte_unused, @@ -378,10 +449,12 @@
> > > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> > >   	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
> > >   	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
> > >   	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> > > +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
> > >   	char *path = NULL;
> > >   	char *ifname = NULL;
> > >   	char *mac_addr = NULL;
> > >   	int ret = -1;
> > > +	struct virtio_user_dev *vu_dev = NULL;
> > >
> > >   	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
> > >   	if (!kvlist) {
> > > @@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device
> > *dev)
> > >   		}
> > >   	}
> > >
> > > +	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1)
> > {
> > > +		if (rte_kvargs_process(kvlist,
> > VIRTIO_USER_ARG_SERVER_MODE,
> > > +				       &get_integer_arg, &server_mode) < 0) {
> > > +			PMD_INIT_LOG(ERR, "error to parse %s",
> > > +				     VIRTIO_USER_ARG_SERVER_MODE);
> > > +			goto end;
> > > +		}
> > > +	}
> > > +
> > >   	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
> > >   		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
> > >   				       &get_integer_arg, &cq) < 0) { @@ -476,6
> > +558,11 @@
> > > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> > >   		}
> > >
> > >   		hw = eth_dev->data->dev_private;
> > > +		vu_dev = virtio_user_get_dev(hw);
> > > +		if (server_mode == 1)
> > > +			vu_dev->is_server = true;
> > > +		else
> > > +			vu_dev->is_server = false;
> > >   		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues,
> > cq,
> > >   				 queue_size, mac_addr, &ifname) < 0) {
> > >   			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
  2018-04-03 15:16       ` Tan, Jianfeng
  2018-04-04  3:31         ` Yang, Zhiyong
@ 2018-04-04  5:37         ` Tiwei Bie
  2018-04-04  9:59           ` Yang, Zhiyong
  1 sibling, 1 reply; 65+ messages in thread
From: Tiwei Bie @ 2018-04-04  5:37 UTC (permalink / raw)
  To: zhiyong.yang; +Cc: Tan, Jianfeng, dev, maxime.coquelin, thomas, zhihong.wang

On Tue, Apr 03, 2018 at 11:16:26PM +0800, Tan, Jianfeng wrote:
> On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
[...]
> > @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
> >   	dev->vhostfds = NULL;
> >   	dev->tapfds = NULL;
> 
> Add a check here:
> if (dev->is_server && !is_vhost_user_by_type(dev->path))
>         return error;
> 
> > -	if (is_vhost_user_by_type(dev->path)) {
> > -		dev->ops = &ops_user;
> > +	if (dev->is_server) {
> > +		dev->ops = &ops_user;/* server mode only supports vhost user*/
> >   	} else {
> > -		dev->ops = &ops_kernel;
> > -
> > -		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
> > -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > -		if (!dev->vhostfds || !dev->tapfds) {
> > -			PMD_INIT_LOG(ERR, "Failed to malloc");
> > -			return -1;
> > -		}
> > -
> > -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> > -			dev->vhostfds[q] = -1;
> > -			dev->tapfds[q] = -1;
> > +		if (is_vhost_user_by_type(dev->path)) {
> > +			dev->ops = &ops_user;
> > +		} else {
> > +			dev->ops = &ops_kernel;
> > +
> > +			dev->vhostfds = malloc(dev->max_queue_pairs *
> > +					       sizeof(int));
> > +			dev->tapfds = malloc(dev->max_queue_pairs *
> > +					     sizeof(int));
> > +			if (!dev->vhostfds || !dev->tapfds) {
> > +				PMD_INIT_LOG(ERR, "Failed to malloc");
> > +				return -1;
> > +			}
> > +
> > +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> > +				dev->vhostfds[q] = -1;
> > +				dev->tapfds[q] = -1;
> > +			}
> >   		}
> >   	}

Hi Zhiyong,

I think we can keep using is_vhost_user_by_type() to
select dev->ops; you just need to
add a check in the vhost-kernel case. Something like
this:

--- i/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ w/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -270,6 +270,9 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	if (is_vhost_user_by_type(dev->path)) {
 		dev->ops = &ops_user;
 	} else {
+		if (dev->is_server)
+			return -1;
+
 		dev->ops = &ops_kernel;
 
 		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));


Thanks

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
  2018-04-04  5:37         ` Tiwei Bie
@ 2018-04-04  9:59           ` Yang, Zhiyong
  2018-04-04 14:57             ` Yang, Zhiyong
  0 siblings, 1 reply; 65+ messages in thread
From: Yang, Zhiyong @ 2018-04-04  9:59 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: Tan, Jianfeng, dev, maxime.coquelin, thomas, Wang, Zhihong



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Wednesday, April 4, 2018 1:37 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; dev@dpdk.org;
> maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong
> <zhihong.wang@intel.com>
> Subject: Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
> 
> On Tue, Apr 03, 2018 at 11:16:26PM +0800, Tan, Jianfeng wrote:
> > On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> [...]
> > > @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev
> *dev)
> > >   	dev->vhostfds = NULL;
> > >   	dev->tapfds = NULL;
> >
> > Add a check here:
> > if (dev->is_server && !is_vhost_user_by_type(dev->path))
> >         return error;
> >
> > > -	if (is_vhost_user_by_type(dev->path)) {
> > > -		dev->ops = &ops_user;
> > > +	if (dev->is_server) {
> > > +		dev->ops = &ops_user;/* server mode only supports vhost
> user*/
> > >   	} else {
> > > -		dev->ops = &ops_kernel;
> > > -
> > > -		dev->vhostfds = malloc(dev->max_queue_pairs *
> sizeof(int));
> > > -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > > -		if (!dev->vhostfds || !dev->tapfds) {
> > > -			PMD_INIT_LOG(ERR, "Failed to malloc");
> > > -			return -1;
> > > -		}
> > > -
> > > -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > -			dev->vhostfds[q] = -1;
> > > -			dev->tapfds[q] = -1;
> > > +		if (is_vhost_user_by_type(dev->path)) {
> > > +			dev->ops = &ops_user;
> > > +		} else {
> > > +			dev->ops = &ops_kernel;
> > > +
> > > +			dev->vhostfds = malloc(dev->max_queue_pairs *
> > > +					       sizeof(int));
> > > +			dev->tapfds = malloc(dev->max_queue_pairs *
> > > +					     sizeof(int));
> > > +			if (!dev->vhostfds || !dev->tapfds) {
> > > +				PMD_INIT_LOG(ERR, "Failed to malloc");
> > > +				return -1;
> > > +			}
> > > +
> > > +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > +				dev->vhostfds[q] = -1;
> > > +				dev->tapfds[q] = -1;
> > > +			}
> > >   		}
> > >   	}
> 
> Hi Zhiyong,
> 
> I think we can keep using is_vhost_user_by_type() to determine the ops for
> dev->ops. And you just need to add a check in the vhost-kernel case.
> Something like
> this:
> 
> --- i/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ w/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -270,6 +270,9 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>  	if (is_vhost_user_by_type(dev->path)) {
>  		dev->ops = &ops_user;
>  	} else {
> +		if (dev->is_server)
> +			return -1;
> +
>  		dev->ops = &ops_kernel;
> 
>  		dev->vhostfds = malloc(dev->max_queue_pairs *
> sizeof(int));
> 

Ok, thanks, tiwei.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
  2018-04-04  9:59           ` Yang, Zhiyong
@ 2018-04-04 14:57             ` Yang, Zhiyong
  0 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-04-04 14:57 UTC (permalink / raw)
  To: Yang, Zhiyong, Bie, Tiwei
  Cc: Tan, Jianfeng, dev, maxime.coquelin, thomas, Wang, Zhihong



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yang, Zhiyong
> Sent: Wednesday, April 4, 2018 5:59 PM
> To: Bie, Tiwei <tiwei.bie@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; dev@dpdk.org;
> maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong
> <zhihong.wang@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v4 1/1] net/virtio-user: add support for
> server mode
> 
> 
> 
> > -----Original Message-----
> > From: Bie, Tiwei
> > Sent: Wednesday, April 4, 2018 1:37 PM
> > To: Yang, Zhiyong <zhiyong.yang@intel.com>
> > Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; dev@dpdk.org;
> > maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong
> > <zhihong.wang@intel.com>
> > Subject: Re: [PATCH v4 1/1] net/virtio-user: add support for server
> > mode
> >
> > On Tue, Apr 03, 2018 at 11:16:26PM +0800, Tan, Jianfeng wrote:
> > > On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> > [...]
> > > > @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev
> > *dev)
> > > >   	dev->vhostfds = NULL;
> > > >   	dev->tapfds = NULL;
> > >
> > > Add a check here:
> > > if (dev->is_server && !is_vhost_user_by_type(dev->path))
> > >         return error;
> > >
> > > > -	if (is_vhost_user_by_type(dev->path)) {
> > > > -		dev->ops = &ops_user;
> > > > +	if (dev->is_server) {
> > > > +		dev->ops = &ops_user;/* server mode only supports vhost
> > user*/
> > > >   	} else {
> > > > -		dev->ops = &ops_kernel;
> > > > -
> > > > -		dev->vhostfds = malloc(dev->max_queue_pairs *
> > sizeof(int));
> > > > -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > > > -		if (!dev->vhostfds || !dev->tapfds) {
> > > > -			PMD_INIT_LOG(ERR, "Failed to malloc");
> > > > -			return -1;
> > > > -		}
> > > > -
> > > > -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > > -			dev->vhostfds[q] = -1;
> > > > -			dev->tapfds[q] = -1;
> > > > +		if (is_vhost_user_by_type(dev->path)) {
> > > > +			dev->ops = &ops_user;
> > > > +		} else {
> > > > +			dev->ops = &ops_kernel;
> > > > +
> > > > +			dev->vhostfds = malloc(dev->max_queue_pairs *
> > > > +					       sizeof(int));
> > > > +			dev->tapfds = malloc(dev->max_queue_pairs *
> > > > +					     sizeof(int));
> > > > +			if (!dev->vhostfds || !dev->tapfds) {
> > > > +				PMD_INIT_LOG(ERR, "Failed to malloc");
> > > > +				return -1;
> > > > +			}
> > > > +
> > > > +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > > +				dev->vhostfds[q] = -1;
> > > > +				dev->tapfds[q] = -1;
> > > > +			}
> > > >   		}
> > > >   	}
> >
> > Hi Zhiyong,
> >
> > I think we can keep using is_vhost_user_by_type() to determine the ops
> > for
> > dev->ops. And you just need to add a check in the vhost-kernel case.
> > Something like
> > this:
> >
> > --- i/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > +++ w/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > @@ -270,6 +270,9 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
> >  	if (is_vhost_user_by_type(dev->path)) {
> >  		dev->ops = &ops_user;
> >  	} else {
> > +		if (dev->is_server)
> > +			return -1;
> > +
> >  		dev->ops = &ops_kernel;
> >
> >  		dev->vhostfds = malloc(dev->max_queue_pairs *
> sizeof(int));

Tiwei, Jianfeng,
On second thought, is_vhost_user_by_type() cannot help distinguish the vhost-user backend from the vhost-kernel backend in server mode,
since no socket file exists at this point in either case.

Thanks
Zhiyong


^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v5] net/virtio-user: add support for server mode
  2018-04-03 12:20     ` [PATCH v4 1/1] net/virtio-user: add support for server mode zhiyong.yang
  2018-04-03 15:16       ` Tan, Jianfeng
@ 2018-04-04 17:17       ` zhiyong.yang
  2018-04-05  8:29         ` Tiwei Bie
                           ` (2 more replies)
  1 sibling, 3 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-04-04 17:17 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, thomas, jianfeng.tan, zhihong.wang, tiwei.bie,
	dong1.wang, Zhiyong Yang

In a container environment if the vhost-user backend restarts, there's
no way for it to reconnect to virtio-user. To address this, support for
server mode is added. In this mode the socket file is created by virtio-
user, which the backend then connects to. This means that if the backend
restarts, it can reconnect to virtio-user and continue communications.

With the current implementation, LSC is enabled on the virtio-user side so
that it can accept the incoming connection.

Release note is updated in this patch.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---

Changes in V5:
1. Support server mode virtio-user startup in non-blocking mode.
2. rebase on top of dpdk-next-virtio.

Changes in V4:
1. Don't create new pthread any more and use librte_eal interrupt thread.
2. virtio-user no longer works in blocking mode for the first connection.
Client mode vhost-user starts up first, then server mode virtio-user creates
the socket file and starts up. This keeps consistency with the usage of client
mode virtio-user.

Changes in V3:
1. use the EAL epoll mechanism instead of vhost events. The vhost event APIs
are no longer exported.
2. rebase the code on top of dpdk-next-virtio

Changes in V2:
1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing issues
which are not strongly related to support for server mode
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.

 doc/guides/rel_notes/release_18_05.rst           |  6 ++
 drivers/net/virtio/virtio_user/vhost_user.c      | 47 ++++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 67 ++++++++++------
 drivers/net/virtio/virtio_user/virtio_user_dev.h |  3 +
 drivers/net/virtio/virtio_user_ethdev.c          | 98 +++++++++++++++++++++---
 5 files changed, 180 insertions(+), 41 deletions(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 9cc77f893..f8897b2e9 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -58,6 +58,12 @@ New Features
   * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
   * Added support for DROP action in flow API.
 
+* **Added support for virtio-user server mode.**
+  In a container environment if the vhost-user backend restarts, there's no way
+  for it to reconnect to virtio-user. To address this, support for server mode
+  is added. In this mode the socket file is created by virtio-user, which the
+  backend connects to. This means that if the backend restarts, it can reconnect
+  to virtio-user and continue communications.
 
 API Changes
 -----------
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..f08dfeda9 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,34 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	return 0;
 }
 
+#define MAX_VIRTIO_USER_BACKLOG 1
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+	int ret;
+	int flag;
+	int fd = dev->listenfd;
+
+	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+			    dev->path, strerror(errno));
+		goto err;
+	}
+	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+	if (ret < 0)
+		goto err;
+
+	flag = fcntl(fd, F_GETFL);
+	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
+	dev->vhostfd = -1;
+
+	return 0;
+err:
+	close(dev->listenfd);
+	return -1;
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -390,6 +418,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
 {
 	int fd;
 	int flag;
+	int ret = 0;
 	struct sockaddr_un un;
 
 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -405,14 +434,20 @@ vhost_user_setup(struct virtio_user_dev *dev)
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
 	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-		close(fd);
-		return -1;
+
+	if (dev->is_server) {
+		dev->listenfd = fd;
+		ret = virtio_user_start_server(dev, &un);
+	} else {
+		dev->vhostfd = fd;
+		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
 	}
 
-	dev->vhostfd = fd;
-	return 0;
+	return ret;
 }
 
 static int
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index f90fee9e5..45e324679 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -254,7 +254,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
 	eth_dev->intr_handle->fd = -1;
 	if (dev->vhostfd >= 0)
 		eth_dev->intr_handle->fd = dev->vhostfd;
-
+	else if (dev->is_server)
+		eth_dev->intr_handle->fd = dev->listenfd;
 	return 0;
 }
 
@@ -267,24 +268,29 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	dev->vhostfds = NULL;
 	dev->tapfds = NULL;
 
-	if (is_vhost_user_by_type(dev->path)) {
-		dev->ops = &ops_user;
+	if (dev->is_server) {
+		dev->ops = &ops_user;/* server mode only supports vhost user */
 	} else {
-		dev->ops = &ops_kernel;
-
-		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
-		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
-		if (!dev->vhostfds || !dev->tapfds) {
-			PMD_INIT_LOG(ERR, "Failed to malloc");
-			return -1;
-		}
-
-		for (q = 0; q < dev->max_queue_pairs; ++q) {
-			dev->vhostfds[q] = -1;
-			dev->tapfds[q] = -1;
+		if (is_vhost_user_by_type(dev->path)) {
+			dev->ops = &ops_user;
+		} else {
+			dev->ops = &ops_kernel;
+
+			dev->vhostfds = malloc(dev->max_queue_pairs *
+					       sizeof(int));
+			dev->tapfds = malloc(dev->max_queue_pairs *
+					     sizeof(int));
+			if (!dev->vhostfds || !dev->tapfds) {
+				PMD_INIT_LOG(ERR, "Failed to malloc");
+				return -1;
+			}
+
+			for (q = 0; q < dev->max_queue_pairs; ++q) {
+				dev->vhostfds[q] = -1;
+				dev->tapfds[q] = -1;
+			}
 		}
 	}
-
 	if (dev->ops->setup(dev) < 0)
 		return -1;
 
@@ -337,16 +343,21 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
 		return -1;
 	}
 
-	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
-		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
-		return -1;
-	}
+	if (dev->vhostfd >= 0) {
+		if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
+			PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
+			return -1;
+		}
 
-	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
-			    &dev->device_features) < 0) {
-		PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
-		return -1;
+		if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
+				&dev->device_features) < 0) {
+			PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
+			return -1;
+		}
+	} else {
+		dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
 	}
+
 	if (dev->mac_specified)
 		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
 
@@ -388,6 +399,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 
 	close(dev->vhostfd);
 
+	if (dev->is_server && dev->listenfd >= 0) {
+		close(dev->listenfd);
+		dev->listenfd = -1;
+	}
+
 	if (dev->vhostfds) {
 		for (i = 0; i < dev->max_queue_pairs; ++i)
 			close(dev->vhostfds[i]);
@@ -396,6 +412,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 	}
 
 	free(dev->ifname);
+
+	if (dev->is_server)
+		unlink(dev->path);
 }
 
 static uint8_t
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index 5f8755771..ade727e46 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -6,6 +6,7 @@
 #define _VIRTIO_USER_DEV_H
 
 #include <limits.h>
+#include <stdbool.h>
 #include "../virtio_pci.h"
 #include "../virtio_ring.h"
 #include "vhost.h"
@@ -13,6 +14,8 @@
 struct virtio_user_dev {
 	/* for vhost_user backend */
 	int		vhostfd;
+	int		listenfd;   /* listening fd */
+	bool		is_server;  /* server or client mode */
 
 	/* for vhost_kernel backend */
 	char		*ifname;
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 263649006..c34749953 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -24,15 +24,72 @@
 #define virtio_user_get_dev(hw) \
 	((struct virtio_user_dev *)(hw)->virtio_user_dev)
 
+static void
+virtio_user_server_reconnect(struct virtio_user_dev *dev)
+{
+	int ret;
+	int flag;
+	int connectfd;
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+
+	connectfd = accept(dev->listenfd, NULL, NULL);
+	if (connectfd < 0)
+		return;
+
+	dev->vhostfd = connectfd;
+	flag = fcntl(connectfd, F_GETFD);
+	fcntl(connectfd, F_SETFL, flag | O_NONBLOCK);
+
+	ret = virtio_user_start_device(dev);
+	if (ret < 0)
+		return;
+
+	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt disable failed");
+			return;
+		}
+		rte_intr_callback_unregister(eth_dev->intr_handle,
+					     virtio_interrupt_handler,
+					     eth_dev);
+		eth_dev->intr_handle->fd = connectfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
+	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
+}
+
 static void
 virtio_user_delayed_handler(void *param)
 {
 	struct virtio_hw *hw = (struct virtio_hw *)param;
-	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
+	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-	rte_intr_callback_unregister(dev->intr_handle,
-				     virtio_interrupt_handler,
-				     dev);
+	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+		PMD_DRV_LOG(ERR, "interrupt disable failed");
+		return;
+	}
+	rte_intr_callback_unregister(eth_dev->intr_handle,
+				     virtio_interrupt_handler, eth_dev);
+	if (dev->is_server) {
+		if (dev->vhostfd >= 0) {
+			close(dev->vhostfd);
+			dev->vhostfd = -1;
+		}
+		eth_dev->intr_handle->fd = dev->listenfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
 }
 
 static void
@@ -67,12 +124,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				dev->status &= (~VIRTIO_NET_S_LINK_UP);
 				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
 					    hw->port_id);
-				/* Only client mode is available now. Once the
-				 * connection is broken, it can never be up
-				 * again. Besides, this function could be called
-				 * in the process of interrupt handling,
-				 * callback cannot be unregistered here, set an
-				 * alarm to do it.
+
+				/* This function could be called in the process
+				 * of interrupt handling, callback cannot be
+				 * unregistered here, set an alarm to do it.
 				 */
 				rte_eal_alarm_set(1,
 						  virtio_user_delayed_handler,
@@ -85,7 +140,9 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
 				return;
 			}
-		}
+		} else if (dev->is_server)
+			virtio_user_server_reconnect(dev);
+
 		*(uint16_t *)dst = dev->status;
 	}
 
@@ -278,12 +335,15 @@ static const char *valid_args[] = {
 	VIRTIO_USER_ARG_QUEUE_SIZE,
 #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
 	VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+	VIRTIO_USER_ARG_SERVER_MODE,
 	NULL
 };
 
 #define VIRTIO_USER_DEF_CQ_EN	0
 #define VIRTIO_USER_DEF_Q_NUM	1
 #define VIRTIO_USER_DEF_Q_SZ	256
+#define VIRTIO_USER_DEF_SERVER_MODE	0
 
 static int
 get_string_arg(const char *key __rte_unused,
@@ -378,10 +438,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
 	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
 	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
 	char *path = NULL;
 	char *ifname = NULL;
 	char *mac_addr = NULL;
 	int ret = -1;
+	struct virtio_user_dev *vu_dev = NULL;
 
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
 	if (!kvlist) {
@@ -445,6 +507,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+				       &get_integer_arg, &server_mode) < 0) {
+			PMD_INIT_LOG(ERR, "error to parse %s",
+				     VIRTIO_USER_ARG_SERVER_MODE);
+			goto end;
+		}
+	}
+
 	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
 		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
 				       &get_integer_arg, &cq) < 0) {
@@ -476,6 +547,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 
 		hw = eth_dev->data->dev_private;
+		vu_dev = virtio_user_get_dev(hw);
+		if (server_mode == 1)
+			vu_dev->is_server = true;
+		else
+			vu_dev->is_server = false;
 		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
 				 queue_size, mac_addr, &ifname) < 0) {
 			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH v5] net/virtio-user: add support for server mode
  2018-04-04 17:17       ` [PATCH v5] " zhiyong.yang
@ 2018-04-05  8:29         ` Tiwei Bie
  2018-04-05  9:19           ` Yang, Zhiyong
  2018-04-06  7:22           ` Yang, Zhiyong
  2018-04-05  9:21         ` Yang, Zhiyong
  2018-04-06  0:18         ` [PATCH v6] " zhiyong.yang
  2 siblings, 2 replies; 65+ messages in thread
From: Tiwei Bie @ 2018-04-05  8:29 UTC (permalink / raw)
  To: zhiyong.yang
  Cc: dev, maxime.coquelin, thomas, jianfeng.tan, zhihong.wang, dong1.wang

On Thu, Apr 05, 2018 at 01:17:53AM +0800, zhiyong.yang@intel.com wrote:
[...]
> +static int
> +virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
> +{
> +	int ret;
> +	int flag;
> +	int fd = dev->listenfd;
> +
> +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
> +			    dev->path, strerror(errno));
> +		goto err;
> +	}
> +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> +	if (ret < 0)
> +		goto err;
> +
> +	flag = fcntl(fd, F_GETFL);
> +	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
> +	dev->vhostfd = -1;
> +
> +	return 0;
> +err:
> +	close(dev->listenfd);

The dev->listenfd isn't created in this function, maybe it's
better to avoid closing this file in this function.

> +	return -1;
> +}
> +
>  /**
>   * Set up environment to talk with a vhost user backend.
>   *
> @@ -390,6 +418,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
>  {
>  	int fd;
>  	int flag;
> +	int ret = 0;
>  	struct sockaddr_un un;
>  
>  	fd = socket(AF_UNIX, SOCK_STREAM, 0);
> @@ -405,14 +434,20 @@ vhost_user_setup(struct virtio_user_dev *dev)
>  	memset(&un, 0, sizeof(un));
>  	un.sun_family = AF_UNIX;
>  	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> -		close(fd);
> -		return -1;
> +
> +	if (dev->is_server) {
> +		dev->listenfd = fd;
> +		ret = virtio_user_start_server(dev, &un);
> +	} else {

Maybe it's better to keep the style consistent. How
about something like this:

	if (dev->is_server) {
		if (virtio_user_start_server(fd, &un) < 0) {
			PMD_DRV_LOG(ERR, some messages...);
			close(fd);
			return -1;
		}
		dev->listenfd = fd;
		dev->vhostfd = -1;
	} else {

> +		dev->vhostfd = fd;
> +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> +			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> +			close(fd);
> +			return -1;
> +		}
>  	}
>  
> -	dev->vhostfd = fd;
> -	return 0;
> +	return ret;
>  }
>  
>  static int
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index f90fee9e5..45e324679 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -254,7 +254,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
>  	eth_dev->intr_handle->fd = -1;
>  	if (dev->vhostfd >= 0)
>  		eth_dev->intr_handle->fd = dev->vhostfd;
> -

Maybe it's better to keep this empty line (keep it before the return 0).

> +	else if (dev->is_server)
> +		eth_dev->intr_handle->fd = dev->listenfd;
>  	return 0;
>  }
>  
> @@ -267,24 +268,29 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>  	dev->vhostfds = NULL;
>  	dev->tapfds = NULL;
>  
> -	if (is_vhost_user_by_type(dev->path)) {
> -		dev->ops = &ops_user;
> +	if (dev->is_server) {
> +		dev->ops = &ops_user;/* server mode only supports vhost user */
>  	} else {
> -		dev->ops = &ops_kernel;
> -
> -		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
> -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> -		if (!dev->vhostfds || !dev->tapfds) {
> -			PMD_INIT_LOG(ERR, "Failed to malloc");
> -			return -1;
> -		}
> -
> -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> -			dev->vhostfds[q] = -1;
> -			dev->tapfds[q] = -1;
> +		if (is_vhost_user_by_type(dev->path)) {
> +			dev->ops = &ops_user;
> +		} else {
> +			dev->ops = &ops_kernel;
> +
> +			dev->vhostfds = malloc(dev->max_queue_pairs *
> +					       sizeof(int));
> +			dev->tapfds = malloc(dev->max_queue_pairs *
> +					     sizeof(int));
> +			if (!dev->vhostfds || !dev->tapfds) {
> +				PMD_INIT_LOG(ERR, "Failed to malloc");
> +				return -1;
> +			}
> +
> +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> +				dev->vhostfds[q] = -1;
> +				dev->tapfds[q] = -1;
> +			}
>  		}
>  	}
> -

There is no need to remove this empty line.

>  	if (dev->ops->setup(dev) < 0)
>  		return -1;
>  
> @@ -337,16 +343,21 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
>  		return -1;
>  	}
>  
> -	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
> -		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> -		return -1;
> -	}
> +	if (dev->vhostfd >= 0) {
> +		if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
> +			PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> +			return -1;
> +		}
>  
> -	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> -			    &dev->device_features) < 0) {
> -		PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
> -		return -1;
> +		if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> +				&dev->device_features) < 0) {
> +			PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
> +			return -1;
> +		}
> +	} else {
> +		dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;

If the backend doesn't support e.g. VIRTIO_RING_F_INDIRECT_DESC,
will it cause any problem?

>  	}
> +
>  	if (dev->mac_specified)
>  		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
>  
> @@ -388,6 +399,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>  
>  	close(dev->vhostfd);
>  
> +	if (dev->is_server && dev->listenfd >= 0) {
> +		close(dev->listenfd);
> +		dev->listenfd = -1;
> +	}
> +
>  	if (dev->vhostfds) {
>  		for (i = 0; i < dev->max_queue_pairs; ++i)
>  			close(dev->vhostfds[i]);
> @@ -396,6 +412,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>  	}
>  
>  	free(dev->ifname);
> +
> +	if (dev->is_server)
> +		unlink(dev->path);
>  }
[...]
>  
>  static int
>  get_string_arg(const char *key __rte_unused,
> @@ -378,10 +438,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>  	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
>  	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
>  	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
>  	char *path = NULL;
>  	char *ifname = NULL;
>  	char *mac_addr = NULL;
>  	int ret = -1;
> +	struct virtio_user_dev *vu_dev = NULL;

Maybe it's better to move the definition of vu_dev after
eth_dev. And there is no need to initialize it.

Thanks

>  
>  	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
>  	if (!kvlist) {
> @@ -445,6 +507,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>  		}
>  	}
>  
> +	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
> +		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
> +				       &get_integer_arg, &server_mode) < 0) {
> +			PMD_INIT_LOG(ERR, "error to parse %s",
> +				     VIRTIO_USER_ARG_SERVER_MODE);
> +			goto end;
> +		}
> +	}
> +
>  	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
>  		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
>  				       &get_integer_arg, &cq) < 0) {
> @@ -476,6 +547,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>  		}
>  
>  		hw = eth_dev->data->dev_private;
> +		vu_dev = virtio_user_get_dev(hw);
> +		if (server_mode == 1)
> +			vu_dev->is_server = true;
> +		else
> +			vu_dev->is_server = false;
>  		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
>  				 queue_size, mac_addr, &ifname) < 0) {
>  			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
> -- 
> 2.14.3
> 

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v5] net/virtio-user: add support for server mode
  2018-04-05  8:29         ` Tiwei Bie
@ 2018-04-05  9:19           ` Yang, Zhiyong
  2018-04-06  7:22           ` Yang, Zhiyong
  1 sibling, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-04-05  9:19 UTC (permalink / raw)
  To: Bie, Tiwei
  Cc: dev, maxime.coquelin, thomas, Tan, Jianfeng, Wang, Zhihong, Wang, Dong1

Tiwei,

Thanks a lot for your review and comments.

Reply inline.

> -----Original Message-----
> From: Bie, Tiwei
> Sent: Thursday, April 5, 2018 4:29 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: dev@dpdk.org; maxime.coquelin@redhat.com; thomas@monjalon.net;
> Tan, Jianfeng <jianfeng.tan@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>; Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH v5] net/virtio-user: add support for server mode
> 
> On Thu, Apr 05, 2018 at 01:17:53AM +0800, zhiyong.yang@intel.com wrote:
> [...]
> > +static int
> > +virtio_user_start_server(struct virtio_user_dev *dev, struct
> > +sockaddr_un *un) {
> > +	int ret;
> > +	int flag;
> > +	int fd = dev->listenfd;
> > +
> > +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> > +	if (ret < 0) {
> > +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> try again\n",
> > +			    dev->path, strerror(errno));
> > +		goto err;
> > +	}
> > +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> > +	if (ret < 0)
> > +		goto err;
> > +
> > +	flag = fcntl(fd, F_GETFL);
> > +	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
> > +	dev->vhostfd = -1;
> > +
> > +	return 0;
> > +err:
> > +	close(dev->listenfd);
> 
> The dev->listenfd isn't created in this function, maybe it's better to avoid
> closing this file in this function.
> 

Ok.

> > +	return -1;
> > +}
> > +
> >  /**
> >   * Set up environment to talk with a vhost user backend.
> >   *
> > @@ -390,6 +418,7 @@ vhost_user_setup(struct virtio_user_dev *dev)  {
> >  	int fd;
> >  	int flag;
> > +	int ret = 0;
> >  	struct sockaddr_un un;
> >
> >  	fd = socket(AF_UNIX, SOCK_STREAM, 0); @@ -405,14 +434,20 @@
> > vhost_user_setup(struct virtio_user_dev *dev)
> >  	memset(&un, 0, sizeof(un));
> >  	un.sun_family = AF_UNIX;
> >  	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> > -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> > -		close(fd);
> > -		return -1;
> > +
> > +	if (dev->is_server) {
> > +		dev->listenfd = fd;
> > +		ret = virtio_user_start_server(dev, &un);
> > +	} else {
> 
> Maybe it's better to keep the style consistent. How about something like this:
> 
> 	if (dev->is_server) {
> 		if (virtio_user_start_server(fd, &un) < 0) {
> 			PMD_DRV_LOG(ERR, some messages...);
> 			close(fd);
> 			return -1;
> 		}
> 		dev->listenfd = fd;
> 		dev->vhostfd = -1;
> 	} else {
> 

Ok. it looks better.

So, the following code changes also.

> > +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > +			PMD_DRV_LOG(ERR, "connect error, %s",
> strerror(errno));
> > +			close(fd);
> > +			return -1;
> > +		}
> > +		dev->vhostfd = fd;

Keep consistency.

> >  	}
> >
> > -	dev->vhostfd = fd;
> > -	return 0;
> > +	return ret;
> >  }
> >
> >  static int
> > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > index f90fee9e5..45e324679 100644
> > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > @@ -254,7 +254,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev
> *dev)
> >  	eth_dev->intr_handle->fd = -1;
> >  	if (dev->vhostfd >= 0)
> >  		eth_dev->intr_handle->fd = dev->vhostfd;
> > -
> 
> Maybe it's better to keep this empty line (keep it before the return 0).
>

Ok.
 
> > +	else if (dev->is_server)
> > +		eth_dev->intr_handle->fd = dev->listenfd;
> >  	return 0;
> >  }
> >
> > @@ -267,24 +268,29 @@ virtio_user_dev_setup(struct virtio_user_dev
> *dev)
> >  	dev->vhostfds = NULL;
> >  	dev->tapfds = NULL;
> >
> > -	if (is_vhost_user_by_type(dev->path)) {
> > -		dev->ops = &ops_user;
> > +	if (dev->is_server) {
> > +		dev->ops = &ops_user;/* server mode only supports vhost
> user */
> >  	} else {
> > -		dev->ops = &ops_kernel;
> > -
> > -		dev->vhostfds = malloc(dev->max_queue_pairs *
> sizeof(int));
> > -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > -		if (!dev->vhostfds || !dev->tapfds) {
> > -			PMD_INIT_LOG(ERR, "Failed to malloc");
> > -			return -1;
> > -		}
> > -
> > -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> > -			dev->vhostfds[q] = -1;
> > -			dev->tapfds[q] = -1;
> > +		if (is_vhost_user_by_type(dev->path)) {
> > +			dev->ops = &ops_user;
> > +		} else {
> > +			dev->ops = &ops_kernel;
> > +
> > +			dev->vhostfds = malloc(dev->max_queue_pairs *
> > +					       sizeof(int));
> > +			dev->tapfds = malloc(dev->max_queue_pairs *
> > +					     sizeof(int));
> > +			if (!dev->vhostfds || !dev->tapfds) {
> > +				PMD_INIT_LOG(ERR, "Failed to malloc");
> > +				return -1;
> > +			}
> > +
> > +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> > +				dev->vhostfds[q] = -1;
> > +				dev->tapfds[q] = -1;
> > +			}
> >  		}
> >  	}
> > -
> 
> There is no need to remove this empty line.
> 
Ok
> >  	if (dev->ops->setup(dev) < 0)
> >  		return -1;
> >
> > @@ -337,16 +343,21 @@ virtio_user_dev_init(struct virtio_user_dev *dev,
> char *path, int queues,
> >  		return -1;
> >  	}
> >
> > -	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL)
> < 0) {
> > -		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> > -		return -1;
> > -	}
> > +	if (dev->vhostfd >= 0) {
> > +		if (dev->ops->send_request(dev,
> VHOST_USER_SET_OWNER, NULL) < 0) {
> > +			PMD_INIT_LOG(ERR, "set_owner fails: %s",
> strerror(errno));
> > +			return -1;
> > +		}
> >
> > -	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> > -			    &dev->device_features) < 0) {
> > -		PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> > -		return -1;
> > +		if (dev->ops->send_request(dev,
> VHOST_USER_GET_FEATURES,
> > +				&dev->device_features) < 0) {
> > +			PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> > +			return -1;
> > +		}
> > +	} else {
> > +		dev->device_features =
> VIRTIO_USER_SUPPORTED_FEATURES;
> 
> If the backend doesn't support e.g. VIRTIO_RING_F_INDIRECT_DESC.
> Will it cause any problem?
> 
Let me try it and see what happens.

> >  	}
> > +
> >  	if (dev->mac_specified)
> >  		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
> >
> > @@ -388,6 +399,11 @@ virtio_user_dev_uninit(struct virtio_user_dev
> > *dev)
> >
> >  	close(dev->vhostfd);
> >
> > +	if (dev->is_server && dev->listenfd >= 0) {
> > +		close(dev->listenfd);
> > +		dev->listenfd = -1;
> > +	}
> > +
> >  	if (dev->vhostfds) {
> >  		for (i = 0; i < dev->max_queue_pairs; ++i)
> >  			close(dev->vhostfds[i]);
> > @@ -396,6 +412,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
> >  	}
> >
> >  	free(dev->ifname);
> > +
> > +	if (dev->is_server)
> > +		unlink(dev->path);
> >  }
> [...]
> >
> >  static int
> >  get_string_arg(const char *key __rte_unused, @@ -378,10 +438,12 @@
> > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> >  	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
> >  	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
> >  	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> > +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
> >  	char *path = NULL;
> >  	char *ifname = NULL;
> >  	char *mac_addr = NULL;
> >  	int ret = -1;
> > +	struct virtio_user_dev *vu_dev = NULL;
> 
> Maybe it's better to move the definition of vu_dev after eth_dev. And there
> isn't no need to initialize it.
> 

Ok.

thanks
Zhiyong


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v5] net/virtio-user: add support for server mode
  2018-04-04 17:17       ` [PATCH v5] " zhiyong.yang
  2018-04-05  8:29         ` Tiwei Bie
@ 2018-04-05  9:21         ` Yang, Zhiyong
  2018-04-06  0:18         ` [PATCH v6] " zhiyong.yang
  2 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-04-05  9:21 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, thomas, Tan, Jianfeng, Wang, Zhihong, Bie,
	Tiwei, Wang, Dong1

Ping Maxime, Jianfeng

Do you have any comments about the patch?

Thanks
Zhiyong

> -----Original Message-----
> From: Yang, Zhiyong
> Sent: Thursday, April 5, 2018 1:18 AM
> To: dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; thomas@monjalon.net; Tan, Jianfeng
> <jianfeng.tan@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Dong1 <dong1.wang@intel.com>; Yang,
> Zhiyong <zhiyong.yang@intel.com>
> Subject: [PATCH v5] net/virtio-user: add support for server mode
> 
> In a container environment if the vhost-user backend restarts, there's no
> way for it to reconnect to virtio-user. To address this, support for server
> mode is added. In this mode the socket file is created by virtio-user, which
> the backend then connects to. This means that if the backend restarts, it can
> reconnect to virtio-user and continue communications.
> 
> With current implementation, LSC is enabled at virtio-user side to support to
> accept the coming connection.
> 
> Release note is updated in this patch.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v6] net/virtio-user: add support for server mode
  2018-04-06  0:18         ` [PATCH v6] " zhiyong.yang
@ 2018-04-05 18:13           ` Tan, Jianfeng
  2018-04-06  7:14             ` Yang, Zhiyong
  2018-04-06  9:25           ` [PATCH v7] " zhiyong.yang
  1 sibling, 1 reply; 65+ messages in thread
From: Tan, Jianfeng @ 2018-04-05 18:13 UTC (permalink / raw)
  To: zhiyong.yang, dev; +Cc: maxime.coquelin, tiwei.bie, dong1.wang, zhihong.wang



On 4/6/2018 8:18 AM, zhiyong.yang@intel.com wrote:
> In a container environment if the vhost-user backend restarts, there's
> no way for it to reconnect to virtio-user. To address this, support for
> server mode is added. In this mode the socket file is created by virtio-
> user, which the backend then connects to. This means that if the backend
> restarts, it can reconnect to virtio-user and continue communications.
>
> With current implementation, LSC is enabled at virtio-user side to
> support to accept the coming connection.
>
> Release note is updated in this patch.
>
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>
> Changes in V6:
> 1. fix reporting of wrong link status in server mode.
> 2. fix some code style issues.
>
> Changes in V5:
> 1. Support server mode virtio-user startup in non-blocking mode.
> 2. rebase on top of dpdk-next-virtio.
>
> Changes in V4:
> 1. Don't create new pthread any more and use librte_eal interrupt thread.
> 2. virtio-user doesn't work in blocking mode any more for the first connection.
> Client mode vhost-user startups firstly, then server mode virtio-user creates
> socket file and startups. Keep consistency with usage of client mode
> virtio-user.
>
> Changes in V3:
> 1. use EAL epoll mechanism instead of vhost events. Cancel to export vhost
> event APIs.
> 2. rebase the code on top of dpdk-next-virtio
>
> Changes in V2:
> 1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing issues
> which is not strongly related to support for server mode
> 2. move fdset related functions to librte_eal from librte_vhost exposed as
> new APIs.
> 3. release note is added in the patch 5/5.
> 4. squash data structure change patch into 4/5 according to Maxime's suggestion.
>
>   doc/guides/rel_notes/release_18_05.rst           |   6 ++
>   drivers/net/virtio/virtio_user/vhost_user.c      |  45 ++++++++--
>   drivers/net/virtio/virtio_user/virtio_user_dev.c |  40 +++++++--
>   drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +
>   drivers/net/virtio/virtio_user_ethdev.c          | 101 ++++++++++++++++++++---
>   5 files changed, 171 insertions(+), 24 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
> index 9cc77f893..f8897b2e9 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -58,6 +58,12 @@ New Features
>     * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
>     * Added support for DROP action in flow API.
>   
> +* **Added support for virtio-user server mode.**
> +  In a container environment if the vhost-user backend restarts, there's no way
> +  for it to reconnect to virtio-user. To address this, support for server mode
> +  is added. In this mode the socket file is created by virtio-user, which the
> +  backend connects to. This means that if the backend restarts, it can reconnect
> +  to virtio-user and continue communications.
>   
>   API Changes
>   -----------
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
> index 91c6449bb..a6df97a00 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -378,6 +378,30 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   	return 0;
>   }
>   
> +#define MAX_VIRTIO_USER_BACKLOG 1
> +static int
> +virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
> +{
> +	int ret;
> +	int flag;
> +	int fd = dev->listenfd;
> +
> +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
> +			    dev->path, strerror(errno));
> +		return -1;
> +	}
> +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> +	if (ret < 0)
> +		return -1;
> +
> +	flag = fcntl(fd, F_GETFL);
> +	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
> +
> +	return 0;
> +}
> +
>   /**
>    * Set up environment to talk with a vhost user backend.
>    *
> @@ -405,13 +429,24 @@ vhost_user_setup(struct virtio_user_dev *dev)
>   	memset(&un, 0, sizeof(un));
>   	un.sun_family = AF_UNIX;
>   	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> -		close(fd);
> -		return -1;
> +
> +	if (dev->is_server) {
> +		dev->listenfd = fd;
> +		if (virtio_user_start_server(dev, &un) < 0) {
> +			PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode");
> +			close(fd);
> +			return -1;
> +		}
> +		dev->vhostfd = -1;
> +	} else {
> +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> +			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> +			close(fd);
> +			return -1;
> +		}
> +		dev->vhostfd = fd;
>   	}
>   
> -	dev->vhostfd = fd;
>   	return 0;
>   }
>   
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index f90fee9e5..3b776282c 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -99,6 +99,9 @@ virtio_user_start_device(struct virtio_user_dev *dev)
>   	uint64_t features;
>   	int ret;
>   
> +	if (dev->vhostfd < 0)
> +		return -1;

This breaks virtio-user with vhost-kernel, which does not use vhostfd;
there it will always be -1.

> +
>   	/* Do not check return as already done in init, or reset in stop */
>   	dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL);
>   
> @@ -254,6 +257,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
>   	eth_dev->intr_handle->fd = -1;
>   	if (dev->vhostfd >= 0)
>   		eth_dev->intr_handle->fd = dev->vhostfd;
> +	else if (dev->is_server)
> +		eth_dev->intr_handle->fd = dev->listenfd;
>   
>   	return 0;
>   }
> @@ -267,7 +272,7 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>   	dev->vhostfds = NULL;
>   	dev->tapfds = NULL;
>   
> -	if (is_vhost_user_by_type(dev->path)) {
> +	if (dev->is_server || is_vhost_user_by_type(dev->path)) {

I think we still fail to catch an invalid case: specifying the "server"
parameter for a vhost-kernel path.

>   		dev->ops = &ops_user;
>   	} else {
>   		dev->ops = &ops_kernel;
> @@ -337,16 +342,25 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
>   		return -1;
>   	}
>   
> -	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
> -		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> -		return -1;
> -	}
> +	if (dev->vhostfd >= 0) {
> +		if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER,
> +					   NULL) < 0) {
> +			PMD_INIT_LOG(ERR, "set_owner fails: %s",
> +				     strerror(errno));
> +			return -1;
> +		}
>   
> -	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> -			    &dev->device_features) < 0) {
> -		PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
> -		return -1;
> +		if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> +					   &dev->device_features) < 0) {
> +			PMD_INIT_LOG(ERR, "get_features failed: %s",
> +				     strerror(errno));
> +			return -1;
> +		}
> +	} else {
> +		/* Just pretend vhost-user can support all these features */

Although I don't think we need to fix this, please also note that this
could be problematic if some feature is negotiated but not supported
by the vhost-user backend that connects later.

> +		dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
>   	}
> +
>   	if (dev->mac_specified)
>   		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
>   
> @@ -388,6 +402,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>   
>   	close(dev->vhostfd);
>   
> +	if (dev->is_server && dev->listenfd >= 0) {
> +		close(dev->listenfd);
> +		dev->listenfd = -1;
> +	}
> +
>   	if (dev->vhostfds) {
>   		for (i = 0; i < dev->max_queue_pairs; ++i)
>   			close(dev->vhostfds[i]);
> @@ -396,6 +415,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>   	}
>   
>   	free(dev->ifname);
> +
> +	if (dev->is_server)
> +		unlink(dev->path);
>   }
>   
>   static uint8_t
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> index 5f8755771..ade727e46 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> @@ -6,6 +6,7 @@
>   #define _VIRTIO_USER_DEV_H
>   
>   #include <limits.h>
> +#include <stdbool.h>
>   #include "../virtio_pci.h"
>   #include "../virtio_ring.h"
>   #include "vhost.h"
> @@ -13,6 +14,8 @@
>   struct virtio_user_dev {
>   	/* for vhost_user backend */
>   	int		vhostfd;
> +	int		listenfd;   /* listening fd */
> +	bool		is_server;  /* server or client mode */
>   
>   	/* for vhost_kernel backend */
>   	char		*ifname;
> diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
> index 263649006..4e7b3c34f 100644
> --- a/drivers/net/virtio/virtio_user_ethdev.c
> +++ b/drivers/net/virtio/virtio_user_ethdev.c
> @@ -24,15 +24,73 @@
>   #define virtio_user_get_dev(hw) \
>   	((struct virtio_user_dev *)(hw)->virtio_user_dev)
>   
> +static int
> +virtio_user_server_reconnect(struct virtio_user_dev *dev)
> +{
> +	int ret;
> +	int flag;
> +	int connectfd;
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> +
> +	connectfd = accept(dev->listenfd, NULL, NULL);
> +	if (connectfd < 0)
> +		return -1;
> +
> +	dev->vhostfd = connectfd;
> +	flag = fcntl(connectfd, F_GETFD);
> +	fcntl(connectfd, F_SETFL, flag | O_NONBLOCK);
> +
> +	ret = virtio_user_start_device(dev);
> +	if (ret < 0)
> +		return -1;
> +
> +	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> +		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt disable failed");
> +			return -1;
> +		}
> +		rte_intr_callback_unregister(eth_dev->intr_handle,
> +					     virtio_interrupt_handler,
> +					     eth_dev);
> +		eth_dev->intr_handle->fd = connectfd;
> +		rte_intr_callback_register(eth_dev->intr_handle,
> +					   virtio_interrupt_handler, eth_dev);
> +
> +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +			return -1;
> +		}
> +	}
> +	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
> +	return 0;
> +}
> +
>   static void
>   virtio_user_delayed_handler(void *param)
>   {
>   	struct virtio_hw *hw = (struct virtio_hw *)param;
> -	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> +	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
>   
> -	rte_intr_callback_unregister(dev->intr_handle,
> -				     virtio_interrupt_handler,
> -				     dev);
> +	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> +		PMD_DRV_LOG(ERR, "interrupt disable failed");
> +		return;
> +	}
> +	rte_intr_callback_unregister(eth_dev->intr_handle,
> +				     virtio_interrupt_handler, eth_dev);
> +	if (dev->is_server) {
> +		if (dev->vhostfd >= 0) {
> +			close(dev->vhostfd);
> +			dev->vhostfd = -1;
> +		}
> +		eth_dev->intr_handle->fd = dev->listenfd;
> +		rte_intr_callback_register(eth_dev->intr_handle,
> +					   virtio_interrupt_handler, eth_dev);
> +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +			return;
> +		}
> +	}
>   }
>   
>   static void
> @@ -67,12 +125,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
>   				dev->status &= (~VIRTIO_NET_S_LINK_UP);
>   				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
>   					    hw->port_id);
> -				/* Only client mode is available now. Once the
> -				 * connection is broken, it can never be up
> -				 * again. Besides, this function could be called
> -				 * in the process of interrupt handling,
> -				 * callback cannot be unregistered here, set an
> -				 * alarm to do it.
> +
> +				/* This function could be called in the process
> +				 * of interrupt handling, callback cannot be
> +				 * unregistered here, set an alarm to do it.
>   				 */
>   				rte_eal_alarm_set(1,
>   						  virtio_user_delayed_handler,
> @@ -85,7 +141,12 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
>   				PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
>   				return;
>   			}
> +		} else if (dev->is_server) {
> +			dev->status &= (~VIRTIO_NET_S_LINK_UP);
> +			if (virtio_user_server_reconnect(dev) >= 0)
> +				dev->status |= VIRTIO_NET_S_LINK_UP;
>   		}
> +
>   		*(uint16_t *)dst = dev->status;
>   	}
>   
> @@ -278,12 +339,15 @@ static const char *valid_args[] = {
>   	VIRTIO_USER_ARG_QUEUE_SIZE,
>   #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
>   	VIRTIO_USER_ARG_INTERFACE_NAME,
> +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> +	VIRTIO_USER_ARG_SERVER_MODE,
>   	NULL
>   };
>   
>   #define VIRTIO_USER_DEF_CQ_EN	0
>   #define VIRTIO_USER_DEF_Q_NUM	1
>   #define VIRTIO_USER_DEF_Q_SZ	256
> +#define VIRTIO_USER_DEF_SERVER_MODE	0
>   
>   static int
>   get_string_arg(const char *key __rte_unused,
> @@ -378,6 +442,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
>   	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
>   	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
>   	char *path = NULL;
>   	char *ifname = NULL;
>   	char *mac_addr = NULL;
> @@ -445,6 +510,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   		}
>   	}
>   
> +	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
> +		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
> +				       &get_integer_arg, &server_mode) < 0) {
> +			PMD_INIT_LOG(ERR, "error to parse %s",
> +				     VIRTIO_USER_ARG_SERVER_MODE);
> +			goto end;
> +		}
> +	}
> +
>   	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
>   		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
>   				       &get_integer_arg, &cq) < 0) {
> @@ -469,6 +543,8 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   	}
>   
>   	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		struct virtio_user_dev *vu_dev;
> +
>   		eth_dev = virtio_user_eth_dev_alloc(dev);
>   		if (!eth_dev) {
>   			PMD_INIT_LOG(ERR, "virtio_user fails to alloc device");
> @@ -476,6 +552,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>   		}
>   
>   		hw = eth_dev->data->dev_private;
> +		vu_dev = virtio_user_get_dev(hw);
> +		if (server_mode == 1)
> +			vu_dev->is_server = true;
> +		else
> +			vu_dev->is_server = false;
>   		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
>   				 queue_size, mac_addr, &ifname) < 0) {
>   			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v6] net/virtio-user: add support for server mode
  2018-04-04 17:17       ` [PATCH v5] " zhiyong.yang
  2018-04-05  8:29         ` Tiwei Bie
  2018-04-05  9:21         ` Yang, Zhiyong
@ 2018-04-06  0:18         ` zhiyong.yang
  2018-04-05 18:13           ` Tan, Jianfeng
  2018-04-06  9:25           ` [PATCH v7] " zhiyong.yang
  2 siblings, 2 replies; 65+ messages in thread
From: zhiyong.yang @ 2018-04-06  0:18 UTC (permalink / raw)
  To: dev
  Cc: maxime.coquelin, jianfeng.tan, tiwei.bie, dong1.wang,
	zhihong.wang, Zhiyong Yang

In a container environment if the vhost-user backend restarts, there's
no way for it to reconnect to virtio-user. To address this, support for
server mode is added. In this mode the socket file is created by virtio-
user, which the backend then connects to. This means that if the backend
restarts, it can reconnect to virtio-user and continue communications.

With current implementation, LSC is enabled at virtio-user side to
support to accept the coming connection.

Release note is updated in this patch.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---

Changes in V6:
1. fix reporting of wrong link status in server mode.
2. fix some code style issues.

Changes in V5:
1. Support server mode virtio-user startup in non-blocking mode.
2. rebase on top of dpdk-next-virtio.

Changes in V4:
1. Don't create new pthread any more and use librte_eal interrupt thread.
2. virtio-user doesn't work in blocking mode any more for the first connection.
Client mode vhost-user startups firstly, then server mode virtio-user creates
socket file and startups. Keep consistency with usage of client mode
virtio-user. 

Changes in V3:
1. use EAL epoll mechanism instead of vhost events. Cancel to export vhost
event APIs.
2. rebase the code on top of dpdk-next-virtio

Changes in V2:
1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing issues
which is not strongly related to support for server mode
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.

 doc/guides/rel_notes/release_18_05.rst           |   6 ++
 drivers/net/virtio/virtio_user/vhost_user.c      |  45 ++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c |  40 +++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +
 drivers/net/virtio/virtio_user_ethdev.c          | 101 ++++++++++++++++++++---
 5 files changed, 171 insertions(+), 24 deletions(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 9cc77f893..f8897b2e9 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -58,6 +58,12 @@ New Features
   * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
   * Added support for DROP action in flow API.
 
+* **Added support for virtio-user server mode.**
+  In a container environment if the vhost-user backend restarts, there's no way
+  for it to reconnect to virtio-user. To address this, support for server mode
+  is added. In this mode the socket file is created by virtio-user, which the
+  backend connects to. This means that if the backend restarts, it can reconnect
+  to virtio-user and continue communications.
 
 API Changes
 -----------
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..a6df97a00 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,30 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	return 0;
 }
 
+#define MAX_VIRTIO_USER_BACKLOG 1
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+	int ret;
+	int flag;
+	int fd = dev->listenfd;
+
+	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+			    dev->path, strerror(errno));
+		return -1;
+	}
+	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+	if (ret < 0)
+		return -1;
+
+	flag = fcntl(fd, F_GETFL);
+	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
+
+	return 0;
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -405,13 +429,24 @@ vhost_user_setup(struct virtio_user_dev *dev)
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
 	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-		close(fd);
-		return -1;
+
+	if (dev->is_server) {
+		dev->listenfd = fd;
+		if (virtio_user_start_server(dev, &un) < 0) {
+			PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode");
+			close(fd);
+			return -1;
+		}
+		dev->vhostfd = -1;
+	} else {
+		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
+		dev->vhostfd = fd;
 	}
 
-	dev->vhostfd = fd;
 	return 0;
 }
 
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index f90fee9e5..3b776282c 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -99,6 +99,9 @@ virtio_user_start_device(struct virtio_user_dev *dev)
 	uint64_t features;
 	int ret;
 
+	if (dev->vhostfd < 0)
+		return -1;
+
 	/* Do not check return as already done in init, or reset in stop */
 	dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL);
 
@@ -254,6 +257,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
 	eth_dev->intr_handle->fd = -1;
 	if (dev->vhostfd >= 0)
 		eth_dev->intr_handle->fd = dev->vhostfd;
+	else if (dev->is_server)
+		eth_dev->intr_handle->fd = dev->listenfd;
 
 	return 0;
 }
@@ -267,7 +272,7 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	dev->vhostfds = NULL;
 	dev->tapfds = NULL;
 
-	if (is_vhost_user_by_type(dev->path)) {
+	if (dev->is_server || is_vhost_user_by_type(dev->path)) {
 		dev->ops = &ops_user;
 	} else {
 		dev->ops = &ops_kernel;
@@ -337,16 +342,25 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
 		return -1;
 	}
 
-	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
-		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
-		return -1;
-	}
+	if (dev->vhostfd >= 0) {
+		if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER,
+					   NULL) < 0) {
+			PMD_INIT_LOG(ERR, "set_owner fails: %s",
+				     strerror(errno));
+			return -1;
+		}
 
-	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
-			    &dev->device_features) < 0) {
-		PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
-		return -1;
+		if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
+					   &dev->device_features) < 0) {
+			PMD_INIT_LOG(ERR, "get_features failed: %s",
+				     strerror(errno));
+			return -1;
+		}
+	} else {
+		/* Just pretend vhost-user can support all these features */
+		dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
 	}
+
 	if (dev->mac_specified)
 		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
 
@@ -388,6 +402,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 
 	close(dev->vhostfd);
 
+	if (dev->is_server && dev->listenfd >= 0) {
+		close(dev->listenfd);
+		dev->listenfd = -1;
+	}
+
 	if (dev->vhostfds) {
 		for (i = 0; i < dev->max_queue_pairs; ++i)
 			close(dev->vhostfds[i]);
@@ -396,6 +415,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 	}
 
 	free(dev->ifname);
+
+	if (dev->is_server)
+		unlink(dev->path);
 }
 
 static uint8_t
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index 5f8755771..ade727e46 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -6,6 +6,7 @@
 #define _VIRTIO_USER_DEV_H
 
 #include <limits.h>
+#include <stdbool.h>
 #include "../virtio_pci.h"
 #include "../virtio_ring.h"
 #include "vhost.h"
@@ -13,6 +14,8 @@
 struct virtio_user_dev {
 	/* for vhost_user backend */
 	int		vhostfd;
+	int		listenfd;   /* listening fd */
+	bool		is_server;  /* server or client mode */
 
 	/* for vhost_kernel backend */
 	char		*ifname;
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 263649006..4e7b3c34f 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -24,15 +24,73 @@
 #define virtio_user_get_dev(hw) \
 	((struct virtio_user_dev *)(hw)->virtio_user_dev)
 
+static int
+virtio_user_server_reconnect(struct virtio_user_dev *dev)
+{
+	int ret;
+	int flag;
+	int connectfd;
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+
+	connectfd = accept(dev->listenfd, NULL, NULL);
+	if (connectfd < 0)
+		return -1;
+
+	dev->vhostfd = connectfd;
+	flag = fcntl(connectfd, F_GETFD);
+	fcntl(connectfd, F_SETFL, flag | O_NONBLOCK);
+
+	ret = virtio_user_start_device(dev);
+	if (ret < 0)
+		return -1;
+
+	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt disable failed");
+			return -1;
+		}
+		rte_intr_callback_unregister(eth_dev->intr_handle,
+					     virtio_interrupt_handler,
+					     eth_dev);
+		eth_dev->intr_handle->fd = connectfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return -1;
+		}
+	}
+	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
+	return 0;
+}
+
 static void
 virtio_user_delayed_handler(void *param)
 {
 	struct virtio_hw *hw = (struct virtio_hw *)param;
-	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
+	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-	rte_intr_callback_unregister(dev->intr_handle,
-				     virtio_interrupt_handler,
-				     dev);
+	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+		PMD_DRV_LOG(ERR, "interrupt disable failed");
+		return;
+	}
+	rte_intr_callback_unregister(eth_dev->intr_handle,
+				     virtio_interrupt_handler, eth_dev);
+	if (dev->is_server) {
+		if (dev->vhostfd >= 0) {
+			close(dev->vhostfd);
+			dev->vhostfd = -1;
+		}
+		eth_dev->intr_handle->fd = dev->listenfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
 }
 
 static void
@@ -67,12 +125,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				dev->status &= (~VIRTIO_NET_S_LINK_UP);
 				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
 					    hw->port_id);
-				/* Only client mode is available now. Once the
-				 * connection is broken, it can never be up
-				 * again. Besides, this function could be called
-				 * in the process of interrupt handling,
-				 * callback cannot be unregistered here, set an
-				 * alarm to do it.
+
+				/* This function could be called in the process
+				 * of interrupt handling, callback cannot be
+				 * unregistered here, set an alarm to do it.
 				 */
 				rte_eal_alarm_set(1,
 						  virtio_user_delayed_handler,
@@ -85,7 +141,12 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
 				return;
 			}
+		} else if (dev->is_server) {
+			dev->status &= (~VIRTIO_NET_S_LINK_UP);
+			if (virtio_user_server_reconnect(dev) >= 0)
+				dev->status |= VIRTIO_NET_S_LINK_UP;
 		}
+
 		*(uint16_t *)dst = dev->status;
 	}
 
@@ -278,12 +339,15 @@ static const char *valid_args[] = {
 	VIRTIO_USER_ARG_QUEUE_SIZE,
 #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
 	VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+	VIRTIO_USER_ARG_SERVER_MODE,
 	NULL
 };
 
 #define VIRTIO_USER_DEF_CQ_EN	0
 #define VIRTIO_USER_DEF_Q_NUM	1
 #define VIRTIO_USER_DEF_Q_SZ	256
+#define VIRTIO_USER_DEF_SERVER_MODE	0
 
 static int
 get_string_arg(const char *key __rte_unused,
@@ -378,6 +442,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
 	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
 	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
 	char *path = NULL;
 	char *ifname = NULL;
 	char *mac_addr = NULL;
@@ -445,6 +510,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+				       &get_integer_arg, &server_mode) < 0) {
+			PMD_INIT_LOG(ERR, "error to parse %s",
+				     VIRTIO_USER_ARG_SERVER_MODE);
+			goto end;
+		}
+	}
+
 	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
 		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
 				       &get_integer_arg, &cq) < 0) {
@@ -469,6 +543,8 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	}
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		struct virtio_user_dev *vu_dev;
+
 		eth_dev = virtio_user_eth_dev_alloc(dev);
 		if (!eth_dev) {
 			PMD_INIT_LOG(ERR, "virtio_user fails to alloc device");
@@ -476,6 +552,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 
 		hw = eth_dev->data->dev_private;
+		vu_dev = virtio_user_get_dev(hw);
+		if (server_mode == 1)
+			vu_dev->is_server = true;
+		else
+			vu_dev->is_server = false;
 		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
 				 queue_size, mac_addr, &ifname) < 0) {
 			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH v6] net/virtio-user: add support for server mode
  2018-04-05 18:13           ` Tan, Jianfeng
@ 2018-04-06  7:14             ` Yang, Zhiyong
  0 siblings, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-04-06  7:14 UTC (permalink / raw)
  To: Tan, Jianfeng, dev
  Cc: maxime.coquelin, Bie, Tiwei, Wang, Dong1, Wang, Zhihong

Hi Jianfeng,

> -----Original Message-----
> From: Tan, Jianfeng
> Sent: Friday, April 6, 2018 2:13 AM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Bie, Tiwei <tiwei.bie@intel.com>; Wang,
> Dong1 <dong1.wang@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> Subject: Re: [PATCH v6] net/virtio-user: add support for server mode
> 
> 
> 
> On 4/6/2018 8:18 AM, zhiyong.yang@intel.com wrote:
> > In a container environment if the vhost-user backend restarts, there's
> > no way for it to reconnect to virtio-user. To address this, support
> > for server mode is added. In this mode the socket file is created by
> > virtio- user, which the backend then connects to. This means that if
> > the backend restarts, it can reconnect to virtio-user and continue
> communications.
> >
> > With current implementation, LSC is enabled at virtio-user side to
> > support to accept the coming connection.
> >
> > Release note is updated in this patch.
> >
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > ---
> >
> > Changes in V6:
> > 1. fix report wrong link stauts in server mode.
> > 2. fix some code style issues.
> >
> > Changes in V5:
> > 1. Support server mode virtio-user startup in non-blocking mode.
> > 2. rebase on top of dpdk-next-virtio.
> >
> > Changes in V4:
> > 1. Don't create new pthread any more and use librte_eal interrupt thread.
> > 2. virtio-user doesn't work in blocking mode any more for the first
> connection.
> > Client mode vhost-user startups firstly, then server mode virtio-user
> > creates socket file and startups. Keep consistency with usage of
> > client mode virtio-user.
> >
> > Changes in V3:
> > 1. use EAL epoll mechanism instead of vhost events. Cancel to export
> > vhost event APIs.
> > 2. rebase the code on top of dpdk-next-virtio
> >
> > Changes in V2:
> > 1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing
> > issues which is not strongly related to support for server mode 2.
> > move fdset related functions to librte_eal from librte_vhost exposed
> > as new APIs.
> > 3. release note is added in the patch 5/5.
> > 4. squash data structure change patch into 4/5 according to Maxime's
> suggestion.
> >
> >   doc/guides/rel_notes/release_18_05.rst           |   6 ++
> >   drivers/net/virtio/virtio_user/vhost_user.c      |  45 ++++++++--
> >   drivers/net/virtio/virtio_user/virtio_user_dev.c |  40 +++++++--
> >   drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +
> >   drivers/net/virtio/virtio_user_ethdev.c          | 101
> ++++++++++++++++++++---
> >   5 files changed, 171 insertions(+), 24 deletions(-)
> >
> > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > b/doc/guides/rel_notes/release_18_05.rst
> > index 9cc77f893..f8897b2e9 100644
> > --- a/doc/guides/rel_notes/release_18_05.rst
> > +++ b/doc/guides/rel_notes/release_18_05.rst
> > @@ -58,6 +58,12 @@ New Features
> >     * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
> >     * Added support for DROP action in flow API.
> >
> > +* **Added support for virtio-user server mode.**
> > +  In a container environment if the vhost-user backend restarts,
> > +there's no way
> > +  for it to reconnect to virtio-user. To address this, support for
> > +server mode
> > +  is added. In this mode the socket file is created by virtio-user,
> > +which the
> > +  backend connects to. This means that if the backend restarts, it
> > +can reconnect
> > +  to virtio-user and continue communications.
> >
> >   API Changes
> >   -----------
> > diff --git a/drivers/net/virtio/virtio_user/vhost_user.c
> > b/drivers/net/virtio/virtio_user/vhost_user.c
> > index 91c6449bb..a6df97a00 100644
> > --- a/drivers/net/virtio/virtio_user/vhost_user.c
> > +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> > @@ -378,6 +378,30 @@ vhost_user_sock(struct virtio_user_dev *dev,
> >   	return 0;
> >   }
> >
> > +#define MAX_VIRTIO_USER_BACKLOG 1
> > +static int
> > +virtio_user_start_server(struct virtio_user_dev *dev, struct
> > +sockaddr_un *un) {
> > +	int ret;
> > +	int flag;
> > +	int fd = dev->listenfd;
> > +
> > +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> > +	if (ret < 0) {
> > +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> try again\n",
> > +			    dev->path, strerror(errno));
> > +		return -1;
> > +	}
> > +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> > +	if (ret < 0)
> > +		return -1;
> > +
> > +	flag = fcntl(fd, F_GETFL);
> > +	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
> > +
> > +	return 0;
> > +}
> > +
> >   /**
> >    * Set up environment to talk with a vhost user backend.
> >    *
> > @@ -405,13 +429,24 @@ vhost_user_setup(struct virtio_user_dev *dev)
> >   	memset(&un, 0, sizeof(un));
> >   	un.sun_family = AF_UNIX;
> >   	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> > -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> > -		close(fd);
> > -		return -1;
> > +
> > +	if (dev->is_server) {
> > +		dev->listenfd = fd;
> > +		if (virtio_user_start_server(dev, &un) < 0) {
> > +			PMD_DRV_LOG(ERR, "virtio-user startup fails in
> server mode");
> > +			close(fd);
> > +			return -1;
> > +		}
> > +		dev->vhostfd = -1;
> > +	} else {
> > +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > +			PMD_DRV_LOG(ERR, "connect error, %s",
> strerror(errno));
> > +			close(fd);
> > +			return -1;
> > +		}
> > +		dev->vhostfd = fd;
> >   	}
> >
> > -	dev->vhostfd = fd;
> >   	return 0;
> >   }
> >
> > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > index f90fee9e5..3b776282c 100644
> > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > @@ -99,6 +99,9 @@ virtio_user_start_device(struct virtio_user_dev *dev)
> >   	uint64_t features;
> >   	int ret;
> >
> > +	if (dev->vhostfd < 0)
> > +		return -1;
> 
> This breaks virtio-user with vhost-kernel which does not need vhostfd, and
> will be always -1.
> 

OK, I will modify the code as follows:
	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
		return -1; 

(is_vhost_user_by_type(dev->path))
> > +
> >   	/* Do not check return as already done in init, or reset in stop */
> >   	dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL);
> >
> > @@ -254,6 +257,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev
> *dev)
> >   	eth_dev->intr_handle->fd = -1;
> >   	if (dev->vhostfd >= 0)
> >   		eth_dev->intr_handle->fd = dev->vhostfd;
> > +	else if (dev->is_server)
> > +		eth_dev->intr_handle->fd = dev->listenfd;
> >
> >   	return 0;
> >   }
> > @@ -267,7 +272,7 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
> >   	dev->vhostfds = NULL;
> >   	dev->tapfds = NULL;
> >
> > -	if (is_vhost_user_by_type(dev->path)) {
> > +	if (dev->is_server || is_vhost_user_by_type(dev->path)) {
> 
> I think we still fail to pick out an invalidated case: specify "server"
> parameter for a vhost-kernel path.

I will add the check at the start of the function:

if (dev->is_server && access(dev->path) == 0)
	return -1;
How about this change?

> 
> >   		dev->ops = &ops_user;
> >   	} else {
> >   		dev->ops = &ops_kernel;
> > @@ -337,16 +342,25 @@ virtio_user_dev_init(struct virtio_user_dev *dev,
> char *path, int queues,
> >   		return -1;
> >   	}
> >
> > -	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL)
> < 0) {
> > -		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> > -		return -1;
> > -	}
> > +	if (dev->vhostfd >= 0) {
> > +		if (dev->ops->send_request(dev,
> VHOST_USER_SET_OWNER,
> > +					   NULL) < 0) {
> > +			PMD_INIT_LOG(ERR, "set_owner fails: %s",
> > +				     strerror(errno));
> > +			return -1;
> > +		}
> >
> > -	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> > -			    &dev->device_features) < 0) {
> > -		PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> > -		return -1;
> > +		if (dev->ops->send_request(dev,
> VHOST_USER_GET_FEATURES,
> > +					   &dev->device_features) < 0) {
> > +			PMD_INIT_LOG(ERR, "get_features failed: %s",
> > +				     strerror(errno));
> > +			return -1;
> > +		}
> > +	} else {
> > +		/* Just pretend vhost-user can support all these features */
> 
> Although I don't think we need to fix this, please also note that this could be
> problematic that if some feature is negotiated but not supported by the
> vhost-user which comes later.
>

Good description. I will add it in next version.
 
Thanks for your comments.

Zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v5] net/virtio-user: add support for server mode
  2018-04-05  8:29         ` Tiwei Bie
  2018-04-05  9:19           ` Yang, Zhiyong
@ 2018-04-06  7:22           ` Yang, Zhiyong
  1 sibling, 0 replies; 65+ messages in thread
From: Yang, Zhiyong @ 2018-04-06  7:22 UTC (permalink / raw)
  To: Bie, Tiwei
  Cc: dev, maxime.coquelin, thomas, Tan, Jianfeng, Wang, Zhihong, Wang, Dong1



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Thursday, April 5, 2018 4:29 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: dev@dpdk.org; maxime.coquelin@redhat.com; thomas@monjalon.net;
> Tan, Jianfeng <jianfeng.tan@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>; Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH v5] net/virtio-user: add support for server mode
> 
> On Thu, Apr 05, 2018 at 01:17:53AM +0800, zhiyong.yang@intel.com wrote:

<snip>

> > @@ -337,16 +343,21 @@ virtio_user_dev_init(struct virtio_user_dev *dev,
> char *path, int queues,
> >  		return -1;
> >  	}
> >
> > -	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL)
> < 0) {
> > -		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> > -		return -1;
> > -	}
> > +	if (dev->vhostfd >= 0) {
> > +		if (dev->ops->send_request(dev,
> VHOST_USER_SET_OWNER, NULL) < 0) {
> > +			PMD_INIT_LOG(ERR, "set_owner fails: %s",
> strerror(errno));
> > +			return -1;
> > +		}
> >
> > -	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> > -			    &dev->device_features) < 0) {
> > -		PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> > -		return -1;
> > +		if (dev->ops->send_request(dev,
> VHOST_USER_GET_FEATURES,
> > +				&dev->device_features) < 0) {
> > +			PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> > +			return -1;
> > +		}
> > +	} else {
> > +		dev->device_features =
> VIRTIO_USER_SUPPORTED_FEATURES;
> 
> If the backend doesn't support e.g. VIRTIO_RING_F_INDIRECT_DESC.
> Will it cause any problem?
> 
vhost-user compares the virtio-user and vhost-user features. As you said, if
VIRTIO_RING_F_INDIRECT_DESC is not supported, vhost-user will run into a
failure. It then closes the connection socket, and virtio-user later detects
the broken connection via LSC.

thanks
zhiyong

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v7] net/virtio-user: add support for server mode
  2018-04-06  0:18         ` [PATCH v6] " zhiyong.yang
  2018-04-05 18:13           ` Tan, Jianfeng
@ 2018-04-06  9:25           ` zhiyong.yang
  2018-04-08  0:36             ` Tan, Jianfeng
  1 sibling, 1 reply; 65+ messages in thread
From: zhiyong.yang @ 2018-04-06  9:25 UTC (permalink / raw)
  To: dev
  Cc: Zhiyong Yang, maxime.coquelin, jianfeng.tan, tiwei.bie,
	zhihong.wang, dong1.wang, thomas

In a container environment if the vhost-user backend restarts, there's
no way for it to reconnect to virtio-user. To address this, support for
server mode is added. In this mode the socket file is created by virtio-
user, which the backend then connects to. This means that if the backend
restarts, it can reconnect to virtio-user and continue communications.

With the current implementation, LSC is enabled on the virtio-user side so
that it can accept the incoming connection.

Server mode virtio-user only works with vhost-user.

Release note is updated in this patch.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---

Cc: maxime.coquelin@redhat.com
Cc: jianfeng.tan@intel.com
Cc: tiwei.bie@intel.com
Cc: zhihong.wang@intel.com
Cc: dong1.wang@intel.com
Cc: thomas@monjalon.net

Changes in V7:
1. avoid misusing vhost-kernel in server mode virtio-user.
2. move the function definition of is_vhost_user_by_type before
virtio_user_start_device so that it can be called there.
3. add comments in the code to state feature negotiation limit.

Changes in V6:
1. fix the wrong link status reported in server mode.
2. fix some code style issues.

Changes in V5:
1. Support server mode virtio-user startup in non-blocking mode.
2. rebase on top of dpdk-next-virtio.

Changes in V4:
1. Don't create new pthread any more and use librte_eal interrupt thread
instead. 

Changes in V3:
1. use the EAL epoll mechanism instead of vhost events. No longer export the
vhost event APIs.
2. rebase the code on top of dpdk-next-virtio

Changes in V2:
1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing issues
which are not strongly related to support for server mode
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.

 doc/guides/rel_notes/release_18_05.rst           |   6 ++
 drivers/net/virtio/virtio_user/vhost_user.c      |  45 ++++++++--
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 101 ++++++++++++++++-------
 drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +
 drivers/net/virtio/virtio_user_ethdev.c          | 101 ++++++++++++++++++++---
 5 files changed, 209 insertions(+), 47 deletions(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 9cc77f893..f8897b2e9 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -58,6 +58,12 @@ New Features
   * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
   * Added support for DROP action in flow API.
 
+* **Added support for virtio-user server mode.**
+  In a container environment if the vhost-user backend restarts, there's no way
+  for it to reconnect to virtio-user. To address this, support for server mode
+  is added. In this mode the socket file is created by virtio-user, which the
+  backend connects to. This means that if the backend restarts, it can reconnect
+  to virtio-user and continue communications.
 
 API Changes
 -----------
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..a6df97a00 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,30 @@ vhost_user_sock(struct virtio_user_dev *dev,
 	return 0;
 }
 
+#define MAX_VIRTIO_USER_BACKLOG 1
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+	int ret;
+	int flag;
+	int fd = dev->listenfd;
+
+	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+			    dev->path, strerror(errno));
+		return -1;
+	}
+	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+	if (ret < 0)
+		return -1;
+
+	flag = fcntl(fd, F_GETFL);
+	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
+
+	return 0;
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -405,13 +429,24 @@ vhost_user_setup(struct virtio_user_dev *dev)
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
 	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-		close(fd);
-		return -1;
+
+	if (dev->is_server) {
+		dev->listenfd = fd;
+		if (virtio_user_start_server(dev, &un) < 0) {
+			PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode");
+			close(fd);
+			return -1;
+		}
+		dev->vhostfd = -1;
+	} else {
+		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+			close(fd);
+			return -1;
+		}
+		dev->vhostfd = fd;
 	}
 
-	dev->vhostfd = fd;
 	return 0;
 }
 
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index f90fee9e5..38b8bc90d 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -93,12 +93,26 @@ virtio_user_queue_setup(struct virtio_user_dev *dev,
 	return 0;
 }
 
+int
+is_vhost_user_by_type(const char *path)
+{
+	struct stat sb;
+
+	if (stat(path, &sb) == -1)
+		return 0;
+
+	return S_ISSOCK(sb.st_mode);
+}
+
 int
 virtio_user_start_device(struct virtio_user_dev *dev)
 {
 	uint64_t features;
 	int ret;
 
+	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
+		return -1;
+
 	/* Do not check return as already done in init, or reset in stop */
 	dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL);
 
@@ -174,17 +188,6 @@ parse_mac(struct virtio_user_dev *dev, const char *mac)
 	}
 }
 
-int
-is_vhost_user_by_type(const char *path)
-{
-	struct stat sb;
-
-	if (stat(path, &sb) == -1)
-		return 0;
-
-	return S_ISSOCK(sb.st_mode);
-}
-
 static int
 virtio_user_dev_init_notify(struct virtio_user_dev *dev)
 {
@@ -254,6 +257,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
 	eth_dev->intr_handle->fd = -1;
 	if (dev->vhostfd >= 0)
 		eth_dev->intr_handle->fd = dev->vhostfd;
+	else if (dev->is_server)
+		eth_dev->intr_handle->fd = dev->listenfd;
 
 	return 0;
 }
@@ -267,21 +272,32 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	dev->vhostfds = NULL;
 	dev->tapfds = NULL;
 
-	if (is_vhost_user_by_type(dev->path)) {
-		dev->ops = &ops_user;
-	} else {
-		dev->ops = &ops_kernel;
-
-		dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
-		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
-		if (!dev->vhostfds || !dev->tapfds) {
-			PMD_INIT_LOG(ERR, "Failed to malloc");
+	if (dev->is_server) {
+		if (access(dev->path, F_OK) == 0 &&
+		    !is_vhost_user_by_type(dev->path)) {
+			PMD_DRV_LOG(ERR, "Server mode doesn't support vhost-kernel!");
 			return -1;
 		}
-
-		for (q = 0; q < dev->max_queue_pairs; ++q) {
-			dev->vhostfds[q] = -1;
-			dev->tapfds[q] = -1;
+		dev->ops = &ops_user;
+	} else {
+		if (is_vhost_user_by_type(dev->path)) {
+			dev->ops = &ops_user;
+		} else {
+			dev->ops = &ops_kernel;
+
+			dev->vhostfds = malloc(dev->max_queue_pairs *
+					       sizeof(int));
+			dev->tapfds = malloc(dev->max_queue_pairs *
+					     sizeof(int));
+			if (!dev->vhostfds || !dev->tapfds) {
+				PMD_INIT_LOG(ERR, "Failed to malloc");
+				return -1;
+			}
+
+			for (q = 0; q < dev->max_queue_pairs; ++q) {
+				dev->vhostfds[q] = -1;
+				dev->tapfds[q] = -1;
+			}
 		}
 	}
 
@@ -337,16 +353,29 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
 		return -1;
 	}
 
-	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
-		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
-		return -1;
-	}
+	if (dev->vhostfd >= 0) {
+		if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER,
+					   NULL) < 0) {
+			PMD_INIT_LOG(ERR, "set_owner fails: %s",
+				     strerror(errno));
+			return -1;
+		}
 
-	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
-			    &dev->device_features) < 0) {
-		PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
-		return -1;
+		if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
+					   &dev->device_features) < 0) {
+			PMD_INIT_LOG(ERR, "get_features failed: %s",
+				     strerror(errno));
+			return -1;
+		}
+	} else {
+		/* We just pretend vhost-user can support all these features.
+		 * Note that this could be problematic that if some feature is
+		 * negotiated but not supported by the vhost-user which comes
+		 * later.
+		 */
+		dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
 	}
+
 	if (dev->mac_specified)
 		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
 
@@ -388,6 +417,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 
 	close(dev->vhostfd);
 
+	if (dev->is_server && dev->listenfd >= 0) {
+		close(dev->listenfd);
+		dev->listenfd = -1;
+	}
+
 	if (dev->vhostfds) {
 		for (i = 0; i < dev->max_queue_pairs; ++i)
 			close(dev->vhostfds[i]);
@@ -396,6 +430,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
 	}
 
 	free(dev->ifname);
+
+	if (dev->is_server)
+		unlink(dev->path);
 }
 
 static uint8_t
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index 5f8755771..ade727e46 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -6,6 +6,7 @@
 #define _VIRTIO_USER_DEV_H
 
 #include <limits.h>
+#include <stdbool.h>
 #include "../virtio_pci.h"
 #include "../virtio_ring.h"
 #include "vhost.h"
@@ -13,6 +14,8 @@
 struct virtio_user_dev {
 	/* for vhost_user backend */
 	int		vhostfd;
+	int		listenfd;   /* listening fd */
+	bool		is_server;  /* server or client mode */
 
 	/* for vhost_kernel backend */
 	char		*ifname;
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 263649006..4e7b3c34f 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -24,15 +24,73 @@
 #define virtio_user_get_dev(hw) \
 	((struct virtio_user_dev *)(hw)->virtio_user_dev)
 
+static int
+virtio_user_server_reconnect(struct virtio_user_dev *dev)
+{
+	int ret;
+	int flag;
+	int connectfd;
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+
+	connectfd = accept(dev->listenfd, NULL, NULL);
+	if (connectfd < 0)
+		return -1;
+
+	dev->vhostfd = connectfd;
+	flag = fcntl(connectfd, F_GETFD);
+	fcntl(connectfd, F_SETFL, flag | O_NONBLOCK);
+
+	ret = virtio_user_start_device(dev);
+	if (ret < 0)
+		return -1;
+
+	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt disable failed");
+			return -1;
+		}
+		rte_intr_callback_unregister(eth_dev->intr_handle,
+					     virtio_interrupt_handler,
+					     eth_dev);
+		eth_dev->intr_handle->fd = connectfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return -1;
+		}
+	}
+	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
+	return 0;
+}
+
 static void
 virtio_user_delayed_handler(void *param)
 {
 	struct virtio_hw *hw = (struct virtio_hw *)param;
-	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
+	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
+	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-	rte_intr_callback_unregister(dev->intr_handle,
-				     virtio_interrupt_handler,
-				     dev);
+	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+		PMD_DRV_LOG(ERR, "interrupt disable failed");
+		return;
+	}
+	rte_intr_callback_unregister(eth_dev->intr_handle,
+				     virtio_interrupt_handler, eth_dev);
+	if (dev->is_server) {
+		if (dev->vhostfd >= 0) {
+			close(dev->vhostfd);
+			dev->vhostfd = -1;
+		}
+		eth_dev->intr_handle->fd = dev->listenfd;
+		rte_intr_callback_register(eth_dev->intr_handle,
+					   virtio_interrupt_handler, eth_dev);
+		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+			return;
+		}
+	}
 }
 
 static void
@@ -67,12 +125,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				dev->status &= (~VIRTIO_NET_S_LINK_UP);
 				PMD_DRV_LOG(ERR, "virtio-user port %u is down",
 					    hw->port_id);
-				/* Only client mode is available now. Once the
-				 * connection is broken, it can never be up
-				 * again. Besides, this function could be called
-				 * in the process of interrupt handling,
-				 * callback cannot be unregistered here, set an
-				 * alarm to do it.
+
+				/* This function could be called in the process
+				 * of interrupt handling, callback cannot be
+				 * unregistered here, set an alarm to do it.
 				 */
 				rte_eal_alarm_set(1,
 						  virtio_user_delayed_handler,
@@ -85,7 +141,12 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
 				PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
 				return;
 			}
+		} else if (dev->is_server) {
+			dev->status &= (~VIRTIO_NET_S_LINK_UP);
+			if (virtio_user_server_reconnect(dev) >= 0)
+				dev->status |= VIRTIO_NET_S_LINK_UP;
 		}
+
 		*(uint16_t *)dst = dev->status;
 	}
 
@@ -278,12 +339,15 @@ static const char *valid_args[] = {
 	VIRTIO_USER_ARG_QUEUE_SIZE,
 #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
 	VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+	VIRTIO_USER_ARG_SERVER_MODE,
 	NULL
 };
 
 #define VIRTIO_USER_DEF_CQ_EN	0
 #define VIRTIO_USER_DEF_Q_NUM	1
 #define VIRTIO_USER_DEF_Q_SZ	256
+#define VIRTIO_USER_DEF_SERVER_MODE	0
 
 static int
 get_string_arg(const char *key __rte_unused,
@@ -378,6 +442,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
 	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
 	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
 	char *path = NULL;
 	char *ifname = NULL;
 	char *mac_addr = NULL;
@@ -445,6 +510,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+				       &get_integer_arg, &server_mode) < 0) {
+			PMD_INIT_LOG(ERR, "error to parse %s",
+				     VIRTIO_USER_ARG_SERVER_MODE);
+			goto end;
+		}
+	}
+
 	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
 		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
 				       &get_integer_arg, &cq) < 0) {
@@ -469,6 +543,8 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 	}
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		struct virtio_user_dev *vu_dev;
+
 		eth_dev = virtio_user_eth_dev_alloc(dev);
 		if (!eth_dev) {
 			PMD_INIT_LOG(ERR, "virtio_user fails to alloc device");
@@ -476,6 +552,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 
 		hw = eth_dev->data->dev_private;
+		vu_dev = virtio_user_get_dev(hw);
+		if (server_mode == 1)
+			vu_dev->is_server = true;
+		else
+			vu_dev->is_server = false;
 		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
 				 queue_size, mac_addr, &ifname) < 0) {
 			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH v7] net/virtio-user: add support for server mode
  2018-04-06  9:25           ` [PATCH v7] " zhiyong.yang
@ 2018-04-08  0:36             ` Tan, Jianfeng
  2018-04-10 11:55               ` Maxime Coquelin
  0 siblings, 1 reply; 65+ messages in thread
From: Tan, Jianfeng @ 2018-04-08  0:36 UTC (permalink / raw)
  To: Yang, Zhiyong, dev
  Cc: maxime.coquelin, Bie, Tiwei, Wang, Zhihong, Wang, Dong1, thomas



> -----Original Message-----
> From: Yang, Zhiyong
> Sent: Friday, April 6, 2018 5:26 PM
> To: dev@dpdk.org
> Cc: Yang, Zhiyong; maxime.coquelin@redhat.com; Tan, Jianfeng; Bie, Tiwei;
> Wang, Zhihong; Wang, Dong1; thomas@monjalon.net
> Subject: [PATCH v7] net/virtio-user: add support for server mode
> 
> In a container environment if the vhost-user backend restarts, there's
> no way for it to reconnect to virtio-user. To address this, support for
> server mode is added. In this mode the socket file is created by virtio-
> user, which the backend then connects to. This means that if the backend
> restarts, it can reconnect to virtio-user and continue communications.
> 
> With the current implementation, LSC is enabled on the virtio-user side
> so that it can accept the incoming connection.
> 
> Server mode virtio-user only supports working with vhost-user.
> 
> Release note is updated in this patch.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>

Reviewed-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks for the great work!

> ---
> 
> Cc: maxime.coquelin@redhat.com
> Cc: jianfeng.tan@intel.com
> Cc: tiwei.bie@intel.com
> Cc: zhihong.wang@intel.com
> Cc: dong1.wang@intel.com
> Cc: thomas@monjalon.net
> 
> Changes in V7:
> 1. avoid misusing vhost-kernel in server mode virtio-user.
> 2. move the function definition of is_vhost_user_by_type before
> virtio_user_start_device so that it can be called there.
> 3. add comments in the code to state the feature negotiation limitation.
> 
> Changes in V6:
> 1. fix report wrong link stauts in server mode.
> 2. fix some code style issues.
> 
> Changes in V5:
> 1. Support server mode virtio-user startup in non-blocking mode.
> 2. rebase on top of dpdk-next-virtio.
> 
> Changes in V4:
> 1. Don't create new pthread any more and use librte_eal interrupt thread
> instead.
> 
> Changes in V3:
> 1. use the EAL epoll mechanism instead of vhost events. The vhost event
> APIs are no longer exported.
> 2. rebase the code on top of dpdk-next-virtio
> 
> Changes in V2:
> 1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing issues
> which are not strongly related to support for server mode
> 2. move fdset related functions to librte_eal from librte_vhost exposed as
> new APIs.
> 3. release note is added in the patch 5/5.
> 4. squash data structure change patch into 4/5 according to Maxime's
> suggestion.
> 
>  doc/guides/rel_notes/release_18_05.rst           |   6 ++
>  drivers/net/virtio/virtio_user/vhost_user.c      |  45 ++++++++--
>  drivers/net/virtio/virtio_user/virtio_user_dev.c | 101 ++++++++++++++++--
> -----
>  drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +
>  drivers/net/virtio/virtio_user_ethdev.c          | 101
> ++++++++++++++++++++---
>  5 files changed, 209 insertions(+), 47 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_18_05.rst
> b/doc/guides/rel_notes/release_18_05.rst
> index 9cc77f893..f8897b2e9 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -58,6 +58,12 @@ New Features
>    * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
>    * Added support for DROP action in flow API.
> 
> +* **Added support for virtio-user server mode.**
> +  In a container environment if the vhost-user backend restarts, there's no
> way
> +  for it to reconnect to virtio-user. To address this, support for server mode
> +  is added. In this mode the socket file is created by virtio-user, which the
> +  backend connects to. This means that if the backend restarts, it can
> reconnect
> +  to virtio-user and continue communications.
> 
>  API Changes
>  -----------
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c
> b/drivers/net/virtio/virtio_user/vhost_user.c
> index 91c6449bb..a6df97a00 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -378,6 +378,30 @@ vhost_user_sock(struct virtio_user_dev *dev,
>  	return 0;
>  }
> 
> +#define MAX_VIRTIO_USER_BACKLOG 1
> +static int
> +virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un
> *un)
> +{
> +	int ret;
> +	int flag;
> +	int fd = dev->listenfd;
> +
> +	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> try again\n",
> +			    dev->path, strerror(errno));
> +		return -1;
> +	}
> +	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> +	if (ret < 0)
> +		return -1;
> +
> +	flag = fcntl(fd, F_GETFL);
> +	fcntl(fd, F_SETFL, flag | O_NONBLOCK);
> +
> +	return 0;
> +}
> +
>  /**
>   * Set up environment to talk with a vhost user backend.
>   *
> @@ -405,13 +429,24 @@ vhost_user_setup(struct virtio_user_dev *dev)
>  	memset(&un, 0, sizeof(un));
>  	un.sun_family = AF_UNIX;
>  	snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> -	if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> -		PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> -		close(fd);
> -		return -1;
> +
> +	if (dev->is_server) {
> +		dev->listenfd = fd;
> +		if (virtio_user_start_server(dev, &un) < 0) {
> +			PMD_DRV_LOG(ERR, "virtio-user startup fails in
> server mode");
> +			close(fd);
> +			return -1;
> +		}
> +		dev->vhostfd = -1;
> +	} else {
> +		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> +			PMD_DRV_LOG(ERR, "connect error, %s",
> strerror(errno));
> +			close(fd);
> +			return -1;
> +		}
> +		dev->vhostfd = fd;
>  	}
> 
> -	dev->vhostfd = fd;
>  	return 0;
>  }
> 
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index f90fee9e5..38b8bc90d 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -93,12 +93,26 @@ virtio_user_queue_setup(struct virtio_user_dev
> *dev,
>  	return 0;
>  }
> 
> +int
> +is_vhost_user_by_type(const char *path)
> +{
> +	struct stat sb;
> +
> +	if (stat(path, &sb) == -1)
> +		return 0;
> +
> +	return S_ISSOCK(sb.st_mode);
> +}
> +
>  int
>  virtio_user_start_device(struct virtio_user_dev *dev)
>  {
>  	uint64_t features;
>  	int ret;
> 
> +	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
> +		return -1;
> +
>  	/* Do not check return as already done in init, or reset in stop */
>  	dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL);
> 
> @@ -174,17 +188,6 @@ parse_mac(struct virtio_user_dev *dev, const char
> *mac)
>  	}
>  }
> 
> -int
> -is_vhost_user_by_type(const char *path)
> -{
> -	struct stat sb;
> -
> -	if (stat(path, &sb) == -1)
> -		return 0;
> -
> -	return S_ISSOCK(sb.st_mode);
> -}
> -
>  static int
>  virtio_user_dev_init_notify(struct virtio_user_dev *dev)
>  {
> @@ -254,6 +257,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev
> *dev)
>  	eth_dev->intr_handle->fd = -1;
>  	if (dev->vhostfd >= 0)
>  		eth_dev->intr_handle->fd = dev->vhostfd;
> +	else if (dev->is_server)
> +		eth_dev->intr_handle->fd = dev->listenfd;
> 
>  	return 0;
>  }
> @@ -267,21 +272,32 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>  	dev->vhostfds = NULL;
>  	dev->tapfds = NULL;
> 
> -	if (is_vhost_user_by_type(dev->path)) {
> -		dev->ops = &ops_user;
> -	} else {
> -		dev->ops = &ops_kernel;
> -
> -		dev->vhostfds = malloc(dev->max_queue_pairs *
> sizeof(int));
> -		dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> -		if (!dev->vhostfds || !dev->tapfds) {
> -			PMD_INIT_LOG(ERR, "Failed to malloc");
> +	if (dev->is_server) {
> +		if (access(dev->path, F_OK) == 0 &&
> +		    !is_vhost_user_by_type(dev->path)) {
> +			PMD_DRV_LOG(ERR, "Server mode doesn't support
> vhost-kernel!");
>  			return -1;
>  		}
> -
> -		for (q = 0; q < dev->max_queue_pairs; ++q) {
> -			dev->vhostfds[q] = -1;
> -			dev->tapfds[q] = -1;
> +		dev->ops = &ops_user;
> +	} else {
> +		if (is_vhost_user_by_type(dev->path)) {
> +			dev->ops = &ops_user;
> +		} else {
> +			dev->ops = &ops_kernel;
> +
> +			dev->vhostfds = malloc(dev->max_queue_pairs *
> +					       sizeof(int));
> +			dev->tapfds = malloc(dev->max_queue_pairs *
> +					     sizeof(int));
> +			if (!dev->vhostfds || !dev->tapfds) {
> +				PMD_INIT_LOG(ERR, "Failed to malloc");
> +				return -1;
> +			}
> +
> +			for (q = 0; q < dev->max_queue_pairs; ++q) {
> +				dev->vhostfds[q] = -1;
> +				dev->tapfds[q] = -1;
> +			}
>  		}
>  	}
> 
> @@ -337,16 +353,29 @@ virtio_user_dev_init(struct virtio_user_dev *dev,
> char *path, int queues,
>  		return -1;
>  	}
> 
> -	if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL)
> < 0) {
> -		PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> -		return -1;
> -	}
> +	if (dev->vhostfd >= 0) {
> +		if (dev->ops->send_request(dev,
> VHOST_USER_SET_OWNER,
> +					   NULL) < 0) {
> +			PMD_INIT_LOG(ERR, "set_owner fails: %s",
> +				     strerror(errno));
> +			return -1;
> +		}
> 
> -	if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> -			    &dev->device_features) < 0) {
> -		PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> -		return -1;
> +		if (dev->ops->send_request(dev,
> VHOST_USER_GET_FEATURES,
> +					   &dev->device_features) < 0) {
> +			PMD_INIT_LOG(ERR, "get_features failed: %s",
> +				     strerror(errno));
> +			return -1;
> +		}
> +	} else {
> +		/* We just pretend vhost-user can support all these features.
> +		 * Note that this could be problematic that if some feature is
> +		 * negotiated but not supported by the vhost-user which
> comes
> +		 * later.
> +		 */
> +		dev->device_features =
> VIRTIO_USER_SUPPORTED_FEATURES;
>  	}
> +
>  	if (dev->mac_specified)
>  		dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
> 
> @@ -388,6 +417,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
> 
>  	close(dev->vhostfd);
> 
> +	if (dev->is_server && dev->listenfd >= 0) {
> +		close(dev->listenfd);
> +		dev->listenfd = -1;
> +	}
> +
>  	if (dev->vhostfds) {
>  		for (i = 0; i < dev->max_queue_pairs; ++i)
>  			close(dev->vhostfds[i]);
> @@ -396,6 +430,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>  	}
> 
>  	free(dev->ifname);
> +
> +	if (dev->is_server)
> +		unlink(dev->path);
>  }
> 
>  static uint8_t
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> index 5f8755771..ade727e46 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> @@ -6,6 +6,7 @@
>  #define _VIRTIO_USER_DEV_H
> 
>  #include <limits.h>
> +#include <stdbool.h>
>  #include "../virtio_pci.h"
>  #include "../virtio_ring.h"
>  #include "vhost.h"
> @@ -13,6 +14,8 @@
>  struct virtio_user_dev {
>  	/* for vhost_user backend */
>  	int		vhostfd;
> +	int		listenfd;   /* listening fd */
> +	bool		is_server;  /* server or client mode */
> 
>  	/* for vhost_kernel backend */
>  	char		*ifname;
> diff --git a/drivers/net/virtio/virtio_user_ethdev.c
> b/drivers/net/virtio/virtio_user_ethdev.c
> index 263649006..4e7b3c34f 100644
> --- a/drivers/net/virtio/virtio_user_ethdev.c
> +++ b/drivers/net/virtio/virtio_user_ethdev.c
> @@ -24,15 +24,73 @@
>  #define virtio_user_get_dev(hw) \
>  	((struct virtio_user_dev *)(hw)->virtio_user_dev)
> 
> +static int
> +virtio_user_server_reconnect(struct virtio_user_dev *dev)
> +{
> +	int ret;
> +	int flag;
> +	int connectfd;
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> +
> +	connectfd = accept(dev->listenfd, NULL, NULL);
> +	if (connectfd < 0)
> +		return -1;
> +
> +	dev->vhostfd = connectfd;
> +	flag = fcntl(connectfd, F_GETFD);
> +	fcntl(connectfd, F_SETFL, flag | O_NONBLOCK);
> +
> +	ret = virtio_user_start_device(dev);
> +	if (ret < 0)
> +		return -1;
> +
> +	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> +		if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt disable failed");
> +			return -1;
> +		}
> +		rte_intr_callback_unregister(eth_dev->intr_handle,
> +					     virtio_interrupt_handler,
> +					     eth_dev);
> +		eth_dev->intr_handle->fd = connectfd;
> +		rte_intr_callback_register(eth_dev->intr_handle,
> +					   virtio_interrupt_handler, eth_dev);
> +
> +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +			return -1;
> +		}
> +	}
> +	PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection
> succeeds!");
> +	return 0;
> +}
> +
>  static void
>  virtio_user_delayed_handler(void *param)
>  {
>  	struct virtio_hw *hw = (struct virtio_hw *)param;
> -	struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> +	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
> 
> -	rte_intr_callback_unregister(dev->intr_handle,
> -				     virtio_interrupt_handler,
> -				     dev);
> +	if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> +		PMD_DRV_LOG(ERR, "interrupt disable failed");
> +		return;
> +	}
> +	rte_intr_callback_unregister(eth_dev->intr_handle,
> +				     virtio_interrupt_handler, eth_dev);
> +	if (dev->is_server) {
> +		if (dev->vhostfd >= 0) {
> +			close(dev->vhostfd);
> +			dev->vhostfd = -1;
> +		}
> +		eth_dev->intr_handle->fd = dev->listenfd;
> +		rte_intr_callback_register(eth_dev->intr_handle,
> +					   virtio_interrupt_handler, eth_dev);
> +		if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +			return;
> +		}
> +	}
>  }
> 
>  static void
> @@ -67,12 +125,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> size_t offset,
>  				dev->status &= (~VIRTIO_NET_S_LINK_UP);
>  				PMD_DRV_LOG(ERR, "virtio-user port %u is
> down",
>  					    hw->port_id);
> -				/* Only client mode is available now. Once
> the
> -				 * connection is broken, it can never be up
> -				 * again. Besides, this function could be called
> -				 * in the process of interrupt handling,
> -				 * callback cannot be unregistered here, set
> an
> -				 * alarm to do it.
> +
> +				/* This function could be called in the process
> +				 * of interrupt handling, callback cannot be
> +				 * unregistered here, set an alarm to do it.
>  				 */
>  				rte_eal_alarm_set(1,
> 
> virtio_user_delayed_handler,
> @@ -85,7 +141,12 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> size_t offset,
>  				PMD_DRV_LOG(ERR, "error clearing
> O_NONBLOCK flag");
>  				return;
>  			}
> +		} else if (dev->is_server) {
> +			dev->status &= (~VIRTIO_NET_S_LINK_UP);
> +			if (virtio_user_server_reconnect(dev) >= 0)
> +				dev->status |= VIRTIO_NET_S_LINK_UP;
>  		}
> +
>  		*(uint16_t *)dst = dev->status;
>  	}
> 
> @@ -278,12 +339,15 @@ static const char *valid_args[] = {
>  	VIRTIO_USER_ARG_QUEUE_SIZE,
>  #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
>  	VIRTIO_USER_ARG_INTERFACE_NAME,
> +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> +	VIRTIO_USER_ARG_SERVER_MODE,
>  	NULL
>  };
> 
>  #define VIRTIO_USER_DEF_CQ_EN	0
>  #define VIRTIO_USER_DEF_Q_NUM	1
>  #define VIRTIO_USER_DEF_Q_SZ	256
> +#define VIRTIO_USER_DEF_SERVER_MODE	0
> 
>  static int
>  get_string_arg(const char *key __rte_unused,
> @@ -378,6 +442,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>  	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
>  	uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
>  	uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> +	uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
>  	char *path = NULL;
>  	char *ifname = NULL;
>  	char *mac_addr = NULL;
> @@ -445,6 +510,15 @@ virtio_user_pmd_probe(struct rte_vdev_device
> *dev)
>  		}
>  	}
> 
> +	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1)
> {
> +		if (rte_kvargs_process(kvlist,
> VIRTIO_USER_ARG_SERVER_MODE,
> +				       &get_integer_arg, &server_mode) < 0) {
> +			PMD_INIT_LOG(ERR, "error to parse %s",
> +				     VIRTIO_USER_ARG_SERVER_MODE);
> +			goto end;
> +		}
> +	}
> +
>  	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
>  		if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
>  				       &get_integer_arg, &cq) < 0) {
> @@ -469,6 +543,8 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
>  	}
> 
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		struct virtio_user_dev *vu_dev;
> +
>  		eth_dev = virtio_user_eth_dev_alloc(dev);
>  		if (!eth_dev) {
>  			PMD_INIT_LOG(ERR, "virtio_user fails to alloc
> device");
> @@ -476,6 +552,11 @@ virtio_user_pmd_probe(struct rte_vdev_device
> *dev)
>  		}
> 
>  		hw = eth_dev->data->dev_private;
> +		vu_dev = virtio_user_get_dev(hw);
> +		if (server_mode == 1)
> +			vu_dev->is_server = true;
> +		else
> +			vu_dev->is_server = false;
>  		if (virtio_user_dev_init(hw->virtio_user_dev, path, queues,
> cq,
>  				 queue_size, mac_addr, &ifname) < 0) {
>  			PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
> --
> 2.14.3

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v7] net/virtio-user: add support for server mode
  2018-04-08  0:36             ` Tan, Jianfeng
@ 2018-04-10 11:55               ` Maxime Coquelin
  0 siblings, 0 replies; 65+ messages in thread
From: Maxime Coquelin @ 2018-04-10 11:55 UTC (permalink / raw)
  To: Tan, Jianfeng, Yang, Zhiyong, dev
  Cc: Bie, Tiwei, Wang, Zhihong, Wang, Dong1, thomas



On 04/08/2018 02:36 AM, Tan, Jianfeng wrote:
> 
>> -----Original Message-----
>> From: Yang, Zhiyong
>> Sent: Friday, April 6, 2018 5:26 PM
>> To:dev@dpdk.org
>> Cc: Yang, Zhiyong;maxime.coquelin@redhat.com; Tan, Jianfeng; Bie, Tiwei;
>> Wang, Zhihong; Wang, Dong1;thomas@monjalon.net
>> Subject: [PATCH v7] net/virtio-user: add support for server mode
>>
>> In a container environment if the vhost-user backend restarts, there's
>> no way for it to reconnect to virtio-user. To address this, support for
>> server mode is added. In this mode the socket file is created by virtio-
>> user, which the backend then connects to. This means that if the backend
>> restarts, it can reconnect to virtio-user and continue communications.
>>
>> With the current implementation, LSC is enabled on the virtio-user side
>> so that it can accept the incoming connection.
>>
>> Server mode virtio-user only supports working with vhost-user.
>>
>> Release note is updated in this patch.
>>
>> Signed-off-by: Zhiyong Yang<zhiyong.yang@intel.com>
> Reviewed-by: Jianfeng Tan<jianfeng.tan@intel.com>

Applied to dpdk-next-virtio/master.

Thanks,
Maxime

^ permalink raw reply	[flat|nested] 65+ messages in thread

end of thread, other threads:[~2018-04-10 11:55 UTC | newest]

Thread overview: 65+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-14 14:53 [PATCH 0/4] add to support for virtio-user server mode Zhiyong Yang
2018-02-14 14:53 ` [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h Zhiyong Yang
2018-02-27 17:51   ` Maxime Coquelin
2018-02-28  1:36     ` Yang, Zhiyong
2018-02-28  8:45       ` Maxime Coquelin
2018-03-01  6:02         ` Tan, Jianfeng
2018-03-01 14:13           ` Thomas Monjalon
2018-03-05  7:43             ` Yang, Zhiyong
2018-03-05  8:54               ` Thomas Monjalon
2018-03-13  8:46                 ` Yang, Zhiyong
2018-03-13  9:43                   ` Thomas Monjalon
2018-03-13  9:50                     ` Yang, Zhiyong
2018-03-15  9:32                       ` Thomas Monjalon
2018-03-16  8:43                         ` Yang, Zhiyong
2018-03-21  6:51                           ` Yang, Zhiyong
2018-03-15  9:45   ` [PATCH v2 0/5] add support for virtio-user server mode zhiyong.yang
2018-03-15  9:45     ` [PATCH v2 1/5] net/virtio: fix add pointer checking zhiyong.yang
2018-03-15  9:45     ` [PATCH v2 2/5] net/virtio: add checking for cvq zhiyong.yang
2018-03-15  9:45     ` [PATCH v2 3/5] eal: expose fdset related APIs zhiyong.yang
2018-03-15  9:45     ` [PATCH v2 4/5] net/virtio-user: add support for server mode zhiyong.yang
2018-03-15  9:45     ` [PATCH v2 5/5] net/vhost: add memory checking zhiyong.yang
2018-02-14 14:53 ` [PATCH 2/4] net/virtio-user: add data members to support server mode Zhiyong Yang
2018-02-27 17:53   ` Maxime Coquelin
2018-02-28  1:38     ` Yang, Zhiyong
2018-02-14 14:53 ` [PATCH 3/4] net/virtio-user: " Zhiyong Yang
2018-02-27 18:01   ` Maxime Coquelin
2018-02-28  1:53     ` Yang, Zhiyong
2018-02-28  8:33       ` Maxime Coquelin
2018-02-14 14:53 ` [PATCH 4/4] net/vhost: add memory checking to support client mode Zhiyong Yang
2018-03-21  3:03 ` [PATCH v3 0/4] add support for virtio-user server mode zhiyong.yang
2018-03-21  3:03   ` [PATCH v3 1/4] net/virtio: fix add pointer checking zhiyong.yang
2018-03-28  7:26     ` Tan, Jianfeng
2018-03-28  7:48       ` Yang, Zhiyong
2018-03-29 11:59     ` Maxime Coquelin
2018-03-29 12:01     ` Maxime Coquelin
2018-03-21  3:03   ` [PATCH v3 2/4] net/virtio: add checking for cvq zhiyong.yang
2018-03-28  8:34     ` Tan, Jianfeng
2018-03-29 11:59     ` Maxime Coquelin
2018-03-29 12:06     ` Maxime Coquelin
2018-03-21  3:03   ` [PATCH v3 3/4] net/virtio-user: add support for server mode zhiyong.yang
2018-03-28 15:14     ` Tan, Jianfeng
2018-03-30  2:08       ` Yang, Zhiyong
2018-03-21  3:03   ` [PATCH v3 4/4] net/vhost: add NULL pointer checking zhiyong.yang
2018-03-29 13:19     ` Maxime Coquelin
2018-03-30  2:00       ` Yang, Zhiyong
2018-03-30  7:41         ` Yang, Zhiyong
2018-04-03 12:20   ` [PATCH v4 0/1] server mode virtio-user zhiyong.yang
2018-04-03 12:20     ` [PATCH v4 1/1] net/virtio-user: add support for server mode zhiyong.yang
2018-04-03 15:16       ` Tan, Jianfeng
2018-04-04  3:31         ` Yang, Zhiyong
2018-04-04  3:47           ` Tan, Jianfeng
2018-04-04  5:37         ` Tiwei Bie
2018-04-04  9:59           ` Yang, Zhiyong
2018-04-04 14:57             ` Yang, Zhiyong
2018-04-04 17:17       ` [PATCH v5] " zhiyong.yang
2018-04-05  8:29         ` Tiwei Bie
2018-04-05  9:19           ` Yang, Zhiyong
2018-04-06  7:22           ` Yang, Zhiyong
2018-04-05  9:21         ` Yang, Zhiyong
2018-04-06  0:18         ` [PATCH v6] " zhiyong.yang
2018-04-05 18:13           ` Tan, Jianfeng
2018-04-06  7:14             ` Yang, Zhiyong
2018-04-06  9:25           ` [PATCH v7] " zhiyong.yang
2018-04-08  0:36             ` Tan, Jianfeng
2018-04-10 11:55               ` Maxime Coquelin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.