All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] eal: add internal flag indicating init has completed
@ 2018-02-22 18:21 Anatoly Burakov
  2018-02-22 18:21 ` [PATCH 2/3] eal: don't process IPC messages before init finished Anatoly Burakov
                   ` (7 more replies)
  0 siblings, 8 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-22 18:21 UTC (permalink / raw)
  To: dev

Currently, primary process initialization is finalized by setting
the RTE_MAGIC value in the shared config. However, it is not
possible to check whether secondary process initialization has
completed. Add such a value to internal config.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_common_options.c | 1 +
 lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d2..0be80cb 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -194,6 +194,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->vmware_tsc_map = 0;
 	internal_cfg->create_uio_dev = 0;
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
+	internal_cfg->init_complete = 0;
 }
 
 static int
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc..4e2c2e6 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -56,6 +56,8 @@ struct internal_config {
 			/**< user defined mbuf pool ops name */
 	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
 	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+	unsigned int init_complete;
+	/**< indicates whether EAL has completed initialization */
 };
 extern struct internal_config internal_config; /**< Global EAL configuration. */
 
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..2ecd07b 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -669,6 +669,8 @@ rte_eal_mcfg_complete(void)
 	/* ALL shared mem_config related INIT DONE */
 	if (rte_config.process_type == RTE_PROC_PRIMARY)
 		rte_config.mem_config->magic = RTE_MAGIC;
+
+	internal_config.init_complete = 1;
 }
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 2/3] eal: don't process IPC messages before init finished
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
@ 2018-02-22 18:21 ` Anatoly Burakov
  2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-22 18:21 UTC (permalink / raw)
  To: dev

It is not possible for a primary process to receive any messages
while initializing, because RTE_MAGIC value is not set in the
shared config, and hence no secondary process can ever spin up
during that time.

However, it is possible for a secondary process to receive messages
from the primary during initialization. We can't just drop the
messages as they may be important, and also we might need to process
replies to our own requests (e.g. VFIO) during initialization.

Therefore, add a tailq for incoming messages, and queue them up
until initialization is complete, and process them in order they
arrived.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_common_proc.c | 50 +++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 3a1088e..b4d00c3 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -25,6 +25,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
+#include <rte_tailq.h>
 
 #include "eal_private.h"
 #include "eal_filesystem.h"
@@ -58,6 +59,18 @@ struct mp_msg_internal {
 	struct rte_mp_msg msg;
 };
 
+struct message_queue_entry {
+	TAILQ_ENTRY(message_queue_entry) next;
+	struct mp_msg_internal msg;
+	struct sockaddr_un sa;
+};
+
+/** Double linked list of received messages. */
+TAILQ_HEAD(message_queue, message_queue_entry);
+
+static struct message_queue message_queue =
+	TAILQ_HEAD_INITIALIZER(message_queue);
+
 struct sync_request {
 	TAILQ_ENTRY(sync_request) next;
 	int reply_received;
@@ -276,12 +289,39 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 static void *
 mp_handle(void *arg __rte_unused)
 {
-	struct mp_msg_internal msg;
-	struct sockaddr_un sa;
-
+	struct message_queue_entry *cur_msg, *next_msg, *new_msg = NULL;
 	while (1) {
-		if (read_msg(&msg, &sa) == 0)
-			process_msg(&msg, &sa);
+		/* we want to process all messages in order of their arrival,
+		 * but status of init_complete may change while we're iterating
+		 * the tailq. so, store it here and check once every iteration.
+		 */
+		int init_complete = internal_config.init_complete;
+
+		if (new_msg == NULL)
+			new_msg = malloc(sizeof(*new_msg));
+		if (read_msg(&new_msg->msg, &new_msg->sa) == 0) {
+			/* we successfully read the message, so enqueue it */
+			TAILQ_INSERT_TAIL(&message_queue, new_msg, next);
+			new_msg = NULL;
+		} /* reuse new_msg for next message if we couldn't read_msg */
+
+		/* tailq only accessed here, so no locking needed */
+		TAILQ_FOREACH_SAFE(cur_msg, &message_queue, next, next_msg) {
+			/* secondary process should not process any incoming
+			 * requests until its initialization is complete, but
+			 * it is allowed to process replies to its own queries.
+			 */
+			if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+					!init_complete &&
+					cur_msg->msg.type != MP_REP)
+				continue;
+
+			TAILQ_REMOVE(&message_queue, cur_msg, next);
+
+			process_msg(&cur_msg->msg, &cur_msg->sa);
+
+			free(cur_msg);
+		}
 	}
 
 	return NULL;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 3/3] eal: use locks to determine if secondary process is active
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
  2018-02-22 18:21 ` [PATCH 2/3] eal: don't process IPC messages before init finished Anatoly Burakov
@ 2018-02-22 18:21 ` Anatoly Burakov
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
                     ` (4 more replies)
  2018-02-22 18:32 ` [PATCH 1/3] eal: add internal flag indicating init has completed Burakov, Anatoly
                   ` (5 subsequent siblings)
  7 siblings, 5 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-22 18:21 UTC (permalink / raw)
  To: dev

Previously, IPC would remove sockets it considers to be "inactive"
based on whether they have responded. Change this to create lock
files in addition to socket files, so that we can determine if
secondary process is active before attempting to communicate with
it. That way, we can distinguish secondaries that are alive but
are not responding, from those that have already died.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_common_proc.c | 204 +++++++++++++++++++++++++++-----
 1 file changed, 175 insertions(+), 29 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index b4d00c3..17fded7 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/file.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -32,6 +33,7 @@
 #include "eal_internal_cfg.h"
 
 static int mp_fd = -1;
+static int lock_fd = -1;
 static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
 static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
 static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
@@ -104,6 +106,46 @@ find_sync_request(const char *dst, const char *act_name)
 	return r;
 }
 
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+	if (strlen(name) > 0)
+		snprintf(buf, len, "%s_%s", prefix, name);
+	else
+		snprintf(buf, len, "%s", prefix);
+}
+
+static void
+create_lockfile_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+	if (strlen(name) > 1)
+		snprintf(buf, len, "%slock_%s", prefix, name);
+	else
+		snprintf(buf, len, "%slock", prefix);
+}
+
+static const char *
+get_peer_name(const char *socket_full_path)
+{
+	char buf[PATH_MAX] = {0};
+	int len;
+
+	/* primary process has no peer name */
+	if (strcmp(socket_full_path, eal_mp_socket_path()) == 0)
+		return NULL;
+
+	/* construct dummy socket file name - make it one character long so that
+	 * we hit the code path where underscores are added
+	 */
+	create_socket_path("a", buf, sizeof(buf));
+
+	/* we want to get everything after /path/.rte_unix_, so discard 'a' */
+	len = strlen(buf) - 1;
+	return &socket_full_path[len];
+}
+
 int
 rte_eal_primary_proc_alive(const char *config_file_path)
 {
@@ -330,8 +372,29 @@ mp_handle(void *arg __rte_unused)
 static int
 open_socket_fd(void)
 {
+	char peer_name[PATH_MAX] = {0};
+	char lockfile[PATH_MAX] = {0};
 	struct sockaddr_un un;
-	const char *prefix = eal_mp_socket_path();
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64,
+			 getpid(), rte_rdtsc());
+
+	/* try to create lockfile */
+	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
+
+	lock_fd = open(lockfile, O_CREAT | O_RDWR);
+	if (lock_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	if (flock(lock_fd, LOCK_EX | LOCK_NB)) {
+		RTE_LOG(ERR, EAL, "failed to lock '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	/* no need to downgrade to shared lock */
 
 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (mp_fd < 0) {
@@ -341,13 +404,11 @@ open_socket_fd(void)
 
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
-	else {
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
-			 prefix, getpid(), rte_rdtsc());
-	}
+
+	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
 	unlink(un.sun_path); /* May still exist since last run */
+
 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
 			un.sun_path, strerror(errno));
@@ -359,6 +420,44 @@ open_socket_fd(void)
 	return mp_fd;
 }
 
+/* find corresponding lock file and try to lock it */
+static int
+socket_is_active(const char *peer_name)
+{
+	char lockfile[PATH_MAX] = {0};
+	int fd, ret = -1;
+
+	/* construct lockfile filename */
+	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
+
+	/* try to lock it */
+	fd = open(lockfile, O_CREAT | O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	ret = flock(fd, LOCK_EX | LOCK_NB);
+	if (ret < 0) {
+		if (errno == EWOULDBLOCK) {
+			/* file is locked */
+			ret = 1;
+		} else {
+			RTE_LOG(ERR, EAL, "Cannot lock '%s': %s\n", lockfile,
+				strerror(errno));
+			ret = -1;
+		}
+	} else {
+		ret = 0;
+		/* unlink lockfile automatically */
+		unlink(lockfile);
+		flock(fd, LOCK_UN);
+	}
+	close(fd);
+
+	return ret;
+}
+
 static int
 unlink_sockets(const char *filter)
 {
@@ -374,28 +473,33 @@ unlink_sockets(const char *filter)
 	dir_fd = dirfd(mp_dir);
 
 	while ((ent = readdir(mp_dir))) {
-		if (fnmatch(filter, ent->d_name, 0) == 0)
+		if (fnmatch(filter, ent->d_name, 0) == 0) {
+			const char *peer_name;
+			char path[PATH_MAX];
+			int ret;
+
+			snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+				 ent->d_name);
+			peer_name = get_peer_name(path);
+
+			ret = socket_is_active(peer_name);
+			if (ret < 0) {
+				RTE_LOG(ERR, EAL, "Error getting socket active status\n");
+				return -1;
+			} else if (ret == 1) {
+				RTE_LOG(ERR, EAL, "Socket is active (old secondary process still running?)\n");
+				return -1;
+			}
+			RTE_LOG(DEBUG, EAL, "Removing stale socket file '%s'\n",
+					ent->d_name);
 			unlinkat(dir_fd, ent->d_name, 0);
+		}
 	}
 
 	closedir(mp_dir);
 	return 0;
 }
 
-static void
-unlink_socket_by_path(const char *path)
-{
-	char *filename;
-	char *fullpath = strdup(path);
-
-	if (!fullpath)
-		return;
-	filename = basename(fullpath);
-	unlink_sockets(filename);
-	free(fullpath);
-	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
-}
-
 int
 rte_mp_channel_init(void)
 {
@@ -485,10 +589,25 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 		rte_errno = errno;
 		/* Check if it caused by peer process exits */
 		if (errno == ECONNREFUSED) {
-			/* We don't unlink the primary's socket here */
-			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-				unlink_socket_by_path(dst_path);
-			return 0;
+			const char *peer_name = get_peer_name(dst_path);
+			int active, ret = 0;
+
+			active = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+					socket_is_active(peer_name) :
+					rte_eal_primary_proc_alive(NULL);
+
+			if (active > 0) {
+				RTE_LOG(ERR, EAL, "Couldn't communicate with active peer\n");
+			} else if (active < 0) {
+				RTE_LOG(ERR, EAL, "Couldn't get peer status\n");
+				ret = -1;
+			} else if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+				/* peer isn't active anymore, so unlink its
+				 * socket.
+				 */
+				unlink(dst_path);
+			}
+			return ret;
 		}
 		if (errno == ENOBUFS) {
 			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
@@ -506,7 +625,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 static int
 mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 
@@ -528,15 +647,28 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		rte_errno = errno;
 		return -1;
 	}
+	dir_fd = dirfd(mp_dir);
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
+		const char *peer_name;
+		int active;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
-		if (send_msg(path, msg, type) < 0)
+		peer_name = get_peer_name(path);
+
+		/* only send if we can expect to receive a reply, otherwise
+		 * remove the socket.
+		 */
+		active = socket_is_active(peer_name);
+		if (active < 0)
+			ret = -1;
+		else if (active == 0)
+			unlinkat(dir_fd, ent->d_name, 0);
+		else if (active > 0 && send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
 
@@ -661,7 +793,7 @@ int __rte_experimental
 rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		const struct timespec *ts)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 	struct timeval now;
@@ -696,15 +828,29 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		rte_errno = errno;
 		return -1;
 	}
+	dir_fd = dirfd(mp_dir);
 
 	while ((ent = readdir(mp_dir))) {
+		const char *peer_name;
 		char path[PATH_MAX];
+		int active;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
+		peer_name = get_peer_name(path);
+
+		active = socket_is_active(peer_name);
+
+		if (active < 0) {
+			ret = -1;
+			break;
+		} else if (active == 0) {
+			unlinkat(dir_fd, ent->d_name, 0);
+			continue;
+		}
 
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH 1/3] eal: add internal flag indicating init has completed
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
  2018-02-22 18:21 ` [PATCH 2/3] eal: don't process IPC messages before init finished Anatoly Burakov
  2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
@ 2018-02-22 18:32 ` Burakov, Anatoly
  2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-22 18:32 UTC (permalink / raw)
  To: dev

On 22-Feb-18 6:21 PM, Anatoly Burakov wrote:
> Currently, primary process initialization is finalized by setting
> the RTE_MAGIC value in the shared config. However, it is not
> possible to check whether secondary process initialization has
> completed. Add such a value to internal config.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---

I should've mentioned that this patch series is dependent upon IPC 
bugfixes[1] i posted earlier.

[1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 1/5] eal: add internal flag indicating init has completed
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
                   ` (2 preceding siblings ...)
  2018-02-22 18:32 ` [PATCH 1/3] eal: add internal flag indicating init has completed Burakov, Anatoly
@ 2018-02-27 13:23 ` Anatoly Burakov
  2018-02-27 14:35   ` [PATCH v3 " Anatoly Burakov
                     ` (4 more replies)
  2018-02-27 13:23 ` [PATCH v2 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
                   ` (3 subsequent siblings)
  7 siblings, 5 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 13:23 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Currently, primary process initialization is finalized by setting
the RTE_MAGIC value in the shared config. However, it is not
possible to check whether secondary process initialization has
completed. Add such a value to internal config.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    This patch is dependent upon earlier IPC fixes patchset [1].
    
    [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/
    
    v2 changes: none

 lib/librte_eal/common/eal_common_options.c | 1 +
 lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d2..0be80cb 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -194,6 +194,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->vmware_tsc_map = 0;
 	internal_cfg->create_uio_dev = 0;
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
+	internal_cfg->init_complete = 0;
 }
 
 static int
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc..4e2c2e6 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -56,6 +56,8 @@ struct internal_config {
 			/**< user defined mbuf pool ops name */
 	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
 	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+	unsigned int init_complete;
+	/**< indicates whether EAL has completed initialization */
 };
 extern struct internal_config internal_config; /**< Global EAL configuration. */
 
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..2ecd07b 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -669,6 +669,8 @@ rte_eal_mcfg_complete(void)
 	/* ALL shared mem_config related INIT DONE */
 	if (rte_config.process_type == RTE_PROC_PRIMARY)
 		rte_config.mem_config->magic = RTE_MAGIC;
+
+	internal_config.init_complete = 1;
 }
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 2/5] eal: don't process IPC messages before init finished
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
                   ` (3 preceding siblings ...)
  2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
@ 2018-02-27 13:23 ` Anatoly Burakov
  2018-02-27 13:23 ` [PATCH v2 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 13:23 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

It is not possible for a primary process to receive any messages
while initializing, because RTE_MAGIC value is not set in the
shared config, and hence no secondary process can ever spin up
during that time.

However, it is possible for a secondary process to receive messages
from the primary during initialization. We can't just drop the
messages as they may be important, and also we might need to process
replies to our own requests (e.g. VFIO) during initialization.

Therefore, add a tailq for incoming messages, and queue them up
until initialization is complete, and process them in order they
arrived.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 50 +++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 3a1088e..b4d00c3 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -25,6 +25,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
+#include <rte_tailq.h>
 
 #include "eal_private.h"
 #include "eal_filesystem.h"
@@ -58,6 +59,18 @@ struct mp_msg_internal {
 	struct rte_mp_msg msg;
 };
 
+struct message_queue_entry {
+	TAILQ_ENTRY(message_queue_entry) next;
+	struct mp_msg_internal msg;
+	struct sockaddr_un sa;
+};
+
+/** Double linked list of received messages. */
+TAILQ_HEAD(message_queue, message_queue_entry);
+
+static struct message_queue message_queue =
+	TAILQ_HEAD_INITIALIZER(message_queue);
+
 struct sync_request {
 	TAILQ_ENTRY(sync_request) next;
 	int reply_received;
@@ -276,12 +289,39 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 static void *
 mp_handle(void *arg __rte_unused)
 {
-	struct mp_msg_internal msg;
-	struct sockaddr_un sa;
-
+	struct message_queue_entry *cur_msg, *next_msg, *new_msg = NULL;
 	while (1) {
-		if (read_msg(&msg, &sa) == 0)
-			process_msg(&msg, &sa);
+		/* we want to process all messages in order of their arrival,
+		 * but status of init_complete may change while we're iterating
+		 * the tailq. so, store it here and check once every iteration.
+		 */
+		int init_complete = internal_config.init_complete;
+
+		if (new_msg == NULL)
+			new_msg = malloc(sizeof(*new_msg));
+		if (read_msg(&new_msg->msg, &new_msg->sa) == 0) {
+			/* we successfully read the message, so enqueue it */
+			TAILQ_INSERT_TAIL(&message_queue, new_msg, next);
+			new_msg = NULL;
+		} /* reuse new_msg for next message if we couldn't read_msg */
+
+		/* tailq only accessed here, so no locking needed */
+		TAILQ_FOREACH_SAFE(cur_msg, &message_queue, next, next_msg) {
+			/* secondary process should not process any incoming
+			 * requests until its initialization is complete, but
+			 * it is allowed to process replies to its own queries.
+			 */
+			if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+					!init_complete &&
+					cur_msg->msg.type != MP_REP)
+				continue;
+
+			TAILQ_REMOVE(&message_queue, cur_msg, next);
+
+			process_msg(&cur_msg->msg, &cur_msg->sa);
+
+			free(cur_msg);
+		}
 	}
 
 	return NULL;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 3/5] eal: use locks to determine if secondary process is active
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
                   ` (4 preceding siblings ...)
  2018-02-27 13:23 ` [PATCH v2 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
@ 2018-02-27 13:23 ` Anatoly Burakov
  2018-02-27 13:23 ` [PATCH v2 4/5] eal: prevent secondary process init while sending messages Anatoly Burakov
  2018-02-27 13:23 ` [PATCH v2 5/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
  7 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 13:23 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Previously, IPC would remove sockets it considers to be "inactive"
based on whether they have responded. Change this to create lock
files in addition to socket files, so that we can determine if
secondary process is active before attempting to communicate with
it. That way, we can distinguish secondaries that are alive but
are not responding, from those that have already died.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 204 +++++++++++++++++++++++++++-----
 1 file changed, 175 insertions(+), 29 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index b4d00c3..17fded7 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/file.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -32,6 +33,7 @@
 #include "eal_internal_cfg.h"
 
 static int mp_fd = -1;
+static int lock_fd = -1;
 static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
 static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
 static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
@@ -104,6 +106,46 @@ find_sync_request(const char *dst, const char *act_name)
 	return r;
 }
 
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+	if (strlen(name) > 0)
+		snprintf(buf, len, "%s_%s", prefix, name);
+	else
+		snprintf(buf, len, "%s", prefix);
+}
+
+static void
+create_lockfile_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+	if (strlen(name) > 1)
+		snprintf(buf, len, "%slock_%s", prefix, name);
+	else
+		snprintf(buf, len, "%slock", prefix);
+}
+
+static const char *
+get_peer_name(const char *socket_full_path)
+{
+	char buf[PATH_MAX] = {0};
+	int len;
+
+	/* primary process has no peer name */
+	if (strcmp(socket_full_path, eal_mp_socket_path()) == 0)
+		return NULL;
+
+	/* construct dummy socket file name - make it one character long so that
+	 * we hit the code path where underscores are added
+	 */
+	create_socket_path("a", buf, sizeof(buf));
+
+	/* we want to get everything after /path/.rte_unix_, so discard 'a' */
+	len = strlen(buf) - 1;
+	return &socket_full_path[len];
+}
+
 int
 rte_eal_primary_proc_alive(const char *config_file_path)
 {
@@ -330,8 +372,29 @@ mp_handle(void *arg __rte_unused)
 static int
 open_socket_fd(void)
 {
+	char peer_name[PATH_MAX] = {0};
+	char lockfile[PATH_MAX] = {0};
 	struct sockaddr_un un;
-	const char *prefix = eal_mp_socket_path();
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64,
+			 getpid(), rte_rdtsc());
+
+	/* try to create lockfile */
+	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
+
+	lock_fd = open(lockfile, O_CREAT | O_RDWR);
+	if (lock_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	if (flock(lock_fd, LOCK_EX | LOCK_NB)) {
+		RTE_LOG(ERR, EAL, "failed to lock '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	/* no need to downgrade to shared lock */
 
 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (mp_fd < 0) {
@@ -341,13 +404,11 @@ open_socket_fd(void)
 
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
-	else {
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
-			 prefix, getpid(), rte_rdtsc());
-	}
+
+	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
 	unlink(un.sun_path); /* May still exist since last run */
+
 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
 			un.sun_path, strerror(errno));
@@ -359,6 +420,44 @@ open_socket_fd(void)
 	return mp_fd;
 }
 
+/* find corresponding lock file and try to lock it */
+static int
+socket_is_active(const char *peer_name)
+{
+	char lockfile[PATH_MAX] = {0};
+	int fd, ret = -1;
+
+	/* construct lockfile filename */
+	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
+
+	/* try to lock it */
+	fd = open(lockfile, O_CREAT | O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	ret = flock(fd, LOCK_EX | LOCK_NB);
+	if (ret < 0) {
+		if (errno == EWOULDBLOCK) {
+			/* file is locked */
+			ret = 1;
+		} else {
+			RTE_LOG(ERR, EAL, "Cannot lock '%s': %s\n", lockfile,
+				strerror(errno));
+			ret = -1;
+		}
+	} else {
+		ret = 0;
+		/* unlink lockfile automatically */
+		unlink(lockfile);
+		flock(fd, LOCK_UN);
+	}
+	close(fd);
+
+	return ret;
+}
+
 static int
 unlink_sockets(const char *filter)
 {
@@ -374,28 +473,33 @@ unlink_sockets(const char *filter)
 	dir_fd = dirfd(mp_dir);
 
 	while ((ent = readdir(mp_dir))) {
-		if (fnmatch(filter, ent->d_name, 0) == 0)
+		if (fnmatch(filter, ent->d_name, 0) == 0) {
+			const char *peer_name;
+			char path[PATH_MAX];
+			int ret;
+
+			snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+				 ent->d_name);
+			peer_name = get_peer_name(path);
+
+			ret = socket_is_active(peer_name);
+			if (ret < 0) {
+				RTE_LOG(ERR, EAL, "Error getting socket active status\n");
+				return -1;
+			} else if (ret == 1) {
+				RTE_LOG(ERR, EAL, "Socket is active (old secondary process still running?)\n");
+				return -1;
+			}
+			RTE_LOG(DEBUG, EAL, "Removing stale socket file '%s'\n",
+					ent->d_name);
 			unlinkat(dir_fd, ent->d_name, 0);
+		}
 	}
 
 	closedir(mp_dir);
 	return 0;
 }
 
-static void
-unlink_socket_by_path(const char *path)
-{
-	char *filename;
-	char *fullpath = strdup(path);
-
-	if (!fullpath)
-		return;
-	filename = basename(fullpath);
-	unlink_sockets(filename);
-	free(fullpath);
-	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
-}
-
 int
 rte_mp_channel_init(void)
 {
@@ -485,10 +589,25 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 		rte_errno = errno;
 		/* Check if it caused by peer process exits */
 		if (errno == ECONNREFUSED) {
-			/* We don't unlink the primary's socket here */
-			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-				unlink_socket_by_path(dst_path);
-			return 0;
+			const char *peer_name = get_peer_name(dst_path);
+			int active, ret = 0;
+
+			active = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+					socket_is_active(peer_name) :
+					rte_eal_primary_proc_alive(NULL);
+
+			if (active > 0) {
+				RTE_LOG(ERR, EAL, "Couldn't communicate with active peer\n");
+			} else if (active < 0) {
+				RTE_LOG(ERR, EAL, "Couldn't get peer status\n");
+				ret = -1;
+			} else if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+				/* peer isn't active anymore, so unlink its
+				 * socket.
+				 */
+				unlink(dst_path);
+			}
+			return ret;
 		}
 		if (errno == ENOBUFS) {
 			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
@@ -506,7 +625,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 static int
 mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 
@@ -528,15 +647,28 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		rte_errno = errno;
 		return -1;
 	}
+	dir_fd = dirfd(mp_dir);
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
+		const char *peer_name;
+		int active;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
-		if (send_msg(path, msg, type) < 0)
+		peer_name = get_peer_name(path);
+
+		/* only send if we can expect to receive a reply, otherwise
+		 * remove the socket.
+		 */
+		active = socket_is_active(peer_name);
+		if (active < 0)
+			ret = -1;
+		else if (active == 0)
+			unlinkat(dir_fd, ent->d_name, 0);
+		else if (active > 0 && send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
 
@@ -661,7 +793,7 @@ int __rte_experimental
 rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		const struct timespec *ts)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 	struct timeval now;
@@ -696,15 +828,29 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		rte_errno = errno;
 		return -1;
 	}
+	dir_fd = dirfd(mp_dir);
 
 	while ((ent = readdir(mp_dir))) {
+		const char *peer_name;
 		char path[PATH_MAX];
+		int active;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
+		peer_name = get_peer_name(path);
+
+		active = socket_is_active(peer_name);
+
+		if (active < 0) {
+			ret = -1;
+			break;
+		} else if (active == 0) {
+			unlinkat(dir_fd, ent->d_name, 0);
+			continue;
+		}
 
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 4/5] eal: prevent secondary process init while sending messages
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
                   ` (5 preceding siblings ...)
  2018-02-27 13:23 ` [PATCH v2 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
@ 2018-02-27 13:23 ` Anatoly Burakov
  2018-02-27 13:23 ` [PATCH v2 5/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
  7 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 13:23 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Currently, it is possible to spin up a secondary process while
either sendmsg or request is in progress. Fix this by adding
directory locks during init, sendmsg and requests.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2: added this patch

 lib/librte_eal/common/eal_common_proc.c | 47 ++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 17fded7..82ea4a7 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -505,6 +505,7 @@ rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 	char *path;
+	int dir_fd;
 	pthread_t tid;
 
 	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
@@ -514,14 +515,32 @@ rte_mp_channel_init(void)
 	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
 	free(path);
 
+	/* lock the directory */
+	dir_fd = open(mp_dir_path, O_RDONLY);
+	if (dir_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+			mp_dir_path, strerror(errno));
+		return -1;
+	}
+
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+			mp_dir_path, strerror(errno));
+		close(dir_fd);
+		return -1;
+	}
+
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 	    unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+		close(dir_fd);
 		return -1;
 	}
 
-	if (open_socket_fd() < 0)
+	if (open_socket_fd() < 0) {
+		close(dir_fd);
 		return -1;
+	}
 
 	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
@@ -534,6 +553,11 @@ rte_mp_channel_init(void)
 	/* try best to set thread name */
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
 	rte_thread_setname(tid, thread_name);
+
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
+	close(dir_fd);
+
 	return 0;
 }
 
@@ -648,6 +672,14 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		return -1;
 	}
 	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		rte_errno = errno;
+		closedir(mp_dir);
+		return -1;
+	}
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 		const char *peer_name;
@@ -671,6 +703,8 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		else if (active > 0 && send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
+	/* unlock the dir */
+	flock(dir_fd, LOCK_UN);
 
 	closedir(mp_dir);
 	return ret;
@@ -830,6 +864,15 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	}
 	dir_fd = dirfd(mp_dir);
 
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		closedir(mp_dir);
+		rte_errno = errno;
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		const char *peer_name;
 		char path[PATH_MAX];
@@ -855,6 +898,8 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
 	}
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
 
 	closedir(mp_dir);
 	return ret;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 5/5] eal: don't hardcode socket filter value in IPC
  2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
                   ` (6 preceding siblings ...)
  2018-02-27 13:23 ` [PATCH v2 4/5] eal: prevent secondary process init while sending messages Anatoly Burakov
@ 2018-02-27 13:23 ` Anatoly Burakov
  7 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 13:23 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Currently, filter value is hardcoded and disconnected from actual
value returned by eal_mp_socket_path(). Fix this to generate filter
value by deriving it from eal_mp_socket_path() instead.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v2: added this patch

 lib/librte_eal/common/eal_common_proc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 82ea4a7..bdea6d6 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -504,16 +504,17 @@ int
 rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
-	char *path;
+	char path[PATH_MAX];
 	int dir_fd;
 	pthread_t tid;
 
-	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
-		 internal_config.hugefile_prefix);
+	/* create filter path */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_filter, sizeof(mp_filter), "%s", basename(path));
 
-	path = strdup(eal_mp_socket_path());
-	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
-	free(path);
+	/* path may have been modified, so recreate it */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
 
 	/* lock the directory */
 	dir_fd = open(mp_dir_path, O_RDONLY);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 1/5] eal: add internal flag indicating init has completed
  2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
@ 2018-02-27 14:35   ` Anatoly Burakov
  2018-02-28  2:12     ` Tan, Jianfeng
  2018-02-27 14:35   ` [PATCH v3 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 14:35 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Currently, primary process initialization is finalized by setting
the RTE_MAGIC value in the shared config. However, it is not
possible to check whether secondary process initialization has
completed. Add such a value to internal config.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    This patch is dependent upon earlier IPC fixes patchset [1].
    
    [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/
    
    v3: no changes
    
    v2: no changes

 lib/librte_eal/common/eal_common_options.c | 1 +
 lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d2..0be80cb 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -194,6 +194,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->vmware_tsc_map = 0;
 	internal_cfg->create_uio_dev = 0;
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
+	internal_cfg->init_complete = 0;
 }
 
 static int
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc..4e2c2e6 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -56,6 +56,8 @@ struct internal_config {
 			/**< user defined mbuf pool ops name */
 	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
 	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+	unsigned int init_complete;
+	/**< indicates whether EAL has completed initialization */
 };
 extern struct internal_config internal_config; /**< Global EAL configuration. */
 
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..2ecd07b 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -669,6 +669,8 @@ rte_eal_mcfg_complete(void)
 	/* ALL shared mem_config related INIT DONE */
 	if (rte_config.process_type == RTE_PROC_PRIMARY)
 		rte_config.mem_config->magic = RTE_MAGIC;
+
+	internal_config.init_complete = 1;
 }
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 2/5] eal: don't process IPC messages before init finished
  2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
  2018-02-27 14:35   ` [PATCH v3 " Anatoly Burakov
@ 2018-02-27 14:35   ` Anatoly Burakov
  2018-02-28  1:09     ` Tan, Jianfeng
  2018-02-28  4:00     ` Wiles, Keith
  2018-02-27 14:35   ` [PATCH v3 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
                     ` (2 subsequent siblings)
  4 siblings, 2 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 14:35 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

It is not possible for a primary process to receive any messages
while initializing, because RTE_MAGIC value is not set in the
shared config, and hence no secondary process can ever spin up
during that time.

However, it is possible for a secondary process to receive messages
from the primary during initialization. We can't just drop the
messages as they may be important, and also we might need to process
replies to our own requests (e.g. VFIO) during initialization.

Therefore, add a tailq for incoming messages, and queue them up
until initialization is complete, and process them in order they
arrived.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3: check for init_complete after receiving message
    
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 52 +++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 3a1088e..a6e24e6 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -25,6 +25,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
+#include <rte_tailq.h>
 
 #include "eal_private.h"
 #include "eal_filesystem.h"
@@ -58,6 +59,18 @@ struct mp_msg_internal {
 	struct rte_mp_msg msg;
 };
 
+struct message_queue_entry {
+	TAILQ_ENTRY(message_queue_entry) next;
+	struct mp_msg_internal msg;
+	struct sockaddr_un sa;
+};
+
+/** Double linked list of received messages. */
+TAILQ_HEAD(message_queue, message_queue_entry);
+
+static struct message_queue message_queue =
+	TAILQ_HEAD_INITIALIZER(message_queue);
+
 struct sync_request {
 	TAILQ_ENTRY(sync_request) next;
 	int reply_received;
@@ -276,12 +289,41 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 static void *
 mp_handle(void *arg __rte_unused)
 {
-	struct mp_msg_internal msg;
-	struct sockaddr_un sa;
-
+	struct message_queue_entry *cur_msg, *next_msg, *new_msg = NULL;
 	while (1) {
-		if (read_msg(&msg, &sa) == 0)
-			process_msg(&msg, &sa);
+		/* we want to process all messages in order of their arrival,
+		 * but status of init_complete may change while we're iterating
+		 * the tailq. so, store it here and check once every iteration.
+		 */
+		int init_complete;
+
+		if (new_msg == NULL)
+			new_msg = malloc(sizeof(*new_msg));
+		if (read_msg(&new_msg->msg, &new_msg->sa) == 0) {
+			/* we successfully read the message, so enqueue it */
+			TAILQ_INSERT_TAIL(&message_queue, new_msg, next);
+			new_msg = NULL;
+		} /* reuse new_msg for next message if we couldn't read_msg */
+
+		init_complete = internal_config.init_complete;
+
+		/* tailq only accessed here, so no locking needed */
+		TAILQ_FOREACH_SAFE(cur_msg, &message_queue, next, next_msg) {
+			/* secondary process should not process any incoming
+			 * requests until its initialization is complete, but
+			 * it is allowed to process replies to its own queries.
+			 */
+			if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+					!init_complete &&
+					cur_msg->msg.type != MP_REP)
+				continue;
+
+			TAILQ_REMOVE(&message_queue, cur_msg, next);
+
+			process_msg(&cur_msg->msg, &cur_msg->sa);
+
+			free(cur_msg);
+		}
 	}
 
 	return NULL;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 3/5] eal: use locks to determine if secondary process is active
  2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
  2018-02-27 14:35   ` [PATCH v3 " Anatoly Burakov
  2018-02-27 14:35   ` [PATCH v3 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
@ 2018-02-27 14:35   ` Anatoly Burakov
  2018-02-28  1:26     ` Tan, Jianfeng
  2018-02-28  4:17     ` Wiles, Keith
  2018-02-27 14:35   ` [PATCH v3 4/5] eal: prevent secondary process init while sending messages Anatoly Burakov
  2018-02-27 14:35   ` [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
  4 siblings, 2 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 14:35 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Previously, IPC would remove sockets it considers to be "inactive"
based on whether they have responded. Change this to create lock
files in addition to socket files, so that we can determine if
secondary process is active before attempting to communicate with
it. That way, we can distinguish secondaries that are alive but
are not responding, from those that have already died.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3: no changes
    
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 204 +++++++++++++++++++++++++++-----
 1 file changed, 175 insertions(+), 29 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index a6e24e6..7c87971 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/file.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -32,6 +33,7 @@
 #include "eal_internal_cfg.h"
 
 static int mp_fd = -1;
+static int lock_fd = -1;
 static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
 static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
 static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
@@ -104,6 +106,46 @@ find_sync_request(const char *dst, const char *act_name)
 	return r;
 }
 
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+	if (strlen(name) > 0)
+		snprintf(buf, len, "%s_%s", prefix, name);
+	else
+		snprintf(buf, len, "%s", prefix);
+}
+
+static void
+create_lockfile_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+	if (strlen(name) > 1)
+		snprintf(buf, len, "%slock_%s", prefix, name);
+	else
+		snprintf(buf, len, "%slock", prefix);
+}
+
+static const char *
+get_peer_name(const char *socket_full_path)
+{
+	char buf[PATH_MAX] = {0};
+	int len;
+
+	/* primary process has no peer name */
+	if (strcmp(socket_full_path, eal_mp_socket_path()) == 0)
+		return NULL;
+
+	/* construct dummy socket file name - make it one character long so that
+	 * we hit the code path where underscores are added
+	 */
+	create_socket_path("a", buf, sizeof(buf));
+
+	/* we want to get everything after /path/.rte_unix_, so discard 'a' */
+	len = strlen(buf) - 1;
+	return &socket_full_path[len];
+}
+
 int
 rte_eal_primary_proc_alive(const char *config_file_path)
 {
@@ -332,8 +374,29 @@ mp_handle(void *arg __rte_unused)
 static int
 open_socket_fd(void)
 {
+	char peer_name[PATH_MAX] = {0};
+	char lockfile[PATH_MAX] = {0};
 	struct sockaddr_un un;
-	const char *prefix = eal_mp_socket_path();
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64,
+			 getpid(), rte_rdtsc());
+
+	/* try to create lockfile */
+	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
+
+	lock_fd = open(lockfile, O_CREAT | O_RDWR);
+	if (lock_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	if (flock(lock_fd, LOCK_EX | LOCK_NB)) {
+		RTE_LOG(ERR, EAL, "failed to lock '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	/* no need to downgrade to shared lock */
 
 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (mp_fd < 0) {
@@ -343,13 +406,11 @@ open_socket_fd(void)
 
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
-	else {
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
-			 prefix, getpid(), rte_rdtsc());
-	}
+
+	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
 	unlink(un.sun_path); /* May still exist since last run */
+
 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
 			un.sun_path, strerror(errno));
@@ -361,6 +422,44 @@ open_socket_fd(void)
 	return mp_fd;
 }
 
+/* find corresponding lock file and try to lock it */
+static int
+socket_is_active(const char *peer_name)
+{
+	char lockfile[PATH_MAX] = {0};
+	int fd, ret = -1;
+
+	/* construct lockfile filename */
+	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
+
+	/* try to lock it */
+	fd = open(lockfile, O_CREAT | O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open '%s': %s\n", lockfile,
+			strerror(errno));
+		return -1;
+	}
+	ret = flock(fd, LOCK_EX | LOCK_NB);
+	if (ret < 0) {
+		if (errno == EWOULDBLOCK) {
+			/* file is locked */
+			ret = 1;
+		} else {
+			RTE_LOG(ERR, EAL, "Cannot lock '%s': %s\n", lockfile,
+				strerror(errno));
+			ret = -1;
+		}
+	} else {
+		ret = 0;
+		/* unlink lockfile automatically */
+		unlink(lockfile);
+		flock(fd, LOCK_UN);
+	}
+	close(fd);
+
+	return ret;
+}
+
 static int
 unlink_sockets(const char *filter)
 {
@@ -376,28 +475,33 @@ unlink_sockets(const char *filter)
 	dir_fd = dirfd(mp_dir);
 
 	while ((ent = readdir(mp_dir))) {
-		if (fnmatch(filter, ent->d_name, 0) == 0)
+		if (fnmatch(filter, ent->d_name, 0) == 0) {
+			const char *peer_name;
+			char path[PATH_MAX];
+			int ret;
+
+			snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+				 ent->d_name);
+			peer_name = get_peer_name(path);
+
+			ret = socket_is_active(peer_name);
+			if (ret < 0) {
+				RTE_LOG(ERR, EAL, "Error getting socket active status\n");
+				return -1;
+			} else if (ret == 1) {
+				RTE_LOG(ERR, EAL, "Socket is active (old secondary process still running?)\n");
+				return -1;
+			}
+			RTE_LOG(DEBUG, EAL, "Removing stale socket file '%s'\n",
+					ent->d_name);
 			unlinkat(dir_fd, ent->d_name, 0);
+		}
 	}
 
 	closedir(mp_dir);
 	return 0;
 }
 
-static void
-unlink_socket_by_path(const char *path)
-{
-	char *filename;
-	char *fullpath = strdup(path);
-
-	if (!fullpath)
-		return;
-	filename = basename(fullpath);
-	unlink_sockets(filename);
-	free(fullpath);
-	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
-}
-
 int
 rte_mp_channel_init(void)
 {
@@ -487,10 +591,25 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 		rte_errno = errno;
 		/* Check if it caused by peer process exits */
 		if (errno == ECONNREFUSED) {
-			/* We don't unlink the primary's socket here */
-			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-				unlink_socket_by_path(dst_path);
-			return 0;
+			const char *peer_name = get_peer_name(dst_path);
+			int active, ret = 0;
+
+			active = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+					socket_is_active(peer_name) :
+					rte_eal_primary_proc_alive(NULL);
+
+			if (active > 0) {
+				RTE_LOG(ERR, EAL, "Couldn't communicate with active peer\n");
+			} else if (active < 0) {
+				RTE_LOG(ERR, EAL, "Couldn't get peer status\n");
+				ret = -1;
+			} else if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+				/* peer isn't active anymore, so unlink its
+				 * socket.
+				 */
+				unlink(dst_path);
+			}
+			return ret;
 		}
 		if (errno == ENOBUFS) {
 			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
@@ -508,7 +627,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 static int
 mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 
@@ -530,15 +649,28 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		rte_errno = errno;
 		return -1;
 	}
+	dir_fd = dirfd(mp_dir);
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
+		const char *peer_name;
+		int active;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
-		if (send_msg(path, msg, type) < 0)
+		peer_name = get_peer_name(path);
+
+		/* only send if we can expect to receive a reply, otherwise
+		 * remove the socket.
+		 */
+		active = socket_is_active(peer_name);
+		if (active < 0)
+			ret = -1;
+		else if (active == 0)
+			unlinkat(dir_fd, ent->d_name, 0);
+		else if (active > 0 && send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
 
@@ -663,7 +795,7 @@ int __rte_experimental
 rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		const struct timespec *ts)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 	struct timeval now;
@@ -698,15 +830,29 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		rte_errno = errno;
 		return -1;
 	}
+	dir_fd = dirfd(mp_dir);
 
 	while ((ent = readdir(mp_dir))) {
+		const char *peer_name;
 		char path[PATH_MAX];
+		int active;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
+		peer_name = get_peer_name(path);
+
+		active = socket_is_active(peer_name);
+
+		if (active < 0) {
+			ret = -1;
+			break;
+		} else if (active == 0) {
+			unlinkat(dir_fd, ent->d_name, 0);
+			continue;
+		}
 
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 4/5] eal: prevent secondary process init while sending messages
  2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
                     ` (2 preceding siblings ...)
  2018-02-27 14:35   ` [PATCH v3 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
@ 2018-02-27 14:35   ` Anatoly Burakov
  2018-02-28  1:58     ` Tan, Jianfeng
  2018-02-27 14:35   ` [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
  4 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 14:35 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Currently, it is possible to spin up a secondary process while
either sendmsg or request is in progress. Fix this by adding
directory locks during init, sendmsg and requests.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3: no changes
    
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 47 ++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 7c87971..7856a7b 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -507,6 +507,7 @@ rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 	char *path;
+	int dir_fd;
 	pthread_t tid;
 
 	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
@@ -516,14 +517,32 @@ rte_mp_channel_init(void)
 	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
 	free(path);
 
+	/* lock the directory */
+	dir_fd = open(mp_dir_path, O_RDONLY);
+	if (dir_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+			mp_dir_path, strerror(errno));
+		return -1;
+	}
+
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+			mp_dir_path, strerror(errno));
+		close(dir_fd);
+		return -1;
+	}
+
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 	    unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+		close(dir_fd);
 		return -1;
 	}
 
-	if (open_socket_fd() < 0)
+	if (open_socket_fd() < 0) {
+		close(dir_fd);
 		return -1;
+	}
 
 	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
@@ -536,6 +555,11 @@ rte_mp_channel_init(void)
 	/* try best to set thread name */
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
 	rte_thread_setname(tid, thread_name);
+
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
+	close(dir_fd);
+
 	return 0;
 }
 
@@ -650,6 +674,14 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		return -1;
 	}
 	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		rte_errno = errno;
+		closedir(mp_dir);
+		return -1;
+	}
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 		const char *peer_name;
@@ -673,6 +705,8 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		else if (active > 0 && send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
+	/* unlock the dir */
+	flock(dir_fd, LOCK_UN);
 
 	closedir(mp_dir);
 	return ret;
@@ -832,6 +866,15 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	}
 	dir_fd = dirfd(mp_dir);
 
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		closedir(mp_dir);
+		rte_errno = errno;
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		const char *peer_name;
 		char path[PATH_MAX];
@@ -857,6 +900,8 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
 	}
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
 
 	closedir(mp_dir);
 	return ret;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC
  2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
                     ` (3 preceding siblings ...)
  2018-02-27 14:35   ` [PATCH v3 4/5] eal: prevent secondary process init while sending messages Anatoly Burakov
@ 2018-02-27 14:35   ` Anatoly Burakov
  2018-02-28  1:52     ` Tan, Jianfeng
  4 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-02-27 14:35 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan

Currently, filter value is hardcoded and disconnected from actual
value returned by eal_mp_socket_path(). Fix this to generate filter
value by deriving it from eal_mp_socket_path() instead.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v3: no changes
    
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 7856a7b..4cf3aa6 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -506,16 +506,17 @@ int
 rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
-	char *path;
+	char path[PATH_MAX];
 	int dir_fd;
 	pthread_t tid;
 
-	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
-		 internal_config.hugefile_prefix);
+	/* create filter path */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_filter, sizeof(mp_filter), "%s", basename(path));
 
-	path = strdup(eal_mp_socket_path());
-	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
-	free(path);
+	/* path may have been modified, so recreate it */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
 
 	/* lock the directory */
 	dir_fd = open(mp_dir_path, O_RDONLY);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 2/5] eal: don't process IPC messages before init finished
  2018-02-27 14:35   ` [PATCH v3 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
@ 2018-02-28  1:09     ` Tan, Jianfeng
  2018-02-28  9:45       ` Burakov, Anatoly
  2018-02-28  4:00     ` Wiles, Keith
  1 sibling, 1 reply; 57+ messages in thread
From: Tan, Jianfeng @ 2018-02-28  1:09 UTC (permalink / raw)
  To: Burakov, Anatoly, dev



> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Tuesday, February 27, 2018 10:36 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng
> Subject: [PATCH v3 2/5] eal: don't process IPC messages before init finished
> 
> It is not possible for a primary process to receive any messages
> while initializing, because RTE_MAGIC value is not set in the
> shared config, and hence no secondary process can ever spin up
> during that time.
> 
> However, it is possible for a secondary process to receive messages
> from the primary during initialization. We can't just drop the
> messages as they may be important, and also we might need to process
> replies to our own requests (e.g. VFIO) during initialization.
> 
> Therefore, add a tailq for incoming messages, and queue them up
> until initialization is complete, and process them in order they
> arrived.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v3: check for init_complete after receiving message
> 
>     v2: no changes
> 
>  lib/librte_eal/common/eal_common_proc.c | 52
> +++++++++++++++++++++++++++++----
>  1 file changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_proc.c
> b/lib/librte_eal/common/eal_common_proc.c
> index 3a1088e..a6e24e6 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -25,6 +25,7 @@
>  #include <rte_errno.h>
>  #include <rte_lcore.h>
>  #include <rte_log.h>
> +#include <rte_tailq.h>
> 
>  #include "eal_private.h"
>  #include "eal_filesystem.h"
> @@ -58,6 +59,18 @@ struct mp_msg_internal {
>  	struct rte_mp_msg msg;
>  };
> 
> +struct message_queue_entry {
> +	TAILQ_ENTRY(message_queue_entry) next;
> +	struct mp_msg_internal msg;
> +	struct sockaddr_un sa;
> +};
> +
> +/** Double linked list of received messages. */
> +TAILQ_HEAD(message_queue, message_queue_entry);
> +
> +static struct message_queue message_queue =
> +	TAILQ_HEAD_INITIALIZER(message_queue);
> +
>  struct sync_request {
>  	TAILQ_ENTRY(sync_request) next;
>  	int reply_received;
> @@ -276,12 +289,41 @@ process_msg(struct mp_msg_internal *m, struct
> sockaddr_un *s)
>  static void *
>  mp_handle(void *arg __rte_unused)
>  {
> -	struct mp_msg_internal msg;
> -	struct sockaddr_un sa;
> -
> +	struct message_queue_entry *cur_msg, *next_msg, *new_msg =
> NULL;
>  	while (1) {
> -		if (read_msg(&msg, &sa) == 0)
> -			process_msg(&msg, &sa);
> +		/* we want to process all messages in order of their arrival,
> +		 * but status of init_complete may change while we're
> iterating
> +		 * the tailq. so, store it here and check once every iteration.
> +		 */
> +		int init_complete;
> +
> +		if (new_msg == NULL)
> +			new_msg = malloc(sizeof(*new_msg));
> +		if (read_msg(&new_msg->msg, &new_msg->sa) == 0) {

Suppose a case that: req and msg received but init not completed, so we enqueue all of them in the tailq; and from now on, no req/rep/msg comes. Then mp thread will hang here for reading new message.
In such a case, we might need the master thread to signal mp thread to wake up when init is completed?

Thanks,
Jianfeng

> +			/* we successfully read the message, so enqueue it
> */
> +			TAILQ_INSERT_TAIL(&message_queue, new_msg,
> next);
> +			new_msg = NULL;
> +		} /* reuse new_msg for next message if we couldn't
> read_msg */
> +
> +		init_complete = internal_config.init_complete;
> +
> +		/* tailq only accessed here, so no locking needed */
> +		TAILQ_FOREACH_SAFE(cur_msg, &message_queue, next,
> next_msg) {
> +			/* secondary process should not process any
> incoming
> +			 * requests until its initialization is complete, but
> +			 * it is allowed to process replies to its own queries.
> +			 */
> +			if (rte_eal_process_type() ==
> RTE_PROC_SECONDARY &&
> +					!init_complete &&
> +					cur_msg->msg.type != MP_REP)
> +				continue;
> +
> +			TAILQ_REMOVE(&message_queue, cur_msg, next);
> +
> +			process_msg(&cur_msg->msg, &cur_msg->sa);
> +
> +			free(cur_msg);
> +		}
>  	}
> 
>  	return NULL;
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 3/5] eal: use locks to determine if secondary process is active
  2018-02-27 14:35   ` [PATCH v3 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
@ 2018-02-28  1:26     ` Tan, Jianfeng
  2018-02-28 10:15       ` Burakov, Anatoly
  2018-02-28  4:17     ` Wiles, Keith
  1 sibling, 1 reply; 57+ messages in thread
From: Tan, Jianfeng @ 2018-02-28  1:26 UTC (permalink / raw)
  To: Burakov, Anatoly, dev



> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Tuesday, February 27, 2018 10:36 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng
> Subject: [PATCH v3 3/5] eal: use locks to determine if secondary process is
> active
> 
> Previously, IPC would remove sockets it considers to be "inactive"
> based on whether they have responded.

To be more precise, it was not depending on if the other side responses or not; it was depending on sendmsg return error, ECONNREFUSED.

> Change this to create lock
> files in addition to socket files, so that we can determine if
> secondary process is active before attempting to communicate with
> it. That way, we can distinguish secondaries that are alive but
> are not responding, from those that have already died.

I think, by the old way, we can also "distinguish secondaries that are alive but are not responding, from those that have already died", can't we?

Thanks,
Jianfeng

> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v3: no changes
> 
>     v2: no changes
> 
>  lib/librte_eal/common/eal_common_proc.c | 204
> +++++++++++++++++++++++++++-----
>  1 file changed, 175 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_proc.c
> b/lib/librte_eal/common/eal_common_proc.c
> index a6e24e6..7c87971 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -13,6 +13,7 @@
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> +#include <sys/file.h>
>  #include <sys/time.h>
>  #include <sys/types.h>
>  #include <sys/socket.h>
> @@ -32,6 +33,7 @@
>  #include "eal_internal_cfg.h"
> 
>  static int mp_fd = -1;
> +static int lock_fd = -1;
>  static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
>  static char mp_dir_path[PATH_MAX]; /* The directory path for all mp
> sockets */
>  static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
> @@ -104,6 +106,46 @@ find_sync_request(const char *dst, const char
> *act_name)
>  	return r;
>  }
> 
> +static void
> +create_socket_path(const char *name, char *buf, int len)
> +{
> +	const char *prefix = eal_mp_socket_path();
> +	if (strlen(name) > 0)
> +		snprintf(buf, len, "%s_%s", prefix, name);
> +	else
> +		snprintf(buf, len, "%s", prefix);
> +}
> +
> +static void
> +create_lockfile_path(const char *name, char *buf, int len)
> +{
> +	const char *prefix = eal_mp_socket_path();
> +	if (strlen(name) > 1)
> +		snprintf(buf, len, "%slock_%s", prefix, name);
> +	else
> +		snprintf(buf, len, "%slock", prefix);
> +}
> +
> +static const char *
> +get_peer_name(const char *socket_full_path)
> +{
> +	char buf[PATH_MAX] = {0};
> +	int len;
> +
> +	/* primary process has no peer name */
> +	if (strcmp(socket_full_path, eal_mp_socket_path()) == 0)
> +		return NULL;
> +
> +	/* construct dummy socket file name - make it one character long so
> that
> +	 * we hit the code path where underscores are added
> +	 */
> +	create_socket_path("a", buf, sizeof(buf));
> +
> +	/* we want to get everything after /path/.rte_unix_, so discard 'a' */
> +	len = strlen(buf) - 1;
> +	return &socket_full_path[len];
> +}
> +
>  int
>  rte_eal_primary_proc_alive(const char *config_file_path)
>  {
> @@ -332,8 +374,29 @@ mp_handle(void *arg __rte_unused)
>  static int
>  open_socket_fd(void)
>  {
> +	char peer_name[PATH_MAX] = {0};
> +	char lockfile[PATH_MAX] = {0};
>  	struct sockaddr_un un;
> -	const char *prefix = eal_mp_socket_path();
> +
> +	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
> +		snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64,
> +			 getpid(), rte_rdtsc());
> +
> +	/* try to create lockfile */
> +	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
> +
> +	lock_fd = open(lockfile, O_CREAT | O_RDWR);
> +	if (lock_fd < 0) {
> +		RTE_LOG(ERR, EAL, "failed to open '%s': %s\n", lockfile,
> +			strerror(errno));
> +		return -1;
> +	}
> +	if (flock(lock_fd, LOCK_EX | LOCK_NB)) {
> +		RTE_LOG(ERR, EAL, "failed to lock '%s': %s\n", lockfile,
> +			strerror(errno));
> +		return -1;
> +	}
> +	/* no need to downgrade to shared lock */
> 
>  	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
>  	if (mp_fd < 0) {
> @@ -343,13 +406,11 @@ open_socket_fd(void)
> 
>  	memset(&un, 0, sizeof(un));
>  	un.sun_family = AF_UNIX;
> -	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> -		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
> -	else {
> -		snprintf(un.sun_path, sizeof(un.sun_path),
> "%s_%d_%"PRIx64,
> -			 prefix, getpid(), rte_rdtsc());
> -	}
> +
> +	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
> +
>  	unlink(un.sun_path); /* May still exist since last run */
> +
>  	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
>  		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
>  			un.sun_path, strerror(errno));
> @@ -361,6 +422,44 @@ open_socket_fd(void)
>  	return mp_fd;
>  }
> 
> +/* find corresponding lock file and try to lock it */
> +static int
> +socket_is_active(const char *peer_name)
> +{
> +	char lockfile[PATH_MAX] = {0};
> +	int fd, ret = -1;
> +
> +	/* construct lockfile filename */
> +	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
> +
> +	/* try to lock it */
> +	fd = open(lockfile, O_CREAT | O_RDWR);
> +	if (fd < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot open '%s': %s\n", lockfile,
> +			strerror(errno));
> +		return -1;
> +	}
> +	ret = flock(fd, LOCK_EX | LOCK_NB);
> +	if (ret < 0) {
> +		if (errno == EWOULDBLOCK) {
> +			/* file is locked */
> +			ret = 1;
> +		} else {
> +			RTE_LOG(ERR, EAL, "Cannot lock '%s': %s\n", lockfile,
> +				strerror(errno));
> +			ret = -1;
> +		}
> +	} else {
> +		ret = 0;
> +		/* unlink lockfile automatically */
> +		unlink(lockfile);
> +		flock(fd, LOCK_UN);
> +	}
> +	close(fd);
> +
> +	return ret;
> +}
> +
>  static int
>  unlink_sockets(const char *filter)
>  {
> @@ -376,28 +475,33 @@ unlink_sockets(const char *filter)
>  	dir_fd = dirfd(mp_dir);
> 
>  	while ((ent = readdir(mp_dir))) {
> -		if (fnmatch(filter, ent->d_name, 0) == 0)
> +		if (fnmatch(filter, ent->d_name, 0) == 0) {
> +			const char *peer_name;
> +			char path[PATH_MAX];
> +			int ret;
> +
> +			snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
> +				 ent->d_name);
> +			peer_name = get_peer_name(path);
> +
> +			ret = socket_is_active(peer_name);
> +			if (ret < 0) {
> +				RTE_LOG(ERR, EAL, "Error getting socket
> active status\n");
> +				return -1;
> +			} else if (ret == 1) {
> +				RTE_LOG(ERR, EAL, "Socket is active (old
> secondary process still running?)\n");
> +				return -1;
> +			}
> +			RTE_LOG(DEBUG, EAL, "Removing stale socket file
> '%s'\n",
> +					ent->d_name);
>  			unlinkat(dir_fd, ent->d_name, 0);
> +		}
>  	}
> 
>  	closedir(mp_dir);
>  	return 0;
>  }
> 
> -static void
> -unlink_socket_by_path(const char *path)
> -{
> -	char *filename;
> -	char *fullpath = strdup(path);
> -
> -	if (!fullpath)
> -		return;
> -	filename = basename(fullpath);
> -	unlink_sockets(filename);
> -	free(fullpath);
> -	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
> -}
> -
>  int
>  rte_mp_channel_init(void)
>  {
> @@ -487,10 +591,25 @@ send_msg(const char *dst_path, struct
> rte_mp_msg *msg, int type)
>  		rte_errno = errno;
>  		/* Check if it caused by peer process exits */
>  		if (errno == ECONNREFUSED) {
> -			/* We don't unlink the primary's socket here */
> -			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> -				unlink_socket_by_path(dst_path);
> -			return 0;
> +			const char *peer_name = get_peer_name(dst_path);
> +			int active, ret = 0;
> +
> +			active = rte_eal_process_type() ==
> RTE_PROC_PRIMARY ?
> +					socket_is_active(peer_name) :
> +					rte_eal_primary_proc_alive(NULL);
> +
> +			if (active > 0) {
> +				RTE_LOG(ERR, EAL, "Couldn't communicate
> with active peer\n");
> +			} else if (active < 0) {
> +				RTE_LOG(ERR, EAL, "Couldn't get peer
> status\n");
> +				ret = -1;
> +			} else if (rte_eal_process_type() ==
> RTE_PROC_PRIMARY) {
> +				/* peer isn't active anymore, so unlink its
> +				 * socket.
> +				 */
> +				unlink(dst_path);
> +			}
> +			return ret;
>  		}
>  		if (errno == ENOBUFS) {
>  			RTE_LOG(ERR, EAL, "Peer cannot receive
> message %s\n",
> @@ -508,7 +627,7 @@ send_msg(const char *dst_path, struct rte_mp_msg
> *msg, int type)
>  static int
>  mp_send(struct rte_mp_msg *msg, const char *peer, int type)
>  {
> -	int ret = 0;
> +	int dir_fd, ret = 0;
>  	DIR *mp_dir;
>  	struct dirent *ent;
> 
> @@ -530,15 +649,28 @@ mp_send(struct rte_mp_msg *msg, const char
> *peer, int type)
>  		rte_errno = errno;
>  		return -1;
>  	}
> +	dir_fd = dirfd(mp_dir);
>  	while ((ent = readdir(mp_dir))) {
>  		char path[PATH_MAX];
> +		const char *peer_name;
> +		int active;
> 
>  		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
>  			continue;
> 
>  		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
>  			 ent->d_name);
> -		if (send_msg(path, msg, type) < 0)
> +		peer_name = get_peer_name(path);
> +
> +		/* only send if we can expect to receive a reply, otherwise
> +		 * remove the socket.
> +		 */
> +		active = socket_is_active(peer_name);
> +		if (active < 0)
> +			ret = -1;
> +		else if (active == 0)
> +			unlinkat(dir_fd, ent->d_name, 0);
> +		else if (active > 0 && send_msg(path, msg, type) < 0)
>  			ret = -1;
>  	}
> 
> @@ -663,7 +795,7 @@ int __rte_experimental
>  rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
>  		const struct timespec *ts)
>  {
> -	int ret = 0;
> +	int dir_fd, ret = 0;
>  	DIR *mp_dir;
>  	struct dirent *ent;
>  	struct timeval now;
> @@ -698,15 +830,29 @@ rte_mp_request(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  		rte_errno = errno;
>  		return -1;
>  	}
> +	dir_fd = dirfd(mp_dir);
> 
>  	while ((ent = readdir(mp_dir))) {
> +		const char *peer_name;
>  		char path[PATH_MAX];
> +		int active;
> 
>  		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
>  			continue;
> 
>  		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
>  			 ent->d_name);
> +		peer_name = get_peer_name(path);
> +
> +		active = socket_is_active(peer_name);
> +
> +		if (active < 0) {
> +			ret = -1;
> +			break;
> +		} else if (active == 0) {
> +			unlinkat(dir_fd, ent->d_name, 0);
> +			continue;
> +		}
> 
>  		if (mp_request_one(path, req, reply, &end))
>  			ret = -1;
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC
  2018-02-27 14:35   ` [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
@ 2018-02-28  1:52     ` Tan, Jianfeng
  2018-02-28 10:21       ` Burakov, Anatoly
  0 siblings, 1 reply; 57+ messages in thread
From: Tan, Jianfeng @ 2018-02-28  1:52 UTC (permalink / raw)
  To: Burakov, Anatoly, dev

Hi Anatoly,

> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Tuesday, February 27, 2018 10:36 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng
> Subject: [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC
> 
> Currently, filter value is hardcoded and disconnected from actual
> value returned by eal_mp_socket_path().

I can understand the hardcode is not good. But why it's not consistent with the actual value returned from eal_mp_socket_path()?

> Fix this to generate filter
> value by deriving it from eal_mp_socket_path() instead.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Anyway, I think your way looks good to me, so

Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks,
Jianfeng

> ---
> 
> Notes:
>     v3: no changes
> 
>     v2: no changes
> 
>  lib/librte_eal/common/eal_common_proc.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_proc.c
> b/lib/librte_eal/common/eal_common_proc.c
> index 7856a7b..4cf3aa6 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -506,16 +506,17 @@ int
>  rte_mp_channel_init(void)
>  {
>  	char thread_name[RTE_MAX_THREAD_NAME_LEN];
> -	char *path;
> +	char path[PATH_MAX];
>  	int dir_fd;
>  	pthread_t tid;
> 
> -	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
> -		 internal_config.hugefile_prefix);
> +	/* create filter path */
> +	create_socket_path("*", path, sizeof(path));
> +	snprintf(mp_filter, sizeof(mp_filter), "%s", basename(path));
> 
> -	path = strdup(eal_mp_socket_path());
> -	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
> -	free(path);
> +	/* path may have been modified, so recreate it */
> +	create_socket_path("*", path, sizeof(path));
> +	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
> 
>  	/* lock the directory */
>  	dir_fd = open(mp_dir_path, O_RDONLY);
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 4/5] eal: prevent secondary process init while sending messages
  2018-02-27 14:35   ` [PATCH v3 4/5] eal: prevent secondary process init while sending messages Anatoly Burakov
@ 2018-02-28  1:58     ` Tan, Jianfeng
  2018-02-28 10:19       ` Burakov, Anatoly
  0 siblings, 1 reply; 57+ messages in thread
From: Tan, Jianfeng @ 2018-02-28  1:58 UTC (permalink / raw)
  To: Burakov, Anatoly, dev

Hi Anatoly,

> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Tuesday, February 27, 2018 10:36 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng
> Subject: [PATCH v3 4/5] eal: prevent secondary process init while sending
> messages
> 
> Currently, it is possible to spin up a secondary process while
> either sendmsg or request is in progress. Fix this by adding
> directory locks during init, sendmsg and requests.

Could you give a more detailed example for this issue?

And why locking the directory can help?

Thanks,
Jianfeng

> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v3: no changes
> 
>     v2: no changes
> 
>  lib/librte_eal/common/eal_common_proc.c | 47
> ++++++++++++++++++++++++++++++++-
>  1 file changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_proc.c
> b/lib/librte_eal/common/eal_common_proc.c
> index 7c87971..7856a7b 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -507,6 +507,7 @@ rte_mp_channel_init(void)
>  {
>  	char thread_name[RTE_MAX_THREAD_NAME_LEN];
>  	char *path;
> +	int dir_fd;
>  	pthread_t tid;
> 
>  	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
> @@ -516,14 +517,32 @@ rte_mp_channel_init(void)
>  	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
>  	free(path);
> 
> +	/* lock the directory */
> +	dir_fd = open(mp_dir_path, O_RDONLY);
> +	if (dir_fd < 0) {
> +		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
> +			mp_dir_path, strerror(errno));
> +		return -1;
> +	}
> +
> +	if (flock(dir_fd, LOCK_EX)) {
> +		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
> +			mp_dir_path, strerror(errno));
> +		close(dir_fd);
> +		return -1;
> +	}
> +
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
>  	    unlink_sockets(mp_filter)) {
>  		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
> +		close(dir_fd);
>  		return -1;
>  	}
> 
> -	if (open_socket_fd() < 0)
> +	if (open_socket_fd() < 0) {
> +		close(dir_fd);
>  		return -1;
> +	}
> 
>  	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
>  		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
> @@ -536,6 +555,11 @@ rte_mp_channel_init(void)
>  	/* try best to set thread name */
>  	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> "rte_mp_handle");
>  	rte_thread_setname(tid, thread_name);
> +
> +	/* unlock the directory */
> +	flock(dir_fd, LOCK_UN);
> +	close(dir_fd);
> +
>  	return 0;
>  }
> 
> @@ -650,6 +674,14 @@ mp_send(struct rte_mp_msg *msg, const char
> *peer, int type)
>  		return -1;
>  	}
>  	dir_fd = dirfd(mp_dir);
> +	/* lock the directory to prevent processes spinning up while we send
> */
> +	if (flock(dir_fd, LOCK_EX)) {
> +		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> +			mp_dir_path);
> +		rte_errno = errno;
> +		closedir(mp_dir);
> +		return -1;
> +	}
>  	while ((ent = readdir(mp_dir))) {
>  		char path[PATH_MAX];
>  		const char *peer_name;
> @@ -673,6 +705,8 @@ mp_send(struct rte_mp_msg *msg, const char *peer,
> int type)
>  		else if (active > 0 && send_msg(path, msg, type) < 0)
>  			ret = -1;
>  	}
> +	/* unlock the dir */
> +	flock(dir_fd, LOCK_UN);
> 
>  	closedir(mp_dir);
>  	return ret;
> @@ -832,6 +866,15 @@ rte_mp_request(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  	}
>  	dir_fd = dirfd(mp_dir);
> 
> +	/* lock the directory to prevent processes spinning up while we send
> */
> +	if (flock(dir_fd, LOCK_EX)) {
> +		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> +			mp_dir_path);
> +		closedir(mp_dir);
> +		rte_errno = errno;
> +		return -1;
> +	}
> +
>  	while ((ent = readdir(mp_dir))) {
>  		const char *peer_name;
>  		char path[PATH_MAX];
> @@ -857,6 +900,8 @@ rte_mp_request(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  		if (mp_request_one(path, req, reply, &end))
>  			ret = -1;
>  	}
> +	/* unlock the directory */
> +	flock(dir_fd, LOCK_UN);
> 
>  	closedir(mp_dir);
>  	return ret;
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 1/5] eal: add internal flag indicating init has completed
  2018-02-27 14:35   ` [PATCH v3 " Anatoly Burakov
@ 2018-02-28  2:12     ` Tan, Jianfeng
  2018-02-28  9:43       ` Burakov, Anatoly
  0 siblings, 1 reply; 57+ messages in thread
From: Tan, Jianfeng @ 2018-02-28  2:12 UTC (permalink / raw)
  To: Burakov, Anatoly, dev

Hi Anatoly,

> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Tuesday, February 27, 2018 10:36 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng
> Subject: [PATCH v3 1/5] eal: add internal flag indicating init has completed
> 
> Currently, primary process initialization is finalized by setting
> the RTE_MAGIC value in the shared config. However, it is not
> possible to check whether secondary process initialization has
> completed. Add such a value to internal config.

A nit:
"check whether secondary process initialization has completed" sounds like checking comes from another process.
Does it look better, "check whether initialization has completed in secondary process"?

> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Reviewed-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks,
Jianfeng

> ---
> 
> Notes:
>     This patch is dependent upon earlier IPC fixes patchset [1].
> 
>     [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/
> 
>     v3: no changes
> 
>     v2: no changes
> 
>  lib/librte_eal/common/eal_common_options.c | 1 +
>  lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
>  lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
>  3 files changed, 5 insertions(+)
> 
> diff --git a/lib/librte_eal/common/eal_common_options.c
> b/lib/librte_eal/common/eal_common_options.c
> index 9f2f8d2..0be80cb 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -194,6 +194,7 @@ eal_reset_internal_config(struct internal_config
> *internal_cfg)
>  	internal_cfg->vmware_tsc_map = 0;
>  	internal_cfg->create_uio_dev = 0;
>  	internal_cfg->user_mbuf_pool_ops_name = NULL;
> +	internal_cfg->init_complete = 0;
>  }
> 
>  static int
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h
> b/lib/librte_eal/common/eal_internal_cfg.h
> index 1169fcc..4e2c2e6 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -56,6 +56,8 @@ struct internal_config {
>  			/**< user defined mbuf pool ops name */
>  	unsigned num_hugepage_sizes;      /**< how many sizes on this
> system */
>  	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
> +	unsigned int init_complete;
> +	/**< indicates whether EAL has completed initialization */
>  };
>  extern struct internal_config internal_config; /**< Global EAL configuration.
> */
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 38306bf..2ecd07b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -669,6 +669,8 @@ rte_eal_mcfg_complete(void)
>  	/* ALL shared mem_config related INIT DONE */
>  	if (rte_config.process_type == RTE_PROC_PRIMARY)
>  		rte_config.mem_config->magic = RTE_MAGIC;
> +
> +	internal_config.init_complete = 1;
>  }
> 
>  /*
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 2/5] eal: don't process IPC messages before init finished
  2018-02-27 14:35   ` [PATCH v3 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
  2018-02-28  1:09     ` Tan, Jianfeng
@ 2018-02-28  4:00     ` Wiles, Keith
  2018-02-28  9:47       ` Burakov, Anatoly
  1 sibling, 1 reply; 57+ messages in thread
From: Wiles, Keith @ 2018-02-28  4:00 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev, Tan, Jianfeng



> 
> +	struct message_queue_entry *cur_msg, *next_msg, *new_msg = NULL;
> 	while (1) {
> -		if (read_msg(&msg, &sa) == 0)
> -			process_msg(&msg, &sa);
> +		/* we want to process all messages in order of their arrival,
> +		 * but status of init_complete may change while we're iterating
> +		 * the tailq. so, store it here and check once every iteration.
> +		 */
> +		int init_complete;

Do we allow variables to be defined in the middle of a block, I thought we only allowed them after a ‘{‘ or open block.

> +
> +		if (new_msg == NULL)
> +			new_msg = malloc(sizeof(*new_msg));

I am very concerned about allocating memory with no limit. If the process never completes then we could receive messages and consume a lot of memory. I would want to see a limit to the number of messages we can consume in the queue just to be sure.

> +		if (read_msg(&new_msg->msg, &new_msg->sa) == 0) {
> +			/* we successfully read the message, so enqueue it */
> +			TAILQ_INSERT_TAIL(&message_queue, new_msg, next);
> +			new_msg = NULL;
> +		} /* reuse new_msg for next message if we couldn't read_msg */
> +
> +		init_complete = internal_config.init_complete;

Does the internal_config.init_complete need to be a volatile to make sure it is reread each time thru the loop?

> +
> +		/* tailq only accessed here, so no locking needed */
> +		TAILQ_FOREACH_SAFE(cur_msg, &message_queue, next, next_msg) {
> +			/* secondary process should not process any incoming
> +			 * requests until its initialization is complete, but
> +			 * it is allowed to process replies to its own queries.
> +			 */
> +			if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
> +					!init_complete &&
> +					cur_msg->msg.type != MP_REP)
> +				continue;
> +
> +			TAILQ_REMOVE(&message_queue, cur_msg, next);
> +
> +			process_msg(&cur_msg->msg, &cur_msg->sa);
> +
> +			free(cur_msg);
> +		}
> 	}
> 
> 	return NULL;
> -- 
> 2.7.4

Regards,
Keith


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 3/5] eal: use locks to determine if secondary process is active
  2018-02-27 14:35   ` [PATCH v3 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
  2018-02-28  1:26     ` Tan, Jianfeng
@ 2018-02-28  4:17     ` Wiles, Keith
  2018-02-28 10:17       ` Burakov, Anatoly
  1 sibling, 1 reply; 57+ messages in thread
From: Wiles, Keith @ 2018-02-28  4:17 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev, Tan, Jianfeng



> On Feb 27, 2018, at 8:35 AM, Anatoly Burakov <anatoly.burakov@intel.com> wrote:
> 
> Previously, IPC would remove sockets it considers to be "inactive"
> based on whether they have responded. Change this to create lock
> files in addition to socket files, so that we can determine if
> secondary process is active before attempting to communicate with
> it. That way, we can distinguish secondaries that are alive but
> are not responding, from those that have already died.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>    v3: no changes
> 
>    v2: no changes
> 
> lib/librte_eal/common/eal_common_proc.c | 204 +++++++++++++++++++++++++++-----
> 1 file changed, 175 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
> index a6e24e6..7c87971 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -13,6 +13,7 @@
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> +#include <sys/file.h>
> #include <sys/time.h>
> #include <sys/types.h>
> #include <sys/socket.h>
> @@ -32,6 +33,7 @@
> #include "eal_internal_cfg.h"
> 
> static int mp_fd = -1;
> +static int lock_fd = -1;

I did not find where lock_fd is closed in this code, should it be closed at some point?

> static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
> static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
> static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
> @@ -104,6 +106,46 @@ find_sync_request(const char *dst, const char *act_name)
> 	return r;
> }
> 
> +static void
> +create_socket_path(const char *name, char *buf, int len)
> +{
> +	const char *prefix = eal_mp_socket_path();

I thought we needed a space here after the variables, no?

> +	if (strlen(name) > 0)
> +		snprintf(buf, len, "%s_%s", prefix, name);
> +	else
> +		snprintf(buf, len, "%s", prefix);
> +}
> +
> +static void
> +create_lockfile_path(const char *name, char *buf, int len)
> +{
> +	const char *prefix = eal_mp_socket_path();

Same here
> +	if (strlen(name) > 1)
> +		snprintf(buf, len, "%slock_%s", prefix, name);
> +	else
> +		snprintf(buf, len, "%slock", prefix);
> +}
> +
> +static const char *
> +get_peer_name(const char *socket_full_path)
> +{
> +	char buf[PATH_MAX] = {0};
> +	int len;
> +
> +	/* primary process has no peer name */
> +	if (strcmp(socket_full_path, eal_mp_socket_path()) == 0)
> +		return NULL;
> +
> +	/* construct dummy socket file name - make it one character long so that
> +	 * we hit the code path where underscores are added
> +	 */
> +	create_socket_path("a", buf, sizeof(buf));
> +
> +	/* we want to get everything after /path/.rte_unix_, so discard 'a' */
> +	len = strlen(buf) - 1;
> +	return &socket_full_path[len];
> +}
> +
> int
> rte_eal_primary_proc_alive(const char *config_file_path)
> {
> @@ -332,8 +374,29 @@ mp_handle(void *arg __rte_unused)
> static int
> open_socket_fd(void)
> {
> +	char peer_name[PATH_MAX] = {0};
> +	char lockfile[PATH_MAX] = {0};
> 	struct sockaddr_un un;
> -	const char *prefix = eal_mp_socket_path();
> +
> +	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
> +		snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64,
> +			 getpid(), rte_rdtsc());
> +
> +	/* try to create lockfile */
> +	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
> +
> +	lock_fd = open(lockfile, O_CREAT | O_RDWR);
> +	if (lock_fd < 0) {
> +		RTE_LOG(ERR, EAL, "failed to open '%s': %s\n", lockfile,
> +			strerror(errno));
> +		return -1;
> +	}
> +	if (flock(lock_fd, LOCK_EX | LOCK_NB)) {
> +		RTE_LOG(ERR, EAL, "failed to lock '%s': %s\n", lockfile,
> +			strerror(errno));

Did not close the lock_fd here.

> +		return -1;
> +	}
> +	/* no need to downgrade to shared lock */
> 
> 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
> 	if (mp_fd < 0) {
> @@ -343,13 +406,11 @@ open_socket_fd(void)
> 
> 	memset(&un, 0, sizeof(un));
> 	un.sun_family = AF_UNIX;
> -	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> -		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
> -	else {
> -		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
> -			 prefix, getpid(), rte_rdtsc());
> -	}
> +
> +	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
> +
> 	unlink(un.sun_path); /* May still exist since last run */
> +
> 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
> 			un.sun_path, strerror(errno));
> @@ -361,6 +422,44 @@ open_socket_fd(void)
> 	return mp_fd;
> }
> 
> +/* find corresponding lock file and try to lock it */
> +static int
> +socket_is_active(const char *peer_name)
> +{
> +	char lockfile[PATH_MAX] = {0};
> +	int fd, ret = -1;
> +
> +	/* construct lockfile filename */
> +	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
> +
> +	/* try to lock it */
> +	fd = open(lockfile, O_CREAT | O_RDWR);
> +	if (fd < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot open '%s': %s\n", lockfile,
> +			strerror(errno));
> +		return -1;
> +	}
> +	ret = flock(fd, LOCK_EX | LOCK_NB);
> +	if (ret < 0) {
> +		if (errno == EWOULDBLOCK) {
> +			/* file is locked */
> +			ret = 1;
> +		} else {
> +			RTE_LOG(ERR, EAL, "Cannot lock '%s': %s\n", lockfile,
> +				strerror(errno));
> +			ret = -1;
> +		}
> +	} else {
> +		ret = 0;
> +		/* unlink lockfile automatically */
> +		unlink(lockfile);
> +		flock(fd, LOCK_UN);
> +	}
> +	close(fd);
> +
> +	return ret;
> +}
> +
> static int
> unlink_sockets(const char *filter)
> {
> @@ -376,28 +475,33 @@ unlink_sockets(const char *filter)
> 	dir_fd = dirfd(mp_dir);
> 
> 	while ((ent = readdir(mp_dir))) {
> -		if (fnmatch(filter, ent->d_name, 0) == 0)
> +		if (fnmatch(filter, ent->d_name, 0) == 0) {
> +			const char *peer_name;
> +			char path[PATH_MAX];
> +			int ret;
> +
> +			snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
> +				 ent->d_name);
> +			peer_name = get_peer_name(path);
> +
> +			ret = socket_is_active(peer_name);
> +			if (ret < 0) {
> +				RTE_LOG(ERR, EAL, "Error getting socket active status\n”);

No close for mp_dir?
> +				return -1;
> +			} else if (ret == 1) {
> +				RTE_LOG(ERR, EAL, "Socket is active (old secondary process still running?)\n”);

And here
> +				return -1;
> +			}
> +			RTE_LOG(DEBUG, EAL, "Removing stale socket file '%s'\n",
> +					ent->d_name);
> 			unlinkat(dir_fd, ent->d_name, 0);

Does unlinkat() close dir_fd?

> +		}
> 	}
> 
> 	closedir(mp_dir);
> 	return 0;
> }
> 
> -static void
> -unlink_socket_by_path(const char *path)
> -{
> -	char *filename;
> -	char *fullpath = strdup(path);
> -
> -	if (!fullpath)
> -		return;
> -	filename = basename(fullpath);
> -	unlink_sockets(filename);
> -	free(fullpath);
> -	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
> -}
> -
> int
> rte_mp_channel_init(void)
> {
> @@ -487,10 +591,25 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
> 		rte_errno = errno;
> 		/* Check if it caused by peer process exits */
> 		if (errno == ECONNREFUSED) {
> -			/* We don't unlink the primary's socket here */
> -			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> -				unlink_socket_by_path(dst_path);
> -			return 0;
> +			const char *peer_name = get_peer_name(dst_path);
> +			int active, ret = 0;

Here we have variables in the middle of a block again.

> +
> +			active = rte_eal_process_type() == RTE_PROC_PRIMARY ?
> +					socket_is_active(peer_name) :
> +					rte_eal_primary_proc_alive(NULL);
> +
> +			if (active > 0) {
> +				RTE_LOG(ERR, EAL, "Couldn't communicate with active peer\n");
> +			} else if (active < 0) {
> +				RTE_LOG(ERR, EAL, "Couldn't get peer status\n");
> +				ret = -1;
> +			} else if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +				/* peer isn't active anymore, so unlink its
> +				 * socket.
> +				 */
> +				unlink(dst_path);
> +			}
> +			return ret;
> 		}
> 		if (errno == ENOBUFS) {
> 			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
> @@ -508,7 +627,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
> static int
> mp_send(struct rte_mp_msg *msg, const char *peer, int type)
> {
> -	int ret = 0;
> +	int dir_fd, ret = 0;
> 	DIR *mp_dir;
> 	struct dirent *ent;
> 
> @@ -530,15 +649,28 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
> 		rte_errno = errno;
> 		return -1;
> 	}
> +	dir_fd = dirfd(mp_dir);

when is dir_fd closed?

> 	while ((ent = readdir(mp_dir))) {
> 		char path[PATH_MAX];
> +		const char *peer_name;
> +		int active;
> 
> 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
> 			continue;
> 
> 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
> 			 ent->d_name);
> -		if (send_msg(path, msg, type) < 0)
> +		peer_name = get_peer_name(path);
> +
> +		/* only send if we can expect to receive a reply, otherwise
> +		 * remove the socket.
> +		 */
> +		active = socket_is_active(peer_name);
> +		if (active < 0)
> +			ret = -1;
> +		else if (active == 0)
> +			unlinkat(dir_fd, ent->d_name, 0);
> +		else if (active > 0 && send_msg(path, msg, type) < 0)
> 			ret = -1;
> 	}
> 
> @@ -663,7 +795,7 @@ int __rte_experimental
> rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
> 		const struct timespec *ts)
> {
> -	int ret = 0;
> +	int dir_fd, ret = 0;
> 	DIR *mp_dir;
> 	struct dirent *ent;
> 	struct timeval now;
> @@ -698,15 +830,29 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
> 		rte_errno = errno;
> 		return -1;
> 	}
> +	dir_fd = dirfd(mp_dir);

When is dir_fd closed?

> 
> 	while ((ent = readdir(mp_dir))) {
> +		const char *peer_name;
> 		char path[PATH_MAX];
> +		int active;
> 
> 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
> 			continue;
> 
> 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
> 			 ent->d_name);
> +		peer_name = get_peer_name(path);
> +
> +		active = socket_is_active(peer_name);
> +
> +		if (active < 0) {
> +			ret = -1;
> +			break;
> +		} else if (active == 0) {
> +			unlinkat(dir_fd, ent->d_name, 0);
> +			continue;
> +		}
> 
> 		if (mp_request_one(path, req, reply, &end))
> 			ret = -1;
> -- 
> 2.7.4

Regards,
Keith


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 1/5] eal: add internal flag indicating init has completed
  2018-02-28  2:12     ` Tan, Jianfeng
@ 2018-02-28  9:43       ` Burakov, Anatoly
  0 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-28  9:43 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 28-Feb-18 2:12 AM, Tan, Jianfeng wrote:
> Hi Anatoly,
> 
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Tuesday, February 27, 2018 10:36 PM
>> To: dev@dpdk.org
>> Cc: Tan, Jianfeng
>> Subject: [PATCH v3 1/5] eal: add internal flag indicating init has completed
>>
>> Currently, primary process initialization is finalized by setting
>> the RTE_MAGIC value in the shared config. However, it is not
>> possible to check whether secondary process initialization has
>> completed. Add such a value to internal config.
> 
> A nit:
> "check whether secondary process initialization has completed" sounds like checking comes from another process.
> Does it look better, "check whether initialization has completed in secondary process"?
> 
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> 
> Reviewed-by: Jianfeng Tan <jianfeng.tan@intel.com>
> 
> Thanks,
> Jianfeng

You're probably right, i should reword that. Thanks!


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 2/5] eal: don't process IPC messages before init finished
  2018-02-28  1:09     ` Tan, Jianfeng
@ 2018-02-28  9:45       ` Burakov, Anatoly
  0 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-28  9:45 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 28-Feb-18 1:09 AM, Tan, Jianfeng wrote:
> 
> 
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Tuesday, February 27, 2018 10:36 PM
>> To: dev@dpdk.org
>> Cc: Tan, Jianfeng
>> Subject: [PATCH v3 2/5] eal: don't process IPC messages before init finished
>>
>> It is not possible for a primary process to receive any messages
>> while initializing, because RTE_MAGIC value is not set in the
>> shared config, and hence no secondary process can ever spin up
>> during that time.
>>
>> However, it is possible for a secondary process to receive messages
>> from the primary during initialization. We can't just drop the
>> messages as they may be important, and also we might need to process
>> replies to our own requests (e.g. VFIO) during initialization.
>>
>> Therefore, add a tailq for incoming messages, and queue them up
>> until initialization is complete, and process them in order they
>> arrived.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>>
>> Notes:
>>      v3: check for init_complete after receiving message
>>
>>      v2: no changes
>>
>>   lib/librte_eal/common/eal_common_proc.c | 52
>> +++++++++++++++++++++++++++++----
>>   1 file changed, 47 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/librte_eal/common/eal_common_proc.c
>> b/lib/librte_eal/common/eal_common_proc.c
>> index 3a1088e..a6e24e6 100644
>> --- a/lib/librte_eal/common/eal_common_proc.c
>> +++ b/lib/librte_eal/common/eal_common_proc.c
>> @@ -25,6 +25,7 @@
>>   #include <rte_errno.h>
>>   #include <rte_lcore.h>
>>   #include <rte_log.h>
>> +#include <rte_tailq.h>
>>
>>   #include "eal_private.h"
>>   #include "eal_filesystem.h"
>> @@ -58,6 +59,18 @@ struct mp_msg_internal {
>>   	struct rte_mp_msg msg;
>>   };
>>
>> +struct message_queue_entry {
>> +	TAILQ_ENTRY(message_queue_entry) next;
>> +	struct mp_msg_internal msg;
>> +	struct sockaddr_un sa;
>> +};
>> +
>> +/** Double linked list of received messages. */
>> +TAILQ_HEAD(message_queue, message_queue_entry);
>> +
>> +static struct message_queue message_queue =
>> +	TAILQ_HEAD_INITIALIZER(message_queue);
>> +
>>   struct sync_request {
>>   	TAILQ_ENTRY(sync_request) next;
>>   	int reply_received;
>> @@ -276,12 +289,41 @@ process_msg(struct mp_msg_internal *m, struct
>> sockaddr_un *s)
>>   static void *
>>   mp_handle(void *arg __rte_unused)
>>   {
>> -	struct mp_msg_internal msg;
>> -	struct sockaddr_un sa;
>> -
>> +	struct message_queue_entry *cur_msg, *next_msg, *new_msg =
>> NULL;
>>   	while (1) {
>> -		if (read_msg(&msg, &sa) == 0)
>> -			process_msg(&msg, &sa);
>> +		/* we want to process all messages in order of their arrival,
>> +		 * but status of init_complete may change while we're
>> iterating
>> +		 * the tailq. so, store it here and check once every iteration.
>> +		 */
>> +		int init_complete;
>> +
>> +		if (new_msg == NULL)
>> +			new_msg = malloc(sizeof(*new_msg));
>> +		if (read_msg(&new_msg->msg, &new_msg->sa) == 0) {
> 
> Suppose a case that: req and msg received but init not completed, so we enqueue all of them in the tailq; and from now on, no req/rep/msg comes. Then mp thread will hang here for reading new message.
> In such a case, we might need the master thread to signal mp thread to wake up when init is completed?

True. Will have to think about how to do that.

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 2/5] eal: don't process IPC messages before init finished
  2018-02-28  4:00     ` Wiles, Keith
@ 2018-02-28  9:47       ` Burakov, Anatoly
  0 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-28  9:47 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, Tan, Jianfeng

On 28-Feb-18 4:00 AM, Wiles, Keith wrote:
> 
> 
>>
>> +	struct message_queue_entry *cur_msg, *next_msg, *new_msg = NULL;
>> 	while (1) {
>> -		if (read_msg(&msg, &sa) == 0)
>> -			process_msg(&msg, &sa);
>> +		/* we want to process all messages in order of their arrival,
>> +		 * but status of init_complete may change while we're iterating
>> +		 * the tailq. so, store it here and check once every iteration.
>> +		 */
>> +		int init_complete;
> 
> Do we allow variables to be defined in the middle of a block, I thought we only allowed them after a ‘{‘ or open block.

Apologies, will fix.

> 
>> +
>> +		if (new_msg == NULL)
>> +			new_msg = malloc(sizeof(*new_msg));
> 
> I am very concerned about allocating memory with no limit. If the process never completes then we could receive messages and consume a lot of memory. I would want to see a limit to the number of messages we can consume in the queue just to be sure.

Sure, will do.

> 
>> +		if (read_msg(&new_msg->msg, &new_msg->sa) == 0) {
>> +			/* we successfully read the message, so enqueue it */
>> +			TAILQ_INSERT_TAIL(&message_queue, new_msg, next);
>> +			new_msg = NULL;
>> +		} /* reuse new_msg for next message if we couldn't read_msg */
>> +
>> +		init_complete = internal_config.init_complete;
> 
> Does the internal_config.init_complete need to be a volatile to make sure it is reread each time thru the loop?

Will fix.

> 
>> +
>> +		/* tailq only accessed here, so no locking needed */
>> +		TAILQ_FOREACH_SAFE(cur_msg, &message_queue, next, next_msg) {
>> +			/* secondary process should not process any incoming
>> +			 * requests until its initialization is complete, but
>> +			 * it is allowed to process replies to its own queries.
>> +			 */
>> +			if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
>> +					!init_complete &&
>> +					cur_msg->msg.type != MP_REP)
>> +				continue;
>> +
>> +			TAILQ_REMOVE(&message_queue, cur_msg, next);
>> +
>> +			process_msg(&cur_msg->msg, &cur_msg->sa);
>> +
>> +			free(cur_msg);
>> +		}
>> 	}
>>
>> 	return NULL;
>> -- 
>> 2.7.4
> 
> Regards,
> Keith
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 3/5] eal: use locks to determine if secondary process is active
  2018-02-28  1:26     ` Tan, Jianfeng
@ 2018-02-28 10:15       ` Burakov, Anatoly
  0 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-28 10:15 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 28-Feb-18 1:26 AM, Tan, Jianfeng wrote:
> 
> 
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Tuesday, February 27, 2018 10:36 PM
>> To: dev@dpdk.org
>> Cc: Tan, Jianfeng
>> Subject: [PATCH v3 3/5] eal: use locks to determine if secondary process is
>> active
>>
>> Previously, IPC would remove sockets it considers to be "inactive"
>> based on whether they have responded.
> 
> To be more precise, it was not depending on if the other side responses or not; it was depending on sendmsg return error, ECONNREFUSED.
> 
>> Change this to create lock
>> files in addition to socket files, so that we can determine if
>> secondary process is active before attempting to communicate with
>> it. That way, we can distinguish secondaries that are alive but
>> are not responding, from those that have already died.
> 
> I think, by the old way, we can also "distinguish secondaries that are alive but are not responding, from those that have already died", can't we?
> 
> Thanks,
> Jianfeng
> 

I rechecked, and you're right. For some reason i thought that nb_sent 
gets incremented even if there was ECONNREFUSED error. It doesn't, so 
the effect is the same. I'll drop this patch so (well, i'll keep the 
naming stuff, as it makes things a bit easier).

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 3/5] eal: use locks to determine if secondary process is active
  2018-02-28  4:17     ` Wiles, Keith
@ 2018-02-28 10:17       ` Burakov, Anatoly
  0 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-28 10:17 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, Tan, Jianfeng

On 28-Feb-18 4:17 AM, Wiles, Keith wrote:
> 
> 
>> On Feb 27, 2018, at 8:35 AM, Anatoly Burakov <anatoly.burakov@intel.com> wrote:
>>
>> Previously, IPC would remove sockets it considers to be "inactive"
>> based on whether they have responded. Change this to create lock
>> files in addition to socket files, so that we can determine if
>> secondary process is active before attempting to communicate with
>> it. That way, we can distinguish secondaries that are alive but
>> are not responding, from those that have already died.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>>
>> Notes:
>>     v3: no changes
>>
>>     v2: no changes
>>
>> lib/librte_eal/common/eal_common_proc.c | 204 +++++++++++++++++++++++++++-----
>> 1 file changed, 175 insertions(+), 29 deletions(-)
>>
>> diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
>> index a6e24e6..7c87971 100644
>> --- a/lib/librte_eal/common/eal_common_proc.c
>> +++ b/lib/librte_eal/common/eal_common_proc.c
>> @@ -13,6 +13,7 @@
>> #include <stdio.h>
>> #include <stdlib.h>
>> #include <string.h>
>> +#include <sys/file.h>
>> #include <sys/time.h>
>> #include <sys/types.h>
>> #include <sys/socket.h>
>> @@ -32,6 +33,7 @@
>> #include "eal_internal_cfg.h"
>>
>> static int mp_fd = -1;
>> +static int lock_fd = -1;
> 
> I did not find where lock_fd is closed in this code, should it be closed at some point?
> 
>> static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
>> static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
>> static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
>> @@ -104,6 +106,46 @@ find_sync_request(const char *dst, const char *act_name)
>> 	return r;
>> }
>>
>> +static void
>> +create_socket_path(const char *name, char *buf, int len)
>> +{
>> +	const char *prefix = eal_mp_socket_path();
> 
> I thought we needed a space here after the variables, no?
> 
>> +	if (strlen(name) > 0)
>> +		snprintf(buf, len, "%s_%s", prefix, name);
>> +	else
>> +		snprintf(buf, len, "%s", prefix);
>> +}
>> +
>> +static void
>> +create_lockfile_path(const char *name, char *buf, int len)
>> +{
>> +	const char *prefix = eal_mp_socket_path();
> 
> Same here
>> +	if (strlen(name) > 1)
>> +		snprintf(buf, len, "%slock_%s", prefix, name);
>> +	else
>> +		snprintf(buf, len, "%slock", prefix);
>> +}
>> +
>> +static const char *
>> +get_peer_name(const char *socket_full_path)
>> +{
>> +	char buf[PATH_MAX] = {0};
>> +	int len;
>> +
>> +	/* primary process has no peer name */
>> +	if (strcmp(socket_full_path, eal_mp_socket_path()) == 0)
>> +		return NULL;
>> +
>> +	/* construct dummy socket file name - make it one character long so that
>> +	 * we hit the code path where underscores are added
>> +	 */
>> +	create_socket_path("a", buf, sizeof(buf));
>> +
>> +	/* we want to get everything after /path/.rte_unix_, so discard 'a' */
>> +	len = strlen(buf) - 1;
>> +	return &socket_full_path[len];
>> +}
>> +
>> int
>> rte_eal_primary_proc_alive(const char *config_file_path)
>> {
>> @@ -332,8 +374,29 @@ mp_handle(void *arg __rte_unused)
>> static int
>> open_socket_fd(void)
>> {
>> +	char peer_name[PATH_MAX] = {0};
>> +	char lockfile[PATH_MAX] = {0};
>> 	struct sockaddr_un un;
>> -	const char *prefix = eal_mp_socket_path();
>> +
>> +	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
>> +		snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64,
>> +			 getpid(), rte_rdtsc());
>> +
>> +	/* try to create lockfile */
>> +	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
>> +
>> +	lock_fd = open(lockfile, O_CREAT | O_RDWR);
>> +	if (lock_fd < 0) {
>> +		RTE_LOG(ERR, EAL, "failed to open '%s': %s\n", lockfile,
>> +			strerror(errno));
>> +		return -1;
>> +	}
>> +	if (flock(lock_fd, LOCK_EX | LOCK_NB)) {
>> +		RTE_LOG(ERR, EAL, "failed to lock '%s': %s\n", lockfile,
>> +			strerror(errno));
> 
> Did not close the lock_fd here.
> 
>> +		return -1;
>> +	}
>> +	/* no need to downgrade to shared lock */
>>
>> 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
>> 	if (mp_fd < 0) {
>> @@ -343,13 +406,11 @@ open_socket_fd(void)
>>
>> 	memset(&un, 0, sizeof(un));
>> 	un.sun_family = AF_UNIX;
>> -	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> -		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
>> -	else {
>> -		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
>> -			 prefix, getpid(), rte_rdtsc());
>> -	}
>> +
>> +	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
>> +
>> 	unlink(un.sun_path); /* May still exist since last run */
>> +
>> 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
>> 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
>> 			un.sun_path, strerror(errno));
>> @@ -361,6 +422,44 @@ open_socket_fd(void)
>> 	return mp_fd;
>> }
>>
>> +/* find corresponding lock file and try to lock it */
>> +static int
>> +socket_is_active(const char *peer_name)
>> +{
>> +	char lockfile[PATH_MAX] = {0};
>> +	int fd, ret = -1;
>> +
>> +	/* construct lockfile filename */
>> +	create_lockfile_path(peer_name, lockfile, sizeof(lockfile));
>> +
>> +	/* try to lock it */
>> +	fd = open(lockfile, O_CREAT | O_RDWR);
>> +	if (fd < 0) {
>> +		RTE_LOG(ERR, EAL, "Cannot open '%s': %s\n", lockfile,
>> +			strerror(errno));
>> +		return -1;
>> +	}
>> +	ret = flock(fd, LOCK_EX | LOCK_NB);
>> +	if (ret < 0) {
>> +		if (errno == EWOULDBLOCK) {
>> +			/* file is locked */
>> +			ret = 1;
>> +		} else {
>> +			RTE_LOG(ERR, EAL, "Cannot lock '%s': %s\n", lockfile,
>> +				strerror(errno));
>> +			ret = -1;
>> +		}
>> +	} else {
>> +		ret = 0;
>> +		/* unlink lockfile automatically */
>> +		unlink(lockfile);
>> +		flock(fd, LOCK_UN);
>> +	}
>> +	close(fd);
>> +
>> +	return ret;
>> +}
>> +
>> static int
>> unlink_sockets(const char *filter)
>> {
>> @@ -376,28 +475,33 @@ unlink_sockets(const char *filter)
>> 	dir_fd = dirfd(mp_dir);
>>
>> 	while ((ent = readdir(mp_dir))) {
>> -		if (fnmatch(filter, ent->d_name, 0) == 0)
>> +		if (fnmatch(filter, ent->d_name, 0) == 0) {
>> +			const char *peer_name;
>> +			char path[PATH_MAX];
>> +			int ret;
>> +
>> +			snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
>> +				 ent->d_name);
>> +			peer_name = get_peer_name(path);
>> +
>> +			ret = socket_is_active(peer_name);
>> +			if (ret < 0) {
>> +				RTE_LOG(ERR, EAL, "Error getting socket active status\n”);
> 
> No close for mp_dir?
>> +				return -1;
>> +			} else if (ret == 1) {
>> +				RTE_LOG(ERR, EAL, "Socket is active (old secondary process still running?)\n”);
> 
> And here
>> +				return -1;
>> +			}
>> +			RTE_LOG(DEBUG, EAL, "Removing stale socket file '%s'\n",
>> +					ent->d_name);
>> 			unlinkat(dir_fd, ent->d_name, 0);
> 
> Does unlinkat() close dir_fd?
> 
>> +		}
>> 	}
>>
>> 	closedir(mp_dir);
>> 	return 0;
>> }
>>
>> -static void
>> -unlink_socket_by_path(const char *path)
>> -{
>> -	char *filename;
>> -	char *fullpath = strdup(path);
>> -
>> -	if (!fullpath)
>> -		return;
>> -	filename = basename(fullpath);
>> -	unlink_sockets(filename);
>> -	free(fullpath);
>> -	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
>> -}
>> -
>> int
>> rte_mp_channel_init(void)
>> {
>> @@ -487,10 +591,25 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
>> 		rte_errno = errno;
>> 		/* Check if it caused by peer process exits */
>> 		if (errno == ECONNREFUSED) {
>> -			/* We don't unlink the primary's socket here */
>> -			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> -				unlink_socket_by_path(dst_path);
>> -			return 0;
>> +			const char *peer_name = get_peer_name(dst_path);
>> +			int active, ret = 0;
> 
> Here we have variables in the middle of a block again.
> 
>> +
>> +			active = rte_eal_process_type() == RTE_PROC_PRIMARY ?
>> +					socket_is_active(peer_name) :
>> +					rte_eal_primary_proc_alive(NULL);
>> +
>> +			if (active > 0) {
>> +				RTE_LOG(ERR, EAL, "Couldn't communicate with active peer\n");
>> +			} else if (active < 0) {
>> +				RTE_LOG(ERR, EAL, "Couldn't get peer status\n");
>> +				ret = -1;
>> +			} else if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>> +				/* peer isn't active anymore, so unlink its
>> +				 * socket.
>> +				 */
>> +				unlink(dst_path);
>> +			}
>> +			return ret;
>> 		}
>> 		if (errno == ENOBUFS) {
>> 			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
>> @@ -508,7 +627,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
>> static int
>> mp_send(struct rte_mp_msg *msg, const char *peer, int type)
>> {
>> -	int ret = 0;
>> +	int dir_fd, ret = 0;
>> 	DIR *mp_dir;
>> 	struct dirent *ent;
>>
>> @@ -530,15 +649,28 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
>> 		rte_errno = errno;
>> 		return -1;
>> 	}
>> +	dir_fd = dirfd(mp_dir);
> 
> when is dir_fd closed?
> 
>> 	while ((ent = readdir(mp_dir))) {
>> 		char path[PATH_MAX];
>> +		const char *peer_name;
>> +		int active;
>>
>> 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
>> 			continue;
>>
>> 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
>> 			 ent->d_name);
>> -		if (send_msg(path, msg, type) < 0)
>> +		peer_name = get_peer_name(path);
>> +
>> +		/* only send if we can expect to receive a reply, otherwise
>> +		 * remove the socket.
>> +		 */
>> +		active = socket_is_active(peer_name);
>> +		if (active < 0)
>> +			ret = -1;
>> +		else if (active == 0)
>> +			unlinkat(dir_fd, ent->d_name, 0);
>> +		else if (active > 0 && send_msg(path, msg, type) < 0)
>> 			ret = -1;
>> 	}
>>
>> @@ -663,7 +795,7 @@ int __rte_experimental
>> rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
>> 		const struct timespec *ts)
>> {
>> -	int ret = 0;
>> +	int dir_fd, ret = 0;
>> 	DIR *mp_dir;
>> 	struct dirent *ent;
>> 	struct timeval now;
>> @@ -698,15 +830,29 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
>> 		rte_errno = errno;
>> 		return -1;
>> 	}
>> +	dir_fd = dirfd(mp_dir);
> 
> When is dir_fd closed?
> 
>>
>> 	while ((ent = readdir(mp_dir))) {
>> +		const char *peer_name;
>> 		char path[PATH_MAX];
>> +		int active;
>>
>> 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
>> 			continue;
>>
>> 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
>> 			 ent->d_name);
>> +		peer_name = get_peer_name(path);
>> +
>> +		active = socket_is_active(peer_name);
>> +
>> +		if (active < 0) {
>> +			ret = -1;
>> +			break;
>> +		} else if (active == 0) {
>> +			unlinkat(dir_fd, ent->d_name, 0);
>> +			continue;
>> +		}
>>
>> 		if (mp_request_one(path, req, reply, &end))
>> 			ret = -1;
>> -- 
>> 2.7.4
> 
> Regards,
> Keith
> 

Thanks for your comments, but as per Jianfeng's comments, most of this 
patch will be dropped.

Some commends though: dir_fd will be closed automatically because we 
call closedir() on the mp_dir, as per closedir manpage [1]. The lock_fd 
wasn't closed because the process was meant to hold onto the lock for 
the lifetime of the process, but since this "feature" will be dropped, 
it doesn't matter.

I'll make sure we close all handles everywhere for v4.

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 4/5] eal: prevent secondary process init while sending messages
  2018-02-28  1:58     ` Tan, Jianfeng
@ 2018-02-28 10:19       ` Burakov, Anatoly
  2018-02-28 15:49         ` Tan, Jianfeng
  0 siblings, 1 reply; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-28 10:19 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 28-Feb-18 1:58 AM, Tan, Jianfeng wrote:
> Hi Anatoly,
> 
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Tuesday, February 27, 2018 10:36 PM
>> To: dev@dpdk.org
>> Cc: Tan, Jianfeng
>> Subject: [PATCH v3 4/5] eal: prevent secondary process init while sending
>> messages
>>
>> Currently, it is possible to spin up a secondary process while
>> either sendmsg or request is in progress. Fix this by adding
>> directory locks during init, sendmsg and requests.
> 
> Could you give a more detailed example for this issue?
> 
> And why locking the directory can help?
> 
> Thanks,
> Jianfeng
> 

Consider this. You start a request. Since sending this out takes 
non-zero amount of time, and you're waiting for process to reply each 
time you send a message, there's a non-zero chance where contents of 
/var/run may change and another socket file may appear that wasn't there 
when we started sending out those messages.

This is simply making sending requests atomic, if you will. Honestly, i 
can't think of a situation where this might be a problem, but it just 
doesn't feel right, so i fixed it :)

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC
  2018-02-28  1:52     ` Tan, Jianfeng
@ 2018-02-28 10:21       ` Burakov, Anatoly
  2018-02-28 15:01         ` Tan, Jianfeng
  0 siblings, 1 reply; 57+ messages in thread
From: Burakov, Anatoly @ 2018-02-28 10:21 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 28-Feb-18 1:52 AM, Tan, Jianfeng wrote:
> Hi Anatoly,
> 
>> -----Original Message-----
>> From: Burakov, Anatoly
>> Sent: Tuesday, February 27, 2018 10:36 PM
>> To: dev@dpdk.org
>> Cc: Tan, Jianfeng
>> Subject: [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC
>>
>> Currently, filter value is hardcoded and disconnected from actual
>> value returned by eal_mp_socket_path().
> 
> I can understand the hardcode is not good. But why it's not consistent with the actual value returned from eal_mp_socket_path()?

It is consistent. It's just that it's disconnected from the value 
returned by eal_mp_socket_path(). Meaning, if you change how 
eal_mp_socket_path() works, you'll also have to update the filter (or 
you may forget to do it and have a bug). This patch makes it so that 
mp_filter value is automatically updated, should you change internal 
workings of eal_mp_socket_path().

> 
>> Fix this to generate filter
>> value by deriving it from eal_mp_socket_path() instead.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> 
> Anyway, I think your way looks good to me, so
> 
> Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
> 
> Thanks,
> Jianfeng
> 

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC
  2018-02-28 10:21       ` Burakov, Anatoly
@ 2018-02-28 15:01         ` Tan, Jianfeng
  0 siblings, 0 replies; 57+ messages in thread
From: Tan, Jianfeng @ 2018-02-28 15:01 UTC (permalink / raw)
  To: Burakov, Anatoly, dev



On 2/28/2018 6:21 PM, Burakov, Anatoly wrote:
> On 28-Feb-18 1:52 AM, Tan, Jianfeng wrote:
>> Hi Anatoly,
>>
>>> -----Original Message-----
>>> From: Burakov, Anatoly
>>> Sent: Tuesday, February 27, 2018 10:36 PM
>>> To: dev@dpdk.org
>>> Cc: Tan, Jianfeng
>>> Subject: [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC
>>>
>>> Currently, filter value is hardcoded and disconnected from actual
>>> value returned by eal_mp_socket_path().
>>
>> I can understand the hardcode is not good. But why it's not 
>> consistent with the actual value returned from eal_mp_socket_path()?
>
> It is consistent. It's just that it's disconnected from the value 
> returned by eal_mp_socket_path(). Meaning, if you change how 
> eal_mp_socket_path() works, you'll also have to update the filter (or 
> you may forget to do it and have a bug). This patch makes it so that 
> mp_filter value is automatically updated, should you change internal 
> workings of eal_mp_socket_path().

Yeah, that makes sense. Thank you for fixing it.

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 4/5] eal: prevent secondary process init while sending messages
  2018-02-28 10:19       ` Burakov, Anatoly
@ 2018-02-28 15:49         ` Tan, Jianfeng
  0 siblings, 0 replies; 57+ messages in thread
From: Tan, Jianfeng @ 2018-02-28 15:49 UTC (permalink / raw)
  To: Burakov, Anatoly, dev



On 2/28/2018 6:19 PM, Burakov, Anatoly wrote:
> On 28-Feb-18 1:58 AM, Tan, Jianfeng wrote:
>> Hi Anatoly,
>>
>>> -----Original Message-----
>>> From: Burakov, Anatoly
>>> Sent: Tuesday, February 27, 2018 10:36 PM
>>> To: dev@dpdk.org
>>> Cc: Tan, Jianfeng
>>> Subject: [PATCH v3 4/5] eal: prevent secondary process init while 
>>> sending
>>> messages
>>>
>>> Currently, it is possible to spin up a secondary process while
>>> either sendmsg or request is in progress. Fix this by adding
>>> directory locks during init, sendmsg and requests.
>>
>> Could you give a more detailed example for this issue?
>>
>> And why locking the directory can help?
>>
>> Thanks,
>> Jianfeng
>>
>
> Consider this. You start a request. Since sending this out takes 
> non-zero amount of time, and you're waiting for process to reply each 
> time you send a message, there's a non-zero chance where contents of 
> /var/run may change and another socket file may appear that wasn't 
> there when we started sending out those messages.

OK, I see the issue now. When primary broadcasts a request and another 
secondary joins, then if that request will be delivered to the new 
secondary, it's an undefined behavior.

>
> This is simply making sending requests atomic, if you will. Honestly, 
> i can't think of a situation where this might be a problem, but it 
> just doesn't feel right, so i fixed it :)
>

The way seems a little overkill to me. But I did not find a better way :-)

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 1/5] eal: add internal flag indicating init has completed
  2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
@ 2018-03-02 15:14   ` Anatoly Burakov
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
                       ` (6 more replies)
  2018-03-02 15:14   ` [PATCH v4 2/5] eal: use file to check if secondary process is ready Anatoly Burakov
                     ` (3 subsequent siblings)
  4 siblings, 7 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-02 15:14 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles

Currently, primary process initialization is finalized by setting
the RTE_MAGIC value in the shared config. However, it is not
possible to check whether secondary process initialization has
completed. Add such a value to internal config.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v4: make init_complete volatile
    
    This patch is dependent upon earlier IPC fixes patchset [1].
    
    [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/
    
    v4: make init_complete volatile
    
    v3: no changes
    
    v2: no changes
    
    This patch is dependent upon earlier IPC fixes patchset [1].
    
    [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/

 lib/librte_eal/common/eal_common_options.c | 1 +
 lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d2..0be80cb 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -194,6 +194,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->vmware_tsc_map = 0;
 	internal_cfg->create_uio_dev = 0;
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
+	internal_cfg->init_complete = 0;
 }
 
 static int
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc..a0082d1 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -56,6 +56,8 @@ struct internal_config {
 			/**< user defined mbuf pool ops name */
 	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
 	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+	volatile unsigned int init_complete;
+	/**< indicates whether EAL has completed initialization */
 };
 extern struct internal_config internal_config; /**< Global EAL configuration. */
 
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..2ecd07b 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -669,6 +669,8 @@ rte_eal_mcfg_complete(void)
 	/* ALL shared mem_config related INIT DONE */
 	if (rte_config.process_type == RTE_PROC_PRIMARY)
 		rte_config.mem_config->magic = RTE_MAGIC;
+
+	internal_config.init_complete = 1;
 }
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 2/5] eal: use file to check if secondary process is ready
  2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
@ 2018-03-02 15:14   ` Anatoly Burakov
  2018-03-06 11:03     ` Burakov, Anatoly
  2018-03-02 15:14   ` [PATCH v4 3/5] eal: prevent secondary process init while sending messages Anatoly Burakov
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-02 15:14 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, jianfeng.tan, keith.wiles

Previously, IPC would remove sockets it considers to be "inactive"
based on whether they have responded. We also need to prevent
sending messages to processes that are active, but haven't yet
finished initialization.

This will create a "init file" per socket which will be removed
after initialization is complete, to prevent primary process from
sending messages to a process that hasn't finished its
initialization.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v4: changed from "don't process messages until init
        complete" to "don't send messages to processes
        which haven't finished initializing", as the
        former would have resulted in timeouts if init
        took too long to complete.
    
    v4: rework to not send any messages to secondary processes that
        haven't yet initialized, so no need for message queue
    
    v3: no changes
    
    v2: no changes

 lib/librte_eal/bsdapp/eal/eal.c         |   7 ++
 lib/librte_eal/common/eal_common_proc.c | 152 +++++++++++++++++++++++++++-----
 lib/librte_eal/common/eal_private.h     |  10 ++-
 lib/librte_eal/linuxapp/eal/eal.c       |   7 ++
 4 files changed, 151 insertions(+), 25 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 4eafcb5..c003c16 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -694,6 +694,13 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	ret = rte_mp_channel_set_ready();
+	if (ret < 0) {
+		rte_eal_init_alert("Cannot finalize mp channel init\n");
+		rte_errno = EFAULT;
+		return -1;
+	}
+
 	rte_eal_mcfg_complete();
 
 	return fctret;
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index da7930f..4d227fe 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/file.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -33,6 +34,7 @@
 static int mp_fd = -1;
 static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
 static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
+static char process_peer_name[PATH_MAX] = {0};  /* this process's peer name */
 static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
 
 struct action_entry {
@@ -91,6 +93,48 @@ find_sync_request(const char *dst, const char *act_name)
 	return r;
 }
 
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+
+	if (strlen(name) > 0)
+		snprintf(buf, len, "%s_%s", prefix, name);
+	else
+		snprintf(buf, len, "%s", prefix);
+}
+
+static void
+create_initfile_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+
+	if (strlen(name) > 1)
+		snprintf(buf, len, "%sinit_%s", prefix, name);
+	else
+		snprintf(buf, len, "%sinit", prefix);
+}
+
+static const char *
+get_peer_name(const char *socket_full_path)
+{
+	char buf[PATH_MAX] = {0};
+	int len;
+
+	/* primary process has no peer name */
+	if (strcmp(socket_full_path, eal_mp_socket_path()) == 0)
+		return NULL;
+
+	/* construct dummy socket file name - make it one character long so that
+	 * we hit the code path where underscores are added
+	 */
+	create_socket_path("a", buf, sizeof(buf));
+
+	/* we want to get everything after /path/.rte_unix_, so discard 'a' */
+	len = strlen(buf) - 1;
+	return &socket_full_path[len];
+}
+
 int
 rte_eal_primary_proc_alive(const char *config_file_path)
 {
@@ -290,8 +334,23 @@ mp_handle(void *arg __rte_unused)
 static int
 open_socket_fd(void)
 {
+	char initfile[PATH_MAX] = {0};
 	struct sockaddr_un un;
-	const char *prefix = eal_mp_socket_path();
+	int init_fd;
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		snprintf(process_peer_name, sizeof(process_peer_name),
+				"%d_%"PRIx64, getpid(), rte_rdtsc());
+
+	/* try to create initfile */
+	create_initfile_path(process_peer_name, initfile, sizeof(initfile));
+
+	init_fd = open(initfile, O_CREAT | O_RDWR);
+	if (init_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open '%s': %s\n", initfile,
+			strerror(errno));
+		return -1;
+	}
 
 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (mp_fd < 0) {
@@ -301,13 +360,11 @@ open_socket_fd(void)
 
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
-	else {
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
-			 prefix, getpid(), rte_rdtsc());
-	}
+
+	create_socket_path(process_peer_name, un.sun_path, sizeof(un.sun_path));
+
 	unlink(un.sun_path); /* May still exist since last run */
+
 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
 			un.sun_path, strerror(errno));
@@ -319,6 +376,31 @@ open_socket_fd(void)
 	return mp_fd;
 }
 
+/* find corresponding init file */
+static int
+socket_is_ready(const char *peer_name)
+{
+	char initfile[PATH_MAX] = {0};
+	int fd;
+
+	/* construct lockfile filename */
+	create_initfile_path(peer_name, initfile, sizeof(initfile));
+
+	fd = open(initfile, O_RDWR);
+	if (fd >= 0) {
+		/* init file still exists, socket is not ready */
+		close(fd);
+		return 0;
+	}
+	if (fd < 0 && errno == ENOENT) {
+		/* init file not found, socket is ready */
+		return 1;
+	}
+	RTE_LOG(ERR, EAL, "Cannot open '%s': %s\n", initfile,
+		strerror(errno));
+	return -1;
+}
+
 static int
 unlink_sockets(const char *filter)
 {
@@ -334,26 +416,30 @@ unlink_sockets(const char *filter)
 	dir_fd = dirfd(mp_dir);
 
 	while ((ent = readdir(mp_dir))) {
-		if (fnmatch(filter, ent->d_name, 0) == 0)
+		if (fnmatch(filter, ent->d_name, 0) == 0) {
+			char path[PATH_MAX];
+
+			snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+				 ent->d_name);
+
+			RTE_LOG(DEBUG, EAL, "Removing stale socket file '%s'\n",
+					ent->d_name);
 			unlinkat(dir_fd, ent->d_name, 0);
+		}
 	}
 
 	closedir(mp_dir);
 	return 0;
 }
 
-static void
-unlink_socket_by_path(const char *path)
+int
+rte_mp_channel_set_ready(void)
 {
-	char *filename;
-	char *fullpath = strdup(path);
+	char path[PATH_MAX] = {0};
 
-	if (!fullpath)
-		return;
-	filename = basename(fullpath);
-	unlink_sockets(filename);
-	free(fullpath);
-	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
+	create_initfile_path(process_peer_name, path, PATH_MAX);
+
+	return unlink(path);
 }
 
 int
@@ -444,10 +530,9 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 	if (snd < 0) {
 		rte_errno = errno;
 		/* Check if it caused by peer process exits */
-		if (errno == ECONNREFUSED) {
-			/* We don't unlink the primary's socket here */
-			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-				unlink_socket_by_path(dst_path);
+		if (errno == ECONNREFUSED &&
+				rte_eal_process_type() == RTE_PROC_PRIMARY) {
+			unlink(dst_path);
 			return 0;
 		}
 		if (errno == ENOBUFS) {
@@ -490,14 +575,22 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 	}
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
+		const char *peer_name;
+		int ready;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
-		if (send_msg(path, msg, type) < 0)
+		peer_name = get_peer_name(path);
+
+		/* only send if we can expect to receive a reply */
+		ready = socket_is_ready(peer_name);
+		if (ready < 0)
 			ret = -1;
+		else if (ready > 0)
+			ret = send_msg(path, msg, type);
 	}
 
 	closedir(mp_dir);
@@ -655,15 +748,26 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		rte_errno = errno;
 		return -1;
 	}
-
 	while ((ent = readdir(mp_dir))) {
+		const char *peer_name;
 		char path[PATH_MAX];
+		int ready;
 
 		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
 			continue;
 
 		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
 			 ent->d_name);
+		peer_name = get_peer_name(path);
+
+		ready = socket_is_ready(peer_name);
+
+		if (ready < 0) {
+			ret = -1;
+			continue;
+		} else if (ready == 0) {
+			continue;
+		}
 
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0b28770..a3535a0 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -202,7 +202,15 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
  *   0 on success;
  *   (<0) on failure.
  */
-
 int rte_mp_channel_init(void);
 
+/**
+ * Set unix channel for primary/secondary communication as ready.
+ *
+ * @return
+ *   0 on success;
+ *   (<0) on failure.
+ */
+int rte_mp_channel_set_ready(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 2ecd07b..a9af9c6 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -961,6 +961,13 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	ret = rte_mp_channel_set_ready();
+	if (ret < 0) {
+		rte_eal_init_alert("Cannot finalize mp channel init\n");
+		rte_errno = EFAULT;
+		return -1;
+	}
+
 	rte_eal_mcfg_complete();
 
 	return fctret;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 3/5] eal: prevent secondary process init while sending messages
  2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
  2018-03-02 15:14   ` [PATCH v4 2/5] eal: use file to check if secondary process is ready Anatoly Burakov
@ 2018-03-02 15:14   ` Anatoly Burakov
  2018-03-02 15:14   ` [PATCH v4 4/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
  2018-03-02 15:14   ` [PATCH v4 5/5] eal: simplify IPC sync request timeout code Anatoly Burakov
  4 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-02 15:14 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles

Currently, it is possible to spin up a secondary process while
either sendmsg or request is in progress. Fix this by adding
directory locks during init, sendmsg and requests.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v4: fixed resource leaks and added support for init files
        introduced in v4 series
    
    v4: fix resource leaks, also lock when removing init file
    
    v3: no changes
    
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 81 +++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 4d227fe..94672ba 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -436,10 +436,30 @@ int
 rte_mp_channel_set_ready(void)
 {
 	char path[PATH_MAX] = {0};
+	int dir_fd, ret;
+
+	/* lock the directory */
+	dir_fd = open(mp_dir_path, O_RDONLY);
+	if (dir_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+			mp_dir_path, strerror(errno));
+		return -1;
+	}
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+			mp_dir_path, strerror(errno));
+		close(dir_fd);
+		return -1;
+	}
 
 	create_initfile_path(process_peer_name, path, PATH_MAX);
 
-	return unlink(path);
+	ret = unlink(path);
+
+	flock(dir_fd, LOCK_UN);
+	close(dir_fd);
+
+	return ret;
 }
 
 int
@@ -447,6 +467,7 @@ rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 	char *path;
+	int dir_fd;
 	pthread_t tid;
 
 	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
@@ -456,19 +477,38 @@ rte_mp_channel_init(void)
 	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
 	free(path);
 
+	/* lock the directory */
+	dir_fd = open(mp_dir_path, O_RDONLY);
+	if (dir_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+			mp_dir_path, strerror(errno));
+		return -1;
+	}
+
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+			mp_dir_path, strerror(errno));
+		close(dir_fd);
+		return -1;
+	}
+
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 	    unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+		close(dir_fd);
 		return -1;
 	}
 
-	if (open_socket_fd() < 0)
+	if (open_socket_fd() < 0) {
+		close(dir_fd);
 		return -1;
+	}
 
 	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
 			strerror(errno));
 		close(mp_fd);
+		close(dir_fd);
 		mp_fd = -1;
 		return -1;
 	}
@@ -476,6 +516,11 @@ rte_mp_channel_init(void)
 	/* try best to set thread name */
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
 	rte_thread_setname(tid, thread_name);
+
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
+	close(dir_fd);
+
 	return 0;
 }
 
@@ -551,7 +596,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 static int
 mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 
@@ -573,6 +618,17 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		rte_errno = errno;
 		return -1;
 	}
+
+	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		rte_errno = errno;
+		closedir(mp_dir);
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 		const char *peer_name;
@@ -592,7 +648,10 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		else if (ready > 0)
 			ret = send_msg(path, msg, type);
 	}
+	/* unlock the dir */
+	flock(dir_fd, LOCK_UN);
 
+	/* dir_fd automatically closed on closedir */
 	closedir(mp_dir);
 	return ret;
 }
@@ -713,7 +772,7 @@ int __rte_experimental
 rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		const struct timespec *ts)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 	struct timeval now;
@@ -748,6 +807,17 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		rte_errno = errno;
 		return -1;
 	}
+
+	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		closedir(mp_dir);
+		rte_errno = errno;
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		const char *peer_name;
 		char path[PATH_MAX];
@@ -772,7 +842,10 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
 	}
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
 
+	/* dir_fd automatically closed on closedir */
 	closedir(mp_dir);
 	return ret;
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 4/5] eal: don't hardcode socket filter value in IPC
  2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
                     ` (2 preceding siblings ...)
  2018-03-02 15:14   ` [PATCH v4 3/5] eal: prevent secondary process init while sending messages Anatoly Burakov
@ 2018-03-02 15:14   ` Anatoly Burakov
  2018-03-02 15:14   ` [PATCH v4 5/5] eal: simplify IPC sync request timeout code Anatoly Burakov
  4 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-02 15:14 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles

Currently, filter value is hardcoded and disconnected from actual
value returned by eal_mp_socket_path(). Fix this to generate filter
value by deriving it from eal_mp_socket_path() instead.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
---

Notes:
    v4: added filtering for init files as well
    
    v3: no changes
    
    v2: no changes

 lib/librte_eal/common/eal_common_proc.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 94672ba..f382184 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -402,7 +402,7 @@ socket_is_ready(const char *peer_name)
 }
 
 static int
-unlink_sockets(const char *filter)
+unlink_files(const char *filter)
 {
 	int dir_fd;
 	DIR *mp_dir;
@@ -466,16 +466,22 @@ int
 rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
-	char *path;
+	char path[PATH_MAX];
+	char init_filter[PATH_MAX];
 	int dir_fd;
 	pthread_t tid;
 
-	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
-		 internal_config.hugefile_prefix);
+	/* create filter path */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_filter, sizeof(mp_filter), "%s", basename(path));
 
-	path = strdup(eal_mp_socket_path());
-	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
-	free(path);
+	/* path may have been modified, so recreate it */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
+
+	/* also, create init file filter */
+	create_initfile_path("*", path, sizeof(path));
+	snprintf(init_filter, sizeof(init_filter), "%s", basename(path));
 
 	/* lock the directory */
 	dir_fd = open(mp_dir_path, O_RDONLY);
@@ -493,7 +499,8 @@ rte_mp_channel_init(void)
 	}
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
-	    unlink_sockets(mp_filter)) {
+			(unlink_files(mp_filter) ||
+			 unlink_files(init_filter))) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
 		close(dir_fd);
 		return -1;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 5/5] eal: simplify IPC sync request timeout code
  2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
                     ` (3 preceding siblings ...)
  2018-03-02 15:14   ` [PATCH v4 4/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
@ 2018-03-02 15:14   ` Anatoly Burakov
  4 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-02 15:14 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_common_proc.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index f382184..666c566 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -706,7 +706,6 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
-	struct timeval now;
 	struct rte_mp_msg msg, *tmp;
 	struct sync_request sync_req, *exist;
 
@@ -738,19 +737,10 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 	reply->nb_sent++;
 
 	do {
-		pthread_cond_timedwait(&sync_req.cond, &sync_requests.lock, ts);
-		/* Check spurious wakeups */
-		if (sync_req.reply_received == 1)
-			break;
-		/* Check if time is out */
-		if (gettimeofday(&now, NULL) < 0)
-			break;
-		if (ts->tv_sec < now.tv_sec)
-			break;
-		else if (now.tv_sec == ts->tv_sec &&
-			 now.tv_usec * 1000 < ts->tv_nsec)
-			break;
-	} while (1);
+		ret = pthread_cond_timedwait(&sync_req.cond,
+				&sync_requests.lock, ts);
+	} while (ret != 0 && ret != ETIMEDOUT);
+
 	/* We got the lock now */
 	TAILQ_REMOVE(&sync_requests.requests, &sync_req, next);
 	pthread_mutex_unlock(&sync_requests.lock);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH v4 2/5] eal: use file to check if secondary process is ready
  2018-03-02 15:14   ` [PATCH v4 2/5] eal: use file to check if secondary process is ready Anatoly Burakov
@ 2018-03-06 11:03     ` Burakov, Anatoly
  0 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-03-06 11:03 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, jianfeng.tan, keith.wiles

On 02-Mar-18 3:14 PM, Anatoly Burakov wrote:
> Previously, IPC would remove sockets it considers to be "inactive"
> based on whether they have responded. We also need to prevent
> sending messages to processes that are active, but haven't yet
> finished initialization.
> 
> This will create a "init file" per socket which will be removed
> after initialization is complete, to prevent primary process from
> sending messages to a process that hasn't finished its
> initialization.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---

Self-NACK on this patch. Secondary processes may initialize data 
structures, which means IPC has to be active during init. Each subsystem 
will therefore have to synchronize access to IPC on their own. (For 
example, memory hotplug will only block IPC for a short period between 
rte_config_init() and init of memory/heap init)


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 0/6] Improvements for DPDK IPC
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
@ 2018-03-07 16:56     ` Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 " Anatoly Burakov
                         ` (6 more replies)
  2018-03-07 16:56     ` [PATCH v5 1/6] eal: add internal flag indicating init has completed Anatoly Burakov
                       ` (5 subsequent siblings)
  6 siblings, 7 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-07 16:56 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

This is an assortment of loosely related improvements to IPC, mostly
related to handling corner cases and avoiding race conditions.

Main addition is an attempt to avoid undefined behavior when receiving
messages while secondary process is initializing. It is assumed that
once callback is registered, it is safe to receive messages.

If the callback wasn't registered, then there are two choices - either
we haven't reached the stage where we register this callback (init is
not finished), or user has forgotten to register callback for this
message. The latter can only be known once initialization is complete,
so until init is complete, treat this process as not-existing if there
is no registered callback for the message. This will handle both
scenarios.

v5: - added cover-letter :)
    - drop the "don't send messages to processes which haven't finished
      initializing" model added in previous version. instead, allow
      everyone to receive all messages, but check if initialization is
      completed, and check if there is a callback registered for this
      message. if there is no callback, assume we just didn't get around
      to it yet, so just send a special message to the requestor that
      it should treat this process as if it wasn't there.
v4: - make init_complete volatile
    - changed from "don't process messages until init complete" to
      "don't send messages to processes which haven't finished
      initializing", as the former would have resulted in timeouts if
      init took too long to complete
    - fixed resource leaks
    - added patch to simplify IPC timeouts handling
v3: - move init_complete until after receiving message
v2: - added patch to prevent IPC from sending messages while primary
      is initializing
    - added patch to generate filter from eal_mp_socket_path() instead
      of hardcoding the value

Anatoly Burakov (6):
  eal: add internal flag indicating init has completed
  eal: abstract away IPC socket path generation
  eal: don't hardcode socket filter value in IPC
  eal: lock IPC directory on init and send
  eal: simplify IPC sync request timeout code
  eal: ignore messages until init is complete

 lib/librte_eal/common/eal_common_options.c |   1 +
 lib/librte_eal/common/eal_common_proc.c    | 382 +++++++++++++++++------------
 lib/librte_eal/common/eal_internal_cfg.h   |   2 +
 lib/librte_eal/linuxapp/eal/eal.c          |   2 +
 4 files changed, 228 insertions(+), 159 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 1/6] eal: add internal flag indicating init has completed
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
@ 2018-03-07 16:56     ` Anatoly Burakov
  2018-03-07 16:56     ` [PATCH v5 2/6] eal: abstract away IPC socket path generation Anatoly Burakov
                       ` (4 subsequent siblings)
  6 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-07 16:56 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Currently, primary process initialization is finalized by setting
the RTE_MAGIC value in the shared config. However, it is not
possible to check whether secondary process initialization has
completed. Add such a value to internal config.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v4: make init_complete volatile
    
    This patchset is dependent upon earlier IPC fixes patchset [1].
    
    [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/

 lib/librte_eal/common/eal_common_options.c | 1 +
 lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d2..0be80cb 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -194,6 +194,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->vmware_tsc_map = 0;
 	internal_cfg->create_uio_dev = 0;
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
+	internal_cfg->init_complete = 0;
 }
 
 static int
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc..a0082d1 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -56,6 +56,8 @@ struct internal_config {
 			/**< user defined mbuf pool ops name */
 	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
 	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+	volatile unsigned int init_complete;
+	/**< indicates whether EAL has completed initialization */
 };
 extern struct internal_config internal_config; /**< Global EAL configuration. */
 
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..2ecd07b 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -669,6 +669,8 @@ rte_eal_mcfg_complete(void)
 	/* ALL shared mem_config related INIT DONE */
 	if (rte_config.process_type == RTE_PROC_PRIMARY)
 		rte_config.mem_config->magic = RTE_MAGIC;
+
+	internal_config.init_complete = 1;
 }
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v5 2/6] eal: abstract away IPC socket path generation
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
  2018-03-07 16:56     ` [PATCH v5 1/6] eal: add internal flag indicating init has completed Anatoly Burakov
@ 2018-03-07 16:56     ` Anatoly Burakov
  2018-03-11  9:02       ` Tan, Jianfeng
  2018-03-07 16:56     ` [PATCH v5 3/6] eal: don't hardcode socket filter value in IPC Anatoly Burakov
                       ` (3 subsequent siblings)
  6 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-07 16:56 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v5: remove lock files, leaving only socket paths code
    
    v4: replace lock files with init files

 lib/librte_eal/common/eal_common_proc.c | 48 ++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index da7930f..1aab3ac 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -91,6 +91,17 @@ find_sync_request(const char *dst, const char *act_name)
 	return r;
 }
 
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+
+	if (strlen(name) > 0)
+		snprintf(buf, len, "%s_%s", prefix, name);
+	else
+		snprintf(buf, len, "%s", prefix);
+}
+
 int
 rte_eal_primary_proc_alive(const char *config_file_path)
 {
@@ -290,8 +301,12 @@ mp_handle(void *arg __rte_unused)
 static int
 open_socket_fd(void)
 {
+	char peer_name[PATH_MAX] = {0};
 	struct sockaddr_un un;
-	const char *prefix = eal_mp_socket_path();
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		snprintf(peer_name, sizeof(peer_name),
+				"%d_%"PRIx64, getpid(), rte_rdtsc());
 
 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (mp_fd < 0) {
@@ -301,13 +316,11 @@ open_socket_fd(void)
 
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
-	else {
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
-			 prefix, getpid(), rte_rdtsc());
-	}
+
+	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
 	unlink(un.sun_path); /* May still exist since last run */
+
 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
 			un.sun_path, strerror(errno));
@@ -342,20 +355,6 @@ unlink_sockets(const char *filter)
 	return 0;
 }
 
-static void
-unlink_socket_by_path(const char *path)
-{
-	char *filename;
-	char *fullpath = strdup(path);
-
-	if (!fullpath)
-		return;
-	filename = basename(fullpath);
-	unlink_sockets(filename);
-	free(fullpath);
-	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
-}
-
 int
 rte_mp_channel_init(void)
 {
@@ -444,10 +443,9 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 	if (snd < 0) {
 		rte_errno = errno;
 		/* Check if it caused by peer process exits */
-		if (errno == ECONNREFUSED) {
-			/* We don't unlink the primary's socket here */
-			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-				unlink_socket_by_path(dst_path);
+		if (errno == ECONNREFUSED &&
+				rte_eal_process_type() == RTE_PROC_PRIMARY) {
+			unlink(dst_path);
 			return 0;
 		}
 		if (errno == ENOBUFS) {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v5 3/6] eal: don't hardcode socket filter value in IPC
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
                       ` (2 preceding siblings ...)
  2018-03-07 16:56     ` [PATCH v5 2/6] eal: abstract away IPC socket path generation Anatoly Burakov
@ 2018-03-07 16:56     ` Anatoly Burakov
  2018-03-07 16:56     ` [PATCH v5 4/6] eal: lock IPC directory on init and send Anatoly Burakov
                       ` (2 subsequent siblings)
  6 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-07 16:56 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Currently, filter value is hardcoded and disconnected from actual
value returned by eal_mp_socket_path(). Fix this to generate filter
value by deriving it from eal_mp_socket_path() instead.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
---

Notes:
    v5: removed init files
    
    v4: added filtering for init files as well

 lib/librte_eal/common/eal_common_proc.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 1aab3ac..9587211 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -359,18 +359,19 @@ int
 rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
-	char *path;
+	char path[PATH_MAX];
 	pthread_t tid;
 
-	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
-		 internal_config.hugefile_prefix);
+	/* create filter path */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_filter, sizeof(mp_filter), "%s", basename(path));
 
-	path = strdup(eal_mp_socket_path());
-	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
-	free(path);
+	/* path may have been modified, so recreate it */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
-	    unlink_sockets(mp_filter)) {
+			unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
 		return -1;
 	}
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v5 4/6] eal: lock IPC directory on init and send
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
                       ` (3 preceding siblings ...)
  2018-03-07 16:56     ` [PATCH v5 3/6] eal: don't hardcode socket filter value in IPC Anatoly Burakov
@ 2018-03-07 16:56     ` Anatoly Burakov
  2018-03-11  9:14       ` Tan, Jianfeng
  2018-03-07 16:56     ` [PATCH v5 5/6] eal: simplify IPC sync request timeout code Anatoly Burakov
  2018-03-07 16:56     ` [PATCH v5 6/6] eal: ignore messages until init is complete Anatoly Burakov
  6 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-07 16:56 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

When sending IPC messages, prevent new sockets from initializing.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v5: removed init files introduced in v4
    
    v4: fixed resource leaks and added support for init files
        introduced in v4 series

 lib/librte_eal/common/eal_common_proc.c | 59 +++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 9587211..c6fef75 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/file.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -360,6 +361,7 @@ rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 	char path[PATH_MAX];
+	int dir_fd;
 	pthread_t tid;
 
 	/* create filter path */
@@ -370,19 +372,38 @@ rte_mp_channel_init(void)
 	create_socket_path("*", path, sizeof(path));
 	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
 
+	/* lock the directory */
+	dir_fd = open(mp_dir_path, O_RDONLY);
+	if (dir_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+			mp_dir_path, strerror(errno));
+		return -1;
+	}
+
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+			mp_dir_path, strerror(errno));
+		close(dir_fd);
+		return -1;
+	}
+
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+		close(dir_fd);
 		return -1;
 	}
 
-	if (open_socket_fd() < 0)
+	if (open_socket_fd() < 0) {
+		close(dir_fd);
 		return -1;
+	}
 
 	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
 			strerror(errno));
 		close(mp_fd);
+		close(dir_fd);
 		mp_fd = -1;
 		return -1;
 	}
@@ -390,6 +411,11 @@ rte_mp_channel_init(void)
 	/* try best to set thread name */
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
 	rte_thread_setname(tid, thread_name);
+
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
+	close(dir_fd);
+
 	return 0;
 }
 
@@ -465,7 +491,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 static int
 mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 
@@ -487,6 +513,17 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		rte_errno = errno;
 		return -1;
 	}
+
+	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		rte_errno = errno;
+		closedir(mp_dir);
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 
@@ -498,7 +535,10 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		if (send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
+	/* unlock the dir */
+	flock(dir_fd, LOCK_UN);
 
+	/* dir_fd automatically closed on closedir */
 	closedir(mp_dir);
 	return ret;
 }
@@ -619,7 +659,7 @@ int __rte_experimental
 rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		const struct timespec *ts)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 	struct timeval now;
@@ -655,6 +695,16 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		return -1;
 	}
 
+	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		closedir(mp_dir);
+		rte_errno = errno;
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 
@@ -667,7 +717,10 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
 	}
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
 
+	/* dir_fd automatically closed on closedir */
 	closedir(mp_dir);
 	return ret;
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v5 5/6] eal: simplify IPC sync request timeout code
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
                       ` (4 preceding siblings ...)
  2018-03-07 16:56     ` [PATCH v5 4/6] eal: lock IPC directory on init and send Anatoly Burakov
@ 2018-03-07 16:56     ` Anatoly Burakov
  2018-03-11  9:25       ` Tan, Jianfeng
  2018-03-07 16:56     ` [PATCH v5 6/6] eal: ignore messages until init is complete Anatoly Burakov
  6 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-07 16:56 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v4: add this patch

 lib/librte_eal/common/eal_common_proc.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index c6fef75..fe27d68 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -586,7 +586,6 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
-	struct timeval now;
 	struct rte_mp_msg msg, *tmp;
 	struct sync_request sync_req, *exist;
 
@@ -618,19 +617,10 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 	reply->nb_sent++;
 
 	do {
-		pthread_cond_timedwait(&sync_req.cond, &sync_requests.lock, ts);
-		/* Check spurious wakeups */
-		if (sync_req.reply_received == 1)
-			break;
-		/* Check if time is out */
-		if (gettimeofday(&now, NULL) < 0)
-			break;
-		if (ts->tv_sec < now.tv_sec)
-			break;
-		else if (now.tv_sec == ts->tv_sec &&
-			 now.tv_usec * 1000 < ts->tv_nsec)
-			break;
-	} while (1);
+		ret = pthread_cond_timedwait(&sync_req.cond,
+				&sync_requests.lock, ts);
+	} while (ret != 0 && ret != ETIMEDOUT);
+
 	/* We got the lock now */
 	TAILQ_REMOVE(&sync_requests.requests, &sync_req, next);
 	pthread_mutex_unlock(&sync_requests.lock);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v5 6/6] eal: ignore messages until init is complete
  2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
                       ` (5 preceding siblings ...)
  2018-03-07 16:56     ` [PATCH v5 5/6] eal: simplify IPC sync request timeout code Anatoly Burakov
@ 2018-03-07 16:56     ` Anatoly Burakov
  2018-03-12  1:42       ` Tan, Jianfeng
  6 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-07 16:56 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

If we receive messages that don't have a callback registered for
them, and we haven't finished initialization yet, it can be reasonably
inferred that we shouldn't have gotten the message in the first
place. Therefore, send requester a special message telling them to
ignore response to this request, as if this process wasn't there.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v5: add this patch
    
    No changes in mp_send and send_msg - just code move.

 lib/librte_eal/common/eal_common_proc.c | 280 +++++++++++++++++---------------
 1 file changed, 151 insertions(+), 129 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index fe27d68..1ea6045 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -52,6 +52,7 @@ enum mp_type {
 	MP_MSG, /* Share message with peers, will not block */
 	MP_REQ, /* Request for information, Will block for a reply */
 	MP_REP, /* Response to previously-received request */
+	MP_IGN, /* Response telling requester to ignore this response */
 };
 
 struct mp_msg_internal {
@@ -205,6 +206,130 @@ rte_mp_action_unregister(const char *name)
 	free(entry);
 }
 
+/**
+ * Return -1, as fail to send message and it's caused by the local side.
+ * Return 0, as fail to send message and it's caused by the remote side.
+ * Return 1, as succeed to send message.
+ *
+ */
+static int
+send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
+{
+	int snd;
+	struct iovec iov;
+	struct msghdr msgh;
+	struct cmsghdr *cmsg;
+	struct sockaddr_un dst;
+	struct mp_msg_internal m;
+	int fd_size = msg->num_fds * sizeof(int);
+	char control[CMSG_SPACE(fd_size)];
+
+	m.type = type;
+	memcpy(&m.msg, msg, sizeof(*msg));
+
+	memset(&dst, 0, sizeof(dst));
+	dst.sun_family = AF_UNIX;
+	snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path);
+
+	memset(&msgh, 0, sizeof(msgh));
+	memset(control, 0, sizeof(control));
+
+	iov.iov_base = &m;
+	iov.iov_len = sizeof(m) - sizeof(msg->fds);
+
+	msgh.msg_name = &dst;
+	msgh.msg_namelen = sizeof(dst);
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control;
+	msgh.msg_controllen = sizeof(control);
+
+	cmsg = CMSG_FIRSTHDR(&msgh);
+	cmsg->cmsg_len = CMSG_LEN(fd_size);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	memcpy(CMSG_DATA(cmsg), msg->fds, fd_size);
+
+	do {
+		snd = sendmsg(mp_fd, &msgh, 0);
+	} while (snd < 0 && errno == EINTR);
+
+	if (snd < 0) {
+		rte_errno = errno;
+		/* Check if it caused by peer process exits */
+		if (errno == ECONNREFUSED &&
+				rte_eal_process_type() == RTE_PROC_PRIMARY) {
+			unlink(dst_path);
+			return 0;
+		}
+		if (errno == ENOBUFS) {
+			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
+				dst_path);
+			return 0;
+		}
+		RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n",
+			dst_path, strerror(errno));
+		return -1;
+	}
+
+	return 1;
+}
+
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type)
+{
+	int dir_fd, ret = 0;
+	DIR *mp_dir;
+	struct dirent *ent;
+
+	if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY))
+		peer = eal_mp_socket_path();
+
+	if (peer) {
+		if (send_msg(peer, msg, type) < 0)
+			return -1;
+		else
+			return 0;
+	}
+
+	/* broadcast to all secondary processes */
+	mp_dir = opendir(mp_dir_path);
+	if (!mp_dir) {
+		RTE_LOG(ERR, EAL, "Unable to open directory %s\n",
+				mp_dir_path);
+		rte_errno = errno;
+		return -1;
+	}
+
+	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		rte_errno = errno;
+		closedir(mp_dir);
+		return -1;
+	}
+
+	while ((ent = readdir(mp_dir))) {
+		char path[PATH_MAX];
+
+		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+			continue;
+
+		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+			 ent->d_name);
+		if (send_msg(path, msg, type) < 0)
+			ret = -1;
+	}
+	/* unlock the dir */
+	flock(dir_fd, LOCK_UN);
+
+	/* dir_fd automatically closed on closedir */
+	closedir(mp_dir);
+	return ret;
+}
+
 static int
 read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 {
@@ -260,12 +385,13 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 
 	RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
 
-	if (m->type == MP_REP) {
+	if (m->type == MP_REP || m->type == MP_IGN) {
 		pthread_mutex_lock(&sync_requests.lock);
 		sync_req = find_sync_request(s->sun_path, msg->name);
 		if (sync_req) {
 			memcpy(sync_req->reply, msg, sizeof(*msg));
-			sync_req->reply_received = 1;
+			/* -1 indicates that we've been asked to ignore */
+			sync_req->reply_received = m->type == MP_REP ? 1 : -1;
 			pthread_cond_signal(&sync_req->cond);
 		} else
 			RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
@@ -279,10 +405,22 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 		action = entry->action;
 	pthread_mutex_unlock(&mp_mutex_action);
 
-	if (!action)
-		RTE_LOG(ERR, EAL, "Cannot find action: %s\n", msg->name);
-	else if (action(msg, s->sun_path) < 0)
+	if (!action) {
+		if (m->type == MP_REQ && !internal_config.init_complete) {
+			/* if this is a request, and init is not yet complete,
+			 * and callback wasn't registered, we should tell the
+			 * requester to ignore our existence because we're not
+			 * yet ready to process this request.
+			 */
+			struct rte_mp_msg dummy = {0};
+			mp_send(&dummy, s->sun_path, MP_IGN);
+		} else {
+			RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
+				msg->name);
+		}
+	} else if (action(msg, s->sun_path) < 0) {
 		RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
+	}
 }
 
 static void *
@@ -419,130 +557,6 @@ rte_mp_channel_init(void)
 	return 0;
 }
 
-/**
- * Return -1, as fail to send message and it's caused by the local side.
- * Return 0, as fail to send message and it's caused by the remote side.
- * Return 1, as succeed to send message.
- *
- */
-static int
-send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
-{
-	int snd;
-	struct iovec iov;
-	struct msghdr msgh;
-	struct cmsghdr *cmsg;
-	struct sockaddr_un dst;
-	struct mp_msg_internal m;
-	int fd_size = msg->num_fds * sizeof(int);
-	char control[CMSG_SPACE(fd_size)];
-
-	m.type = type;
-	memcpy(&m.msg, msg, sizeof(*msg));
-
-	memset(&dst, 0, sizeof(dst));
-	dst.sun_family = AF_UNIX;
-	snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path);
-
-	memset(&msgh, 0, sizeof(msgh));
-	memset(control, 0, sizeof(control));
-
-	iov.iov_base = &m;
-	iov.iov_len = sizeof(m) - sizeof(msg->fds);
-
-	msgh.msg_name = &dst;
-	msgh.msg_namelen = sizeof(dst);
-	msgh.msg_iov = &iov;
-	msgh.msg_iovlen = 1;
-	msgh.msg_control = control;
-	msgh.msg_controllen = sizeof(control);
-
-	cmsg = CMSG_FIRSTHDR(&msgh);
-	cmsg->cmsg_len = CMSG_LEN(fd_size);
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SCM_RIGHTS;
-	memcpy(CMSG_DATA(cmsg), msg->fds, fd_size);
-
-	do {
-		snd = sendmsg(mp_fd, &msgh, 0);
-	} while (snd < 0 && errno == EINTR);
-
-	if (snd < 0) {
-		rte_errno = errno;
-		/* Check if it caused by peer process exits */
-		if (errno == ECONNREFUSED &&
-				rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			unlink(dst_path);
-			return 0;
-		}
-		if (errno == ENOBUFS) {
-			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
-				dst_path);
-			return 0;
-		}
-		RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n",
-			dst_path, strerror(errno));
-		return -1;
-	}
-
-	return 1;
-}
-
-static int
-mp_send(struct rte_mp_msg *msg, const char *peer, int type)
-{
-	int dir_fd, ret = 0;
-	DIR *mp_dir;
-	struct dirent *ent;
-
-	if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY))
-		peer = eal_mp_socket_path();
-
-	if (peer) {
-		if (send_msg(peer, msg, type) < 0)
-			return -1;
-		else
-			return 0;
-	}
-
-	/* broadcast to all secondary processes */
-	mp_dir = opendir(mp_dir_path);
-	if (!mp_dir) {
-		RTE_LOG(ERR, EAL, "Unable to open directory %s\n",
-				mp_dir_path);
-		rte_errno = errno;
-		return -1;
-	}
-
-	dir_fd = dirfd(mp_dir);
-	/* lock the directory to prevent processes spinning up while we send */
-	if (flock(dir_fd, LOCK_EX)) {
-		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
-			mp_dir_path);
-		rte_errno = errno;
-		closedir(mp_dir);
-		return -1;
-	}
-
-	while ((ent = readdir(mp_dir))) {
-		char path[PATH_MAX];
-
-		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
-			continue;
-
-		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
-			 ent->d_name);
-		if (send_msg(path, msg, type) < 0)
-			ret = -1;
-	}
-	/* unlock the dir */
-	flock(dir_fd, LOCK_UN);
-
-	/* dir_fd automatically closed on closedir */
-	closedir(mp_dir);
-	return ret;
-}
-
 static bool
 check_input(const struct rte_mp_msg *msg)
 {
@@ -631,6 +645,14 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 		rte_errno = ETIMEDOUT;
 		return -1;
 	}
+	if (sync_req.reply_received == -1) {
+		RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
+		/* not receiving this message is not an error, so decrement
+		 * number of sent messages
+		 */
+		reply->nb_sent--;
+		return 0;
+	}
 
 	tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
 	if (!tmp) {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH v5 2/6] eal: abstract away IPC socket path generation
  2018-03-07 16:56     ` [PATCH v5 2/6] eal: abstract away IPC socket path generation Anatoly Burakov
@ 2018-03-11  9:02       ` Tan, Jianfeng
  0 siblings, 0 replies; 57+ messages in thread
From: Tan, Jianfeng @ 2018-03-11  9:02 UTC (permalink / raw)
  To: Anatoly Burakov, dev; +Cc: keith.wiles, konstantin.ananyev



On 3/8/2018 12:56 AM, Anatoly Burakov wrote:
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks,
Jianfeng

> ---
>
> Notes:
>      v5: remove lock files, leaving only socket paths code
>      
>      v4: replace lock files with init files
>
>   lib/librte_eal/common/eal_common_proc.c | 48 ++++++++++++++++-----------------
>   1 file changed, 23 insertions(+), 25 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
> index da7930f..1aab3ac 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -91,6 +91,17 @@ find_sync_request(const char *dst, const char *act_name)
>   	return r;
>   }
>   
> +static void
> +create_socket_path(const char *name, char *buf, int len)
> +{
> +	const char *prefix = eal_mp_socket_path();
> +
> +	if (strlen(name) > 0)
> +		snprintf(buf, len, "%s_%s", prefix, name);
> +	else
> +		snprintf(buf, len, "%s", prefix);
> +}
> +
>   int
>   rte_eal_primary_proc_alive(const char *config_file_path)
>   {
> @@ -290,8 +301,12 @@ mp_handle(void *arg __rte_unused)
>   static int
>   open_socket_fd(void)
>   {
> +	char peer_name[PATH_MAX] = {0};
>   	struct sockaddr_un un;
> -	const char *prefix = eal_mp_socket_path();
> +
> +	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
> +		snprintf(peer_name, sizeof(peer_name),
> +				"%d_%"PRIx64, getpid(), rte_rdtsc());
>   
>   	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
>   	if (mp_fd < 0) {
> @@ -301,13 +316,11 @@ open_socket_fd(void)
>   
>   	memset(&un, 0, sizeof(un));
>   	un.sun_family = AF_UNIX;
> -	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> -		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
> -	else {
> -		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
> -			 prefix, getpid(), rte_rdtsc());
> -	}
> +
> +	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
> +
>   	unlink(un.sun_path); /* May still exist since last run */
> +
>   	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
>   		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
>   			un.sun_path, strerror(errno));
> @@ -342,20 +355,6 @@ unlink_sockets(const char *filter)
>   	return 0;
>   }
>   
> -static void
> -unlink_socket_by_path(const char *path)
> -{
> -	char *filename;
> -	char *fullpath = strdup(path);
> -
> -	if (!fullpath)
> -		return;
> -	filename = basename(fullpath);
> -	unlink_sockets(filename);
> -	free(fullpath);
> -	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
> -}
> -
>   int
>   rte_mp_channel_init(void)
>   {
> @@ -444,10 +443,9 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
>   	if (snd < 0) {
>   		rte_errno = errno;
>   		/* Check if it caused by peer process exits */
> -		if (errno == ECONNREFUSED) {
> -			/* We don't unlink the primary's socket here */
> -			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> -				unlink_socket_by_path(dst_path);
> +		if (errno == ECONNREFUSED &&
> +				rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +			unlink(dst_path);
>   			return 0;
>   		}
>   		if (errno == ENOBUFS) {

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v5 4/6] eal: lock IPC directory on init and send
  2018-03-07 16:56     ` [PATCH v5 4/6] eal: lock IPC directory on init and send Anatoly Burakov
@ 2018-03-11  9:14       ` Tan, Jianfeng
  0 siblings, 0 replies; 57+ messages in thread
From: Tan, Jianfeng @ 2018-03-11  9:14 UTC (permalink / raw)
  To: Anatoly Burakov, dev; +Cc: keith.wiles, konstantin.ananyev



On 3/8/2018 12:56 AM, Anatoly Burakov wrote:
> When sending IPC messages, prevent new sockets from initializing.
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Make sense.

Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks!

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v5 5/6] eal: simplify IPC sync request timeout code
  2018-03-07 16:56     ` [PATCH v5 5/6] eal: simplify IPC sync request timeout code Anatoly Burakov
@ 2018-03-11  9:25       ` Tan, Jianfeng
  0 siblings, 0 replies; 57+ messages in thread
From: Tan, Jianfeng @ 2018-03-11  9:25 UTC (permalink / raw)
  To: Anatoly Burakov, dev; +Cc: keith.wiles, konstantin.ananyev



On 3/8/2018 12:56 AM, Anatoly Burakov wrote:
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Why I did not find this simple way in the first place :-)

Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>

Thanks!

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v5 6/6] eal: ignore messages until init is complete
  2018-03-07 16:56     ` [PATCH v5 6/6] eal: ignore messages until init is complete Anatoly Burakov
@ 2018-03-12  1:42       ` Tan, Jianfeng
  2018-03-12  8:58         ` Burakov, Anatoly
  0 siblings, 1 reply; 57+ messages in thread
From: Tan, Jianfeng @ 2018-03-12  1:42 UTC (permalink / raw)
  To: Anatoly Burakov, dev; +Cc: keith.wiles, konstantin.ananyev


On 3/8/2018 12:56 AM, Anatoly Burakov wrote:
> If we receive messages that don't have a callback registered for
> them, and we haven't finished initialization yet, it can be reasonably
> inferred that we shouldn't have gotten the message in the first
> place. Therefore, send requester a special message telling them to
> ignore response to this request, as if this process wasn't there.

Two nits:
- I think we could add a note in commit log that "it only applies to 
primary to secondary request".
- To make the change more simple, maybe we can just put a declaration of 
function mp_send.

> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>

Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>

> ---
>
> Notes:
>      v5: add this patch
>      
>      No changes in mp_send and send_msg - just code move.
>
>   lib/librte_eal/common/eal_common_proc.c | 280 +++++++++++++++++---------------
>   1 file changed, 151 insertions(+), 129 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
> index fe27d68..1ea6045 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -52,6 +52,7 @@ enum mp_type {
>   	MP_MSG, /* Share message with peers, will not block */
>   	MP_REQ, /* Request for information, Will block for a reply */
>   	MP_REP, /* Response to previously-received request */
> +	MP_IGN, /* Response telling requester to ignore this response */
>   };
>   
>   struct mp_msg_internal {
> @@ -205,6 +206,130 @@ rte_mp_action_unregister(const char *name)
>   	free(entry);
>   }
>   
> +/**
> + * Return -1, as fail to send message and it's caused by the local side.
> + * Return 0, as fail to send message and it's caused by the remote side.
> + * Return 1, as succeed to send message.
> + *
> + */
> +static int
> +send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
> +{
> +	int snd;
> +	struct iovec iov;
> +	struct msghdr msgh;
> +	struct cmsghdr *cmsg;
> +	struct sockaddr_un dst;
> +	struct mp_msg_internal m;
> +	int fd_size = msg->num_fds * sizeof(int);
> +	char control[CMSG_SPACE(fd_size)];
> +
> +	m.type = type;
> +	memcpy(&m.msg, msg, sizeof(*msg));
> +
> +	memset(&dst, 0, sizeof(dst));
> +	dst.sun_family = AF_UNIX;
> +	snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path);
> +
> +	memset(&msgh, 0, sizeof(msgh));
> +	memset(control, 0, sizeof(control));
> +
> +	iov.iov_base = &m;
> +	iov.iov_len = sizeof(m) - sizeof(msg->fds);
> +
> +	msgh.msg_name = &dst;
> +	msgh.msg_namelen = sizeof(dst);
> +	msgh.msg_iov = &iov;
> +	msgh.msg_iovlen = 1;
> +	msgh.msg_control = control;
> +	msgh.msg_controllen = sizeof(control);
> +
> +	cmsg = CMSG_FIRSTHDR(&msgh);
> +	cmsg->cmsg_len = CMSG_LEN(fd_size);
> +	cmsg->cmsg_level = SOL_SOCKET;
> +	cmsg->cmsg_type = SCM_RIGHTS;
> +	memcpy(CMSG_DATA(cmsg), msg->fds, fd_size);
> +
> +	do {
> +		snd = sendmsg(mp_fd, &msgh, 0);
> +	} while (snd < 0 && errno == EINTR);
> +
> +	if (snd < 0) {
> +		rte_errno = errno;
> +		/* Check if it caused by peer process exits */
> +		if (errno == ECONNREFUSED &&
> +				rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +			unlink(dst_path);
> +			return 0;
> +		}
> +		if (errno == ENOBUFS) {
> +			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
> +				dst_path);
> +			return 0;
> +		}
> +		RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n",
> +			dst_path, strerror(errno));
> +		return -1;
> +	}
> +
> +	return 1;
> +}
> +
> +static int
> +mp_send(struct rte_mp_msg *msg, const char *peer, int type)
> +{
> +	int dir_fd, ret = 0;
> +	DIR *mp_dir;
> +	struct dirent *ent;
> +
> +	if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY))
> +		peer = eal_mp_socket_path();
> +
> +	if (peer) {
> +		if (send_msg(peer, msg, type) < 0)
> +			return -1;
> +		else
> +			return 0;
> +	}
> +
> +	/* broadcast to all secondary processes */
> +	mp_dir = opendir(mp_dir_path);
> +	if (!mp_dir) {
> +		RTE_LOG(ERR, EAL, "Unable to open directory %s\n",
> +				mp_dir_path);
> +		rte_errno = errno;
> +		return -1;
> +	}
> +
> +	dir_fd = dirfd(mp_dir);
> +	/* lock the directory to prevent processes spinning up while we send */
> +	if (flock(dir_fd, LOCK_EX)) {
> +		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> +			mp_dir_path);
> +		rte_errno = errno;
> +		closedir(mp_dir);
> +		return -1;
> +	}
> +
> +	while ((ent = readdir(mp_dir))) {
> +		char path[PATH_MAX];
> +
> +		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
> +			continue;
> +
> +		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
> +			 ent->d_name);
> +		if (send_msg(path, msg, type) < 0)
> +			ret = -1;
> +	}
> +	/* unlock the dir */
> +	flock(dir_fd, LOCK_UN);
> +
> +	/* dir_fd automatically closed on closedir */
> +	closedir(mp_dir);
> +	return ret;
> +}
> +
>   static int
>   read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
>   {
> @@ -260,12 +385,13 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
>   
>   	RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
>   
> -	if (m->type == MP_REP) {
> +	if (m->type == MP_REP || m->type == MP_IGN) {
>   		pthread_mutex_lock(&sync_requests.lock);
>   		sync_req = find_sync_request(s->sun_path, msg->name);
>   		if (sync_req) {
>   			memcpy(sync_req->reply, msg, sizeof(*msg));
> -			sync_req->reply_received = 1;
> +			/* -1 indicates that we've been asked to ignore */
> +			sync_req->reply_received = m->type == MP_REP ? 1 : -1;
>   			pthread_cond_signal(&sync_req->cond);
>   		} else
>   			RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
> @@ -279,10 +405,22 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
>   		action = entry->action;
>   	pthread_mutex_unlock(&mp_mutex_action);
>   
> -	if (!action)
> -		RTE_LOG(ERR, EAL, "Cannot find action: %s\n", msg->name);
> -	else if (action(msg, s->sun_path) < 0)
> +	if (!action) {
> +		if (m->type == MP_REQ && !internal_config.init_complete) {
> +			/* if this is a request, and init is not yet complete,
> +			 * and callback wasn't registered, we should tell the
> +			 * requester to ignore our existence because we're not
> +			 * yet ready to process this request.
> +			 */
> +			struct rte_mp_msg dummy = {0};
> +			mp_send(&dummy, s->sun_path, MP_IGN);
> +		} else {
> +			RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
> +				msg->name);
> +		}
> +	} else if (action(msg, s->sun_path) < 0) {
>   		RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
> +	}
>   }
>   
>   static void *
> @@ -419,130 +557,6 @@ rte_mp_channel_init(void)
>   	return 0;
>   }
>   
> -/**
> - * Return -1, as fail to send message and it's caused by the local side.
> - * Return 0, as fail to send message and it's caused by the remote side.
> - * Return 1, as succeed to send message.
> - *
> - */
> -static int
> -send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
> -{
> -	int snd;
> -	struct iovec iov;
> -	struct msghdr msgh;
> -	struct cmsghdr *cmsg;
> -	struct sockaddr_un dst;
> -	struct mp_msg_internal m;
> -	int fd_size = msg->num_fds * sizeof(int);
> -	char control[CMSG_SPACE(fd_size)];
> -
> -	m.type = type;
> -	memcpy(&m.msg, msg, sizeof(*msg));
> -
> -	memset(&dst, 0, sizeof(dst));
> -	dst.sun_family = AF_UNIX;
> -	snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path);
> -
> -	memset(&msgh, 0, sizeof(msgh));
> -	memset(control, 0, sizeof(control));
> -
> -	iov.iov_base = &m;
> -	iov.iov_len = sizeof(m) - sizeof(msg->fds);
> -
> -	msgh.msg_name = &dst;
> -	msgh.msg_namelen = sizeof(dst);
> -	msgh.msg_iov = &iov;
> -	msgh.msg_iovlen = 1;
> -	msgh.msg_control = control;
> -	msgh.msg_controllen = sizeof(control);
> -
> -	cmsg = CMSG_FIRSTHDR(&msgh);
> -	cmsg->cmsg_len = CMSG_LEN(fd_size);
> -	cmsg->cmsg_level = SOL_SOCKET;
> -	cmsg->cmsg_type = SCM_RIGHTS;
> -	memcpy(CMSG_DATA(cmsg), msg->fds, fd_size);
> -
> -	do {
> -		snd = sendmsg(mp_fd, &msgh, 0);
> -	} while (snd < 0 && errno == EINTR);
> -
> -	if (snd < 0) {
> -		rte_errno = errno;
> -		/* Check if it caused by peer process exits */
> -		if (errno == ECONNREFUSED &&
> -				rte_eal_process_type() == RTE_PROC_PRIMARY) {
> -			unlink(dst_path);
> -			return 0;
> -		}
> -		if (errno == ENOBUFS) {
> -			RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
> -				dst_path);
> -			return 0;
> -		}
> -		RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n",
> -			dst_path, strerror(errno));
> -		return -1;
> -	}
> -
> -	return 1;
> -}
> -
> -static int
> -mp_send(struct rte_mp_msg *msg, const char *peer, int type)
> -{
> -	int dir_fd, ret = 0;
> -	DIR *mp_dir;
> -	struct dirent *ent;
> -
> -	if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY))
> -		peer = eal_mp_socket_path();
> -
> -	if (peer) {
> -		if (send_msg(peer, msg, type) < 0)
> -			return -1;
> -		else
> -			return 0;
> -	}
> -
> -	/* broadcast to all secondary processes */
> -	mp_dir = opendir(mp_dir_path);
> -	if (!mp_dir) {
> -		RTE_LOG(ERR, EAL, "Unable to open directory %s\n",
> -				mp_dir_path);
> -		rte_errno = errno;
> -		return -1;
> -	}
> -
> -	dir_fd = dirfd(mp_dir);
> -	/* lock the directory to prevent processes spinning up while we send */
> -	if (flock(dir_fd, LOCK_EX)) {
> -		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
> -			mp_dir_path);
> -		rte_errno = errno;
> -		closedir(mp_dir);
> -		return -1;
> -	}
> -
> -	while ((ent = readdir(mp_dir))) {
> -		char path[PATH_MAX];
> -
> -		if (fnmatch(mp_filter, ent->d_name, 0) != 0)
> -			continue;
> -
> -		snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
> -			 ent->d_name);
> -		if (send_msg(path, msg, type) < 0)
> -			ret = -1;
> -	}
> -	/* unlock the dir */
> -	flock(dir_fd, LOCK_UN);
> -
> -	/* dir_fd automatically closed on closedir */
> -	closedir(mp_dir);
> -	return ret;
> -}
> -
>   static bool
>   check_input(const struct rte_mp_msg *msg)
>   {
> @@ -631,6 +645,14 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
>   		rte_errno = ETIMEDOUT;
>   		return -1;
>   	}
> +	if (sync_req.reply_received == -1) {
> +		RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
> +		/* not receiving this message is not an error, so decrement
> +		 * number of sent messages
> +		 */
> +		reply->nb_sent--;
> +		return 0;
> +	}
>   
>   	tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
>   	if (!tmp) {

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v5 6/6] eal: ignore messages until init is complete
  2018-03-12  1:42       ` Tan, Jianfeng
@ 2018-03-12  8:58         ` Burakov, Anatoly
  0 siblings, 0 replies; 57+ messages in thread
From: Burakov, Anatoly @ 2018-03-12  8:58 UTC (permalink / raw)
  To: Tan, Jianfeng, dev; +Cc: keith.wiles, konstantin.ananyev

On 12-Mar-18 1:42 AM, Tan, Jianfeng wrote:
> 
> On 3/8/2018 12:56 AM, Anatoly Burakov wrote:
>> If we receive messages that don't have a callback registered for
>> them, and we haven't finished initialization yet, it can be reasonably
>> inferred that we shouldn't have gotten the message in the first
>> place. Therefore, send requester a special message telling them to
>> ignore response to this request, as if this process wasn't there.
> 
> Two nits:
> - I think we could add a note in commit log that "it only applies to 
> primary to secondary request".
> - To make the change more simple, maybe we can just put a declaration of 
> function mp_send.
> 
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> 
> Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
> 

Makes sense, will do for v6. Thanks for the reviews!


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v6 0/6] Improvements for DPDK IPC
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
@ 2018-03-13 17:42       ` Anatoly Burakov
  2018-03-21 17:43         ` Thomas Monjalon
  2018-03-13 17:42       ` [PATCH v6 1/6] eal: add internal flag indicating init has completed Anatoly Burakov
                         ` (5 subsequent siblings)
  6 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-13 17:42 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

This is an assortment of loosely related improvements to IPC, mostly
related to handling corner cases and avoiding race conditions.

Main addition is an attempt to avoid undefined behavior when receiving
messages while secondary process is initializing. It is assumed that
once callback is registered, it is safe to receive messages.

If the callback wasn't registered, then there are two choices - either
we haven't reached the stage where we register this callback (init is
not finished), or user has forgotten to register callback for this
message. The latter can only be known once initialization is complete,
so until init is complete, treat this process as not-existing if there
is no registered callback for the message. This will handle both
scenarios.

v6: - clang compile fix
    - clarify commit message for patch 6
    - simplify patch 6
v5: - added cover-letter :)
    - drop the "don't send messages to processes which haven't finished
      initializing" model added in previous version. instead, allow
      everyone to receive all messages, but check if initialization is
      completed, and check if there is a callback registered for this
      message. if there is no callback, assume we just didn't get around
      to it yet, so just send a special message to the requestor that
      it should treat this process as if it wasn't there.
v4: - make init_complete volatile
    - changed from "don't process messages until init complete" to
      "don't send messages to processes which haven't finished
      initializing", as the former would have resulted in timeouts if
      init took too long to complete
    - fixed resource leaks
    - added patch to simplify IPC timeouts handling
v3: - move init_complete until after receiving message
v2: - added patch to prevent IPC from sending messages while primary
      is initializing
    - added patch to generate filter from eal_mp_socket_path() instead
      of hardcoding the value

Anatoly Burakov (6):
  eal: add internal flag indicating init has completed
  eal: abstract away IPC socket path generation
  eal: don't hardcode socket filter value in IPC
  eal: lock IPC directory on init and send
  eal: simplify IPC sync request timeout code
  eal: ignore messages until init is complete

 lib/librte_eal/common/eal_common_options.c |   1 +
 lib/librte_eal/common/eal_common_proc.c    | 178 ++++++++++++++++++++---------
 lib/librte_eal/common/eal_internal_cfg.h   |   2 +
 lib/librte_eal/linuxapp/eal/eal.c          |   2 +
 4 files changed, 129 insertions(+), 54 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v6 1/6] eal: add internal flag indicating init has completed
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 " Anatoly Burakov
@ 2018-03-13 17:42       ` Anatoly Burakov
  2018-03-13 20:59         ` Bruce Richardson
  2018-03-13 17:42       ` [PATCH v6 2/6] eal: abstract away IPC socket path generation Anatoly Burakov
                         ` (4 subsequent siblings)
  6 siblings, 1 reply; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-13 17:42 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Currently, primary process initialization is finalized by setting
the RTE_MAGIC value in the shared config. However, it is not
possible to check whether secondary process initialization has
completed. Add such a value to internal config.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v4: make init_complete volatile
    
    This patchset is dependent upon earlier IPC fixes patchset [1].
    
    [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/

 lib/librte_eal/common/eal_common_options.c | 1 +
 lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d2..0be80cb 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -194,6 +194,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->vmware_tsc_map = 0;
 	internal_cfg->create_uio_dev = 0;
 	internal_cfg->user_mbuf_pool_ops_name = NULL;
+	internal_cfg->init_complete = 0;
 }
 
 static int
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc..a0082d1 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -56,6 +56,8 @@ struct internal_config {
 			/**< user defined mbuf pool ops name */
 	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
 	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+	volatile unsigned int init_complete;
+	/**< indicates whether EAL has completed initialization */
 };
 extern struct internal_config internal_config; /**< Global EAL configuration. */
 
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..2ecd07b 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -669,6 +669,8 @@ rte_eal_mcfg_complete(void)
 	/* ALL shared mem_config related INIT DONE */
 	if (rte_config.process_type == RTE_PROC_PRIMARY)
 		rte_config.mem_config->magic = RTE_MAGIC;
+
+	internal_config.init_complete = 1;
 }
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v6 2/6] eal: abstract away IPC socket path generation
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 " Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 1/6] eal: add internal flag indicating init has completed Anatoly Burakov
@ 2018-03-13 17:42       ` Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 3/6] eal: don't hardcode socket filter value in IPC Anatoly Burakov
                         ` (3 subsequent siblings)
  6 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-13 17:42 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
---

Notes:
    v5: remove lock files, leaving only socket paths code
    
    v4: replace lock files with init files

 lib/librte_eal/common/eal_common_proc.c | 48 ++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index da7930f..1aab3ac 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -91,6 +91,17 @@ find_sync_request(const char *dst, const char *act_name)
 	return r;
 }
 
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+	const char *prefix = eal_mp_socket_path();
+
+	if (strlen(name) > 0)
+		snprintf(buf, len, "%s_%s", prefix, name);
+	else
+		snprintf(buf, len, "%s", prefix);
+}
+
 int
 rte_eal_primary_proc_alive(const char *config_file_path)
 {
@@ -290,8 +301,12 @@ mp_handle(void *arg __rte_unused)
 static int
 open_socket_fd(void)
 {
+	char peer_name[PATH_MAX] = {0};
 	struct sockaddr_un un;
-	const char *prefix = eal_mp_socket_path();
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		snprintf(peer_name, sizeof(peer_name),
+				"%d_%"PRIx64, getpid(), rte_rdtsc());
 
 	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (mp_fd < 0) {
@@ -301,13 +316,11 @@ open_socket_fd(void)
 
 	memset(&un, 0, sizeof(un));
 	un.sun_family = AF_UNIX;
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
-	else {
-		snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
-			 prefix, getpid(), rte_rdtsc());
-	}
+
+	create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
 	unlink(un.sun_path); /* May still exist since last run */
+
 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
 			un.sun_path, strerror(errno));
@@ -342,20 +355,6 @@ unlink_sockets(const char *filter)
 	return 0;
 }
 
-static void
-unlink_socket_by_path(const char *path)
-{
-	char *filename;
-	char *fullpath = strdup(path);
-
-	if (!fullpath)
-		return;
-	filename = basename(fullpath);
-	unlink_sockets(filename);
-	free(fullpath);
-	RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
-}
-
 int
 rte_mp_channel_init(void)
 {
@@ -444,10 +443,9 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 	if (snd < 0) {
 		rte_errno = errno;
 		/* Check if it caused by peer process exits */
-		if (errno == ECONNREFUSED) {
-			/* We don't unlink the primary's socket here */
-			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-				unlink_socket_by_path(dst_path);
+		if (errno == ECONNREFUSED &&
+				rte_eal_process_type() == RTE_PROC_PRIMARY) {
+			unlink(dst_path);
 			return 0;
 		}
 		if (errno == ENOBUFS) {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v6 3/6] eal: don't hardcode socket filter value in IPC
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
                         ` (2 preceding siblings ...)
  2018-03-13 17:42       ` [PATCH v6 2/6] eal: abstract away IPC socket path generation Anatoly Burakov
@ 2018-03-13 17:42       ` Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 4/6] eal: lock IPC directory on init and send Anatoly Burakov
                         ` (2 subsequent siblings)
  6 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-13 17:42 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Currently, filter value is hardcoded and disconnected from actual
value returned by eal_mp_socket_path(). Fix this to generate filter
value by deriving it from eal_mp_socket_path() instead.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
---

Notes:
    v5: removed init files
    
    v4: added filtering for init files as well

 lib/librte_eal/common/eal_common_proc.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 1aab3ac..9587211 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -359,18 +359,19 @@ int
 rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
-	char *path;
+	char path[PATH_MAX];
 	pthread_t tid;
 
-	snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
-		 internal_config.hugefile_prefix);
+	/* create filter path */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_filter, sizeof(mp_filter), "%s", basename(path));
 
-	path = strdup(eal_mp_socket_path());
-	snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
-	free(path);
+	/* path may have been modified, so recreate it */
+	create_socket_path("*", path, sizeof(path));
+	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
-	    unlink_sockets(mp_filter)) {
+			unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
 		return -1;
 	}
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v6 4/6] eal: lock IPC directory on init and send
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
                         ` (3 preceding siblings ...)
  2018-03-13 17:42       ` [PATCH v6 3/6] eal: don't hardcode socket filter value in IPC Anatoly Burakov
@ 2018-03-13 17:42       ` Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 5/6] eal: simplify IPC sync request timeout code Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 6/6] eal: ignore messages until init is complete Anatoly Burakov
  6 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-13 17:42 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

When sending IPC messages, prevent new sockets from initializing.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
---

Notes:
    v5: removed init files introduced in v4
    
    v4: fixed resource leaks and added support for init files
        introduced in v4 series

 lib/librte_eal/common/eal_common_proc.c | 59 +++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 9587211..c6fef75 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/file.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -360,6 +361,7 @@ rte_mp_channel_init(void)
 {
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 	char path[PATH_MAX];
+	int dir_fd;
 	pthread_t tid;
 
 	/* create filter path */
@@ -370,19 +372,38 @@ rte_mp_channel_init(void)
 	create_socket_path("*", path, sizeof(path));
 	snprintf(mp_dir_path, sizeof(mp_dir_path), "%s", dirname(path));
 
+	/* lock the directory */
+	dir_fd = open(mp_dir_path, O_RDONLY);
+	if (dir_fd < 0) {
+		RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+			mp_dir_path, strerror(errno));
+		return -1;
+	}
+
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+			mp_dir_path, strerror(errno));
+		close(dir_fd);
+		return -1;
+	}
+
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			unlink_sockets(mp_filter)) {
 		RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+		close(dir_fd);
 		return -1;
 	}
 
-	if (open_socket_fd() < 0)
+	if (open_socket_fd() < 0) {
+		close(dir_fd);
 		return -1;
+	}
 
 	if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
 			strerror(errno));
 		close(mp_fd);
+		close(dir_fd);
 		mp_fd = -1;
 		return -1;
 	}
@@ -390,6 +411,11 @@ rte_mp_channel_init(void)
 	/* try best to set thread name */
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
 	rte_thread_setname(tid, thread_name);
+
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
+	close(dir_fd);
+
 	return 0;
 }
 
@@ -465,7 +491,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
 static int
 mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 
@@ -487,6 +513,17 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		rte_errno = errno;
 		return -1;
 	}
+
+	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		rte_errno = errno;
+		closedir(mp_dir);
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 
@@ -498,7 +535,10 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
 		if (send_msg(path, msg, type) < 0)
 			ret = -1;
 	}
+	/* unlock the dir */
+	flock(dir_fd, LOCK_UN);
 
+	/* dir_fd automatically closed on closedir */
 	closedir(mp_dir);
 	return ret;
 }
@@ -619,7 +659,7 @@ int __rte_experimental
 rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		const struct timespec *ts)
 {
-	int ret = 0;
+	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
 	struct timeval now;
@@ -655,6 +695,16 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		return -1;
 	}
 
+	dir_fd = dirfd(mp_dir);
+	/* lock the directory to prevent processes spinning up while we send */
+	if (flock(dir_fd, LOCK_EX)) {
+		RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+			mp_dir_path);
+		closedir(mp_dir);
+		rte_errno = errno;
+		return -1;
+	}
+
 	while ((ent = readdir(mp_dir))) {
 		char path[PATH_MAX];
 
@@ -667,7 +717,10 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		if (mp_request_one(path, req, reply, &end))
 			ret = -1;
 	}
+	/* unlock the directory */
+	flock(dir_fd, LOCK_UN);
 
+	/* dir_fd automatically closed on closedir */
 	closedir(mp_dir);
 	return ret;
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v6 5/6] eal: simplify IPC sync request timeout code
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
                         ` (4 preceding siblings ...)
  2018-03-13 17:42       ` [PATCH v6 4/6] eal: lock IPC directory on init and send Anatoly Burakov
@ 2018-03-13 17:42       ` Anatoly Burakov
  2018-03-13 17:42       ` [PATCH v6 6/6] eal: ignore messages until init is complete Anatoly Burakov
  6 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-13 17:42 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
---

Notes:
    v4: add this patch

 lib/librte_eal/common/eal_common_proc.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index c6fef75..fe27d68 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -586,7 +586,6 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
-	struct timeval now;
 	struct rte_mp_msg msg, *tmp;
 	struct sync_request sync_req, *exist;
 
@@ -618,19 +617,10 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 	reply->nb_sent++;
 
 	do {
-		pthread_cond_timedwait(&sync_req.cond, &sync_requests.lock, ts);
-		/* Check spurious wakeups */
-		if (sync_req.reply_received == 1)
-			break;
-		/* Check if time is out */
-		if (gettimeofday(&now, NULL) < 0)
-			break;
-		if (ts->tv_sec < now.tv_sec)
-			break;
-		else if (now.tv_sec == ts->tv_sec &&
-			 now.tv_usec * 1000 < ts->tv_nsec)
-			break;
-	} while (1);
+		ret = pthread_cond_timedwait(&sync_req.cond,
+				&sync_requests.lock, ts);
+	} while (ret != 0 && ret != ETIMEDOUT);
+
 	/* We got the lock now */
 	TAILQ_REMOVE(&sync_requests.requests, &sync_req, next);
 	pthread_mutex_unlock(&sync_requests.lock);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v6 6/6] eal: ignore messages until init is complete
  2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
                         ` (5 preceding siblings ...)
  2018-03-13 17:42       ` [PATCH v6 5/6] eal: simplify IPC sync request timeout code Anatoly Burakov
@ 2018-03-13 17:42       ` Anatoly Burakov
  6 siblings, 0 replies; 57+ messages in thread
From: Anatoly Burakov @ 2018-03-13 17:42 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, keith.wiles, konstantin.ananyev

If we receive messages that don't have a callback registered for
them, and we haven't finished initialization yet, it can be reasonably
inferred that we shouldn't have gotten the message in the first
place. Therefore, send requester a special message telling them to
ignore response to this request, as if this process wasn't there.

Since it is not possible for primary process to receive any messages
during initialization, this change in practice only applies to
secondary processes.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Jianfeng Tan <jianfeng.tan@intel.com>
---

Notes:
    v6: - clang fix
        - forward declare mp_send instead of code move
        - clarify commit message
    v5: add this patch
    
    No changes in mp_send and send_msg - just code move.

 lib/librte_eal/common/eal_common_proc.c | 38 ++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index fe27d68..4131b67 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -52,6 +52,7 @@ enum mp_type {
 	MP_MSG, /* Share message with peers, will not block */
 	MP_REQ, /* Request for information, Will block for a reply */
 	MP_REP, /* Response to previously-received request */
+	MP_IGN, /* Response telling requester to ignore this response */
 };
 
 struct mp_msg_internal {
@@ -78,6 +79,11 @@ static struct {
 	.lock = PTHREAD_MUTEX_INITIALIZER
 };
 
+/* forward declarations */
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+
+
 static struct sync_request *
 find_sync_request(const char *dst, const char *act_name)
 {
@@ -260,12 +266,13 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 
 	RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
 
-	if (m->type == MP_REP) {
+	if (m->type == MP_REP || m->type == MP_IGN) {
 		pthread_mutex_lock(&sync_requests.lock);
 		sync_req = find_sync_request(s->sun_path, msg->name);
 		if (sync_req) {
 			memcpy(sync_req->reply, msg, sizeof(*msg));
-			sync_req->reply_received = 1;
+			/* -1 indicates that we've been asked to ignore */
+			sync_req->reply_received = m->type == MP_REP ? 1 : -1;
 			pthread_cond_signal(&sync_req->cond);
 		} else
 			RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
@@ -279,10 +286,23 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 		action = entry->action;
 	pthread_mutex_unlock(&mp_mutex_action);
 
-	if (!action)
-		RTE_LOG(ERR, EAL, "Cannot find action: %s\n", msg->name);
-	else if (action(msg, s->sun_path) < 0)
+	if (!action) {
+		if (m->type == MP_REQ && !internal_config.init_complete) {
+			/* if this is a request, and init is not yet complete,
+			 * and callback wasn't registered, we should tell the
+			 * requester to ignore our existence because we're not
+			 * yet ready to process this request.
+			 */
+			struct rte_mp_msg dummy;
+			memset(&dummy, 0, sizeof(dummy));
+			mp_send(&dummy, s->sun_path, MP_IGN);
+		} else {
+			RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
+				msg->name);
+		}
+	} else if (action(msg, s->sun_path) < 0) {
 		RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
+	}
 }
 
 static void *
@@ -631,6 +651,14 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
 		rte_errno = ETIMEDOUT;
 		return -1;
 	}
+	if (sync_req.reply_received == -1) {
+		RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
+		/* not receiving this message is not an error, so decrement
+		 * number of sent messages
+		 */
+		reply->nb_sent--;
+		return 0;
+	}
 
 	tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
 	if (!tmp) {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH v6 1/6] eal: add internal flag indicating init has completed
  2018-03-13 17:42       ` [PATCH v6 1/6] eal: add internal flag indicating init has completed Anatoly Burakov
@ 2018-03-13 20:59         ` Bruce Richardson
  0 siblings, 0 replies; 57+ messages in thread
From: Bruce Richardson @ 2018-03-13 20:59 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, jianfeng.tan, keith.wiles, konstantin.ananyev

On Tue, Mar 13, 2018 at 05:42:35PM +0000, Anatoly Burakov wrote:
> Currently, primary process initialization is finalized by setting
> the RTE_MAGIC value in the shared config. However, it is not
> possible to check whether secondary process initialization has
> completed. Add such a value to internal config.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> 
> Notes:
>     v4: make init_complete volatile
>     
>     This patchset is dependent upon earlier IPC fixes patchset [1].
>     
>     [1] http://dpdk.org/dev/patchwork/bundle/aburakov/IPC_Fixes/
> 
>  lib/librte_eal/common/eal_common_options.c | 1 +
>  lib/librte_eal/common/eal_internal_cfg.h   | 2 ++
>  lib/librte_eal/linuxapp/eal/eal.c          | 2 ++
>  3 files changed, 5 insertions(+)
> 
Looks harmless to me :-)

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v6 0/6] Improvements for DPDK IPC
  2018-03-13 17:42       ` [PATCH v6 " Anatoly Burakov
@ 2018-03-21 17:43         ` Thomas Monjalon
  0 siblings, 0 replies; 57+ messages in thread
From: Thomas Monjalon @ 2018-03-21 17:43 UTC (permalink / raw)
  To: Anatoly Burakov; +Cc: dev, jianfeng.tan, keith.wiles, konstantin.ananyev

> Anatoly Burakov (6):
>   eal: add internal flag indicating init has completed
>   eal: abstract away IPC socket path generation
>   eal: don't hardcode socket filter value in IPC
>   eal: lock IPC directory on init and send
>   eal: simplify IPC sync request timeout code
>   eal: ignore messages until init is complete

Applied, thanks

^ permalink raw reply	[flat|nested] 57+ messages in thread

end of thread, other threads:[~2018-03-21 17:43 UTC | newest]

Thread overview: 57+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-22 18:21 [PATCH 1/3] eal: add internal flag indicating init has completed Anatoly Burakov
2018-02-22 18:21 ` [PATCH 2/3] eal: don't process IPC messages before init finished Anatoly Burakov
2018-02-22 18:21 ` [PATCH 3/3] eal: use locks to determine if secondary process is active Anatoly Burakov
2018-03-02 15:14   ` [PATCH v4 1/5] eal: add internal flag indicating init has completed Anatoly Burakov
2018-03-07 16:56     ` [PATCH v5 0/6] Improvements for DPDK IPC Anatoly Burakov
2018-03-13 17:42       ` [PATCH v6 " Anatoly Burakov
2018-03-21 17:43         ` Thomas Monjalon
2018-03-13 17:42       ` [PATCH v6 1/6] eal: add internal flag indicating init has completed Anatoly Burakov
2018-03-13 20:59         ` Bruce Richardson
2018-03-13 17:42       ` [PATCH v6 2/6] eal: abstract away IPC socket path generation Anatoly Burakov
2018-03-13 17:42       ` [PATCH v6 3/6] eal: don't hardcode socket filter value in IPC Anatoly Burakov
2018-03-13 17:42       ` [PATCH v6 4/6] eal: lock IPC directory on init and send Anatoly Burakov
2018-03-13 17:42       ` [PATCH v6 5/6] eal: simplify IPC sync request timeout code Anatoly Burakov
2018-03-13 17:42       ` [PATCH v6 6/6] eal: ignore messages until init is complete Anatoly Burakov
2018-03-07 16:56     ` [PATCH v5 1/6] eal: add internal flag indicating init has completed Anatoly Burakov
2018-03-07 16:56     ` [PATCH v5 2/6] eal: abstract away IPC socket path generation Anatoly Burakov
2018-03-11  9:02       ` Tan, Jianfeng
2018-03-07 16:56     ` [PATCH v5 3/6] eal: don't hardcode socket filter value in IPC Anatoly Burakov
2018-03-07 16:56     ` [PATCH v5 4/6] eal: lock IPC directory on init and send Anatoly Burakov
2018-03-11  9:14       ` Tan, Jianfeng
2018-03-07 16:56     ` [PATCH v5 5/6] eal: simplify IPC sync request timeout code Anatoly Burakov
2018-03-11  9:25       ` Tan, Jianfeng
2018-03-07 16:56     ` [PATCH v5 6/6] eal: ignore messages until init is complete Anatoly Burakov
2018-03-12  1:42       ` Tan, Jianfeng
2018-03-12  8:58         ` Burakov, Anatoly
2018-03-02 15:14   ` [PATCH v4 2/5] eal: use file to check if secondary process is ready Anatoly Burakov
2018-03-06 11:03     ` Burakov, Anatoly
2018-03-02 15:14   ` [PATCH v4 3/5] eal: prevent secondary process init while sending messages Anatoly Burakov
2018-03-02 15:14   ` [PATCH v4 4/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
2018-03-02 15:14   ` [PATCH v4 5/5] eal: simplify IPC sync request timeout code Anatoly Burakov
2018-02-22 18:32 ` [PATCH 1/3] eal: add internal flag indicating init has completed Burakov, Anatoly
2018-02-27 13:23 ` [PATCH v2 1/5] " Anatoly Burakov
2018-02-27 14:35   ` [PATCH v3 " Anatoly Burakov
2018-02-28  2:12     ` Tan, Jianfeng
2018-02-28  9:43       ` Burakov, Anatoly
2018-02-27 14:35   ` [PATCH v3 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
2018-02-28  1:09     ` Tan, Jianfeng
2018-02-28  9:45       ` Burakov, Anatoly
2018-02-28  4:00     ` Wiles, Keith
2018-02-28  9:47       ` Burakov, Anatoly
2018-02-27 14:35   ` [PATCH v3 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
2018-02-28  1:26     ` Tan, Jianfeng
2018-02-28 10:15       ` Burakov, Anatoly
2018-02-28  4:17     ` Wiles, Keith
2018-02-28 10:17       ` Burakov, Anatoly
2018-02-27 14:35   ` [PATCH v3 4/5] eal: prevent secondary process init while sending messages Anatoly Burakov
2018-02-28  1:58     ` Tan, Jianfeng
2018-02-28 10:19       ` Burakov, Anatoly
2018-02-28 15:49         ` Tan, Jianfeng
2018-02-27 14:35   ` [PATCH v3 5/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov
2018-02-28  1:52     ` Tan, Jianfeng
2018-02-28 10:21       ` Burakov, Anatoly
2018-02-28 15:01         ` Tan, Jianfeng
2018-02-27 13:23 ` [PATCH v2 2/5] eal: don't process IPC messages before init finished Anatoly Burakov
2018-02-27 13:23 ` [PATCH v2 3/5] eal: use locks to determine if secondary process is active Anatoly Burakov
2018-02-27 13:23 ` [PATCH v2 4/5] eal: prevent secondary process init while sending messages Anatoly Burakov
2018-02-27 13:23 ` [PATCH v2 5/5] eal: don't hardcode socket filter value in IPC Anatoly Burakov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.