All of lore.kernel.org
 help / color / mirror / Atom feed
* [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions
@ 2017-04-20 17:28 Arturo Borrero Gonzalez
  2017-04-20 17:28 ` [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors Arturo Borrero Gonzalez
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-20 17:28 UTC (permalink / raw)
  To: netfilter-devel

They are shared by both sync-ftfw and sync-notrack.

Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
---
 include/Makefile.am |    2 +-
 include/queue_tx.h  |    7 ++++++
 src/Makefile.am     |    2 +-
 src/queue_tx.c      |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/sync-ftfw.c     |   37 +------------------------------
 src/sync-notrack.c  |   37 +------------------------------
 6 files changed, 71 insertions(+), 74 deletions(-)
 create mode 100644 include/queue_tx.h
 create mode 100644 src/queue_tx.c

diff --git a/include/Makefile.am b/include/Makefile.am
index e81463a..84fd608 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
 		 network.h filter.h queue.h vector.h cidr.h \
 		 traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
 		 process.h origin.h internal.h external.h date.h nfct.h \
-		 helper.h myct.h stack.h systemd.h
+		 helper.h myct.h stack.h systemd.h queue_tx.h
 
diff --git a/include/queue_tx.h b/include/queue_tx.h
new file mode 100644
index 0000000..e29b1f0
--- /dev/null
+++ b/include/queue_tx.h
@@ -0,0 +1,7 @@
+#ifndef _QUEUE_TX_H_
+#define _QUEUE_TX_H_
+
+void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to);
+void tx_queue_add_ctlmsg2(uint32_t flags);
+
+#endif /* _QUEUE_TX_H_ */
diff --git a/src/Makefile.am b/src/Makefile.am
index 144c52c..39c7315 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -37,7 +37,7 @@ endif
 
 nfct_LDFLAGS = -export-dynamic @LAZY_LDFLAGS@
 
-conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
+conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c queue_tx.c rbtree.c \
 		    local.c log.c mcast.c udp.c netlink.c vector.c \
 		    filter.c fds.c event.c process.c origin.c date.c \
 		    cache.c cache-ct.c cache-exp.c \
diff --git a/src/queue_tx.c b/src/queue_tx.c
new file mode 100644
index 0000000..0c99163
--- /dev/null
+++ b/src/queue_tx.c
@@ -0,0 +1,60 @@
+/*
+ * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdint.h>
+#include "queue_tx.h"
+#include "queue.h"
+#include "conntrackd.h"
+#include "network.h"
+
+void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
+{
+	struct queue_object *qobj;
+	struct nethdr_ack *ack;
+
+	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
+	if (qobj == NULL)
+		return;
+
+	ack		= (struct nethdr_ack *)qobj->data;
+	ack->type 	= NET_T_CTL;
+	ack->flags	= flags;
+	ack->from	= from;
+	ack->to		= to;
+
+	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
+		queue_object_free(qobj);
+}
+
+void tx_queue_add_ctlmsg2(uint32_t flags)
+{
+	struct queue_object *qobj;
+	struct nethdr *ctl;
+
+	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
+	if (qobj == NULL)
+		return;
+
+	ctl		= (struct nethdr *)qobj->data;
+	ctl->type 	= NET_T_CTL;
+	ctl->flags	= flags;
+
+	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
+		queue_object_free(qobj);
+}
diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c
index aa6838a..ce5270b 100644
--- a/src/sync-ftfw.c
+++ b/src/sync-ftfw.c
@@ -20,6 +20,7 @@
 #include "conntrackd.h"
 #include "sync.h"
 #include "queue.h"
+#include "queue_tx.h"
 #include "network.h"
 #include "alarm.h"
 #include "log.h"
@@ -95,42 +96,6 @@ static void nethdr_set_hello(struct nethdr *net)
 	}
 }
 
-static void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
-{
-	struct queue_object *qobj;
-	struct nethdr_ack *ack;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ack		= (struct nethdr_ack *)qobj->data;
-	ack->type 	= NET_T_CTL;
-	ack->flags	= flags;
-	ack->from	= from;
-	ack->to		= to;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
-static void tx_queue_add_ctlmsg2(uint32_t flags)
-{
-	struct queue_object *qobj;
-	struct nethdr *ctl;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ctl		= (struct nethdr *)qobj->data;
-	ctl->type 	= NET_T_CTL;
-	ctl->flags	= flags;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
 /* this function is called from the alarm framework */
 static void do_alive_alarm(struct alarm_block *a, void *data)
 {
diff --git a/src/sync-notrack.c b/src/sync-notrack.c
index 7ade3a7..5b6814d 100644
--- a/src/sync-notrack.c
+++ b/src/sync-notrack.c
@@ -20,6 +20,7 @@
 #include "conntrackd.h"
 #include "sync.h"
 #include "queue.h"
+#include "queue_tx.h"
 #include "network.h"
 #include "log.h"
 #include "cache.h"
@@ -56,25 +57,6 @@ static struct cache_extra cache_notrack_extra = {
 	.destroy	= cache_notrack_del
 };
 
-static void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
-{
-	struct queue_object *qobj;
-	struct nethdr_ack *ack;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ack		= (struct nethdr_ack *)qobj->data;
-        ack->type	= NET_T_CTL;
-	ack->flags	= flags;
-	ack->from	= from;
-	ack->to		= to;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
 static int do_cache_to_tx(void *data1, void *data2)
 {
 	struct cache_object *obj = data2;
@@ -228,23 +210,6 @@ static void notrack_enqueue(struct cache_object *obj, int query)
 		cache_object_get(obj);
 }
 
-static void tx_queue_add_ctlmsg2(uint32_t flags)
-{
-	struct queue_object *qobj;
-	struct nethdr *ctl;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ctl		= (struct nethdr *)qobj->data;
-	ctl->type	= NET_T_CTL;
-	ctl->flags	= flags;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
 static void do_alive_alarm(struct alarm_block *a, void *data)
 {
 	tx_queue_add_ctlmsg2(NET_F_ALIVE);


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors
  2017-04-20 17:28 [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Arturo Borrero Gonzalez
@ 2017-04-20 17:28 ` Arturo Borrero Gonzalez
  2017-04-25 11:34   ` Pablo Neira Ayuso
  2017-04-20 17:28 ` [conntrack-tools PATCH 3/4] conntrackd: factorize resync operations Arturo Borrero Gonzalez
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-20 17:28 UTC (permalink / raw)
  To: netfilter-devel

These warnings, if they happen, should help users.

Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
---
 src/channel.c  |    6 +++++-
 src/queue_tx.c |   11 +++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/channel.c b/src/channel.c
index acbfa7d..b2f114d 100644
--- a/src/channel.c
+++ b/src/channel.c
@@ -19,6 +19,7 @@
 #include "channel.h"
 #include "network.h"
 #include "queue.h"
+#include "log.h"
 
 static struct channel_ops *ops[CHANNEL_MAX];
 extern struct channel_ops channel_mcast;
@@ -161,8 +162,11 @@ static void channel_enqueue_errors(struct channel *c)
 	struct channel_error *error;
 
 	qobj = queue_object_new(Q_ELEM_ERR, sizeof(struct channel_error));
-	if (qobj == NULL)
+	if (qobj == NULL) {
+		dlog(LOG_WARNING, "could not enqueue channel errors, failed to"
+		     " allocate memory");
 		return;
+	}
 
 	error		= (struct channel_error *)qobj->data;
 	error->len	= c->buffer->len;
diff --git a/src/queue_tx.c b/src/queue_tx.c
index 0c99163..83eb111 100644
--- a/src/queue_tx.c
+++ b/src/queue_tx.c
@@ -22,6 +22,7 @@
 #include "queue.h"
 #include "conntrackd.h"
 #include "network.h"
+#include "log.h"
 
 void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
 {
@@ -29,8 +30,11 @@ void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
 	struct nethdr_ack *ack;
 
 	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
+	if (qobj == NULL) {
+		dlog(LOG_WARNING, "could not queue ACK message. Failed to "
+		     "allocate memory");
 		return;
+	}
 
 	ack		= (struct nethdr_ack *)qobj->data;
 	ack->type 	= NET_T_CTL;
@@ -48,8 +52,11 @@ void tx_queue_add_ctlmsg2(uint32_t flags)
 	struct nethdr *ctl;
 
 	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
+	if (qobj == NULL) {
+		dlog(LOG_WARNING, "could not queue CTL message. Failed to "
+		     "allocate memory");
 		return;
+	}
 
 	ctl		= (struct nethdr *)qobj->data;
 	ctl->type 	= NET_T_CTL;


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [conntrack-tools PATCH 3/4] conntrackd: factorize resync operations
  2017-04-20 17:28 [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Arturo Borrero Gonzalez
  2017-04-20 17:28 ` [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors Arturo Borrero Gonzalez
@ 2017-04-20 17:28 ` Arturo Borrero Gonzalez
  2017-05-08 17:52   ` Pablo Neira Ayuso
  2017-04-20 17:28 ` [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option Arturo Borrero Gonzalez
  2017-05-08 17:52 ` [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Pablo Neira Ayuso
  3 siblings, 1 reply; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-20 17:28 UTC (permalink / raw)
  To: netfilter-devel

Resync operations factorization. There are two:
 * resync_send	--> conntrackd -B (send bulk resync)
 * resync_req	--> conntrackd -n (request resync)

Future patches reuse this factorized code.

Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
---
 include/Makefile.am |    2 +-
 include/resync.h    |    7 +++++++
 src/Makefile.am     |    2 +-
 src/resync.c        |   40 ++++++++++++++++++++++++++++++++++++++++
 src/sync-ftfw.c     |   10 +++-------
 src/sync-notrack.c  |   14 +++-----------
 6 files changed, 55 insertions(+), 20 deletions(-)
 create mode 100644 include/resync.h
 create mode 100644 src/resync.c

diff --git a/include/Makefile.am b/include/Makefile.am
index 84fd608..352054e 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
 		 network.h filter.h queue.h vector.h cidr.h \
 		 traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
 		 process.h origin.h internal.h external.h date.h nfct.h \
-		 helper.h myct.h stack.h systemd.h queue_tx.h
+		 helper.h myct.h stack.h systemd.h queue_tx.h resync.h
 
diff --git a/include/resync.h b/include/resync.h
new file mode 100644
index 0000000..5986600
--- /dev/null
+++ b/include/resync.h
@@ -0,0 +1,7 @@
+#ifndef _RESYNC_H_
+#define _RESYNC_H_
+
+void resync_req(void);
+void resync_send(int (*do_cache_to_tx)(void *data1, void *data2));
+
+#endif /*_RESYNC_H_ */
diff --git a/src/Makefile.am b/src/Makefile.am
index 39c7315..a9a8685 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -52,7 +52,7 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c queue_tx.c rbtree.c \
 		    external_cache.c external_inject.c \
 		    internal_cache.c internal_bypass.c \
 		    read_config_yy.y read_config_lex.l \
-		    stack.c
+		    stack.c resync.c
 
 if HAVE_CTHELPER
 conntrackd_SOURCES += cthelper.c helpers.c utils.c expect.c
diff --git a/src/resync.c b/src/resync.c
new file mode 100644
index 0000000..dbb2b6f
--- /dev/null
+++ b/src/resync.c
@@ -0,0 +1,40 @@
+/*
+ * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "conntrackd.h"
+#include "network.h"
+#include "log.h"
+#include "queue_tx.h"
+#include "resync.h"
+#include "cache.h"
+
+void resync_req(void)
+{
+	dlog(LOG_NOTICE, "resync requested");
+	tx_queue_add_ctlmsg(NET_F_RESYNC, 0, 0);
+}
+
+void resync_send(int (*do_cache_to_tx)(void *data1, void *data2))
+{
+	dlog(LOG_NOTICE, "sending bulk update");
+	cache_iterate(STATE(mode)->internal->ct.data,
+		      NULL, do_cache_to_tx);
+	cache_iterate(STATE(mode)->internal->exp.data,
+		      NULL, do_cache_to_tx);
+}
diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c
index ce5270b..6fdb058 100644
--- a/src/sync-ftfw.c
+++ b/src/sync-ftfw.c
@@ -26,6 +26,7 @@
 #include "log.h"
 #include "cache.h"
 #include "fds.h"
+#include "resync.h"
 
 #include <string.h>
 #include <errno.h>
@@ -189,15 +190,10 @@ static int ftfw_local(int fd, int type, void *data)
 
 	switch(type) {
 	case REQUEST_DUMP:
-		dlog(LOG_NOTICE, "request resync");
-		tx_queue_add_ctlmsg(NET_F_RESYNC, 0, 0);
+		resync_req();
 		break;
 	case SEND_BULK:
-		dlog(LOG_NOTICE, "sending bulk update");
-		cache_iterate(STATE(mode)->internal->ct.data,
-			      NULL, do_cache_to_tx);
-		cache_iterate(STATE(mode)->internal->exp.data,
-			      NULL, do_cache_to_tx);
+		resync_send(do_cache_to_tx);
 		break;
 	case STATS_RSQUEUE:
 		ftfw_local_queue(fd);
diff --git a/src/sync-notrack.c b/src/sync-notrack.c
index 5b6814d..7ce62d9 100644
--- a/src/sync-notrack.c
+++ b/src/sync-notrack.c
@@ -25,6 +25,7 @@
 #include "log.h"
 #include "cache.h"
 #include "fds.h"
+#include "resync.h"
 
 #include <string.h>
 
@@ -103,19 +104,10 @@ static int notrack_local(int fd, int type, void *data)
 
 	switch(type) {
 	case REQUEST_DUMP:
-		dlog(LOG_NOTICE, "request resync");
-		tx_queue_add_ctlmsg(NET_F_RESYNC, 0, 0);
+		resync_req();
 		break;
 	case SEND_BULK:
-		dlog(LOG_NOTICE, "sending bulk update");
-		if (CONFIG(sync).internal_cache_disable) {
-			kernel_resync();
-		} else {
-			cache_iterate(STATE(mode)->internal->ct.data,
-				      NULL, do_cache_to_tx);
-			cache_iterate(STATE(mode)->internal->exp.data,
-				      NULL, do_cache_to_tx);
-		}
+		resync_send(do_cache_to_tx);
 		break;
 	default:
 		ret = 0;


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-04-20 17:28 [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Arturo Borrero Gonzalez
  2017-04-20 17:28 ` [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors Arturo Borrero Gonzalez
  2017-04-20 17:28 ` [conntrack-tools PATCH 3/4] conntrackd: factorize resync operations Arturo Borrero Gonzalez
@ 2017-04-20 17:28 ` Arturo Borrero Gonzalez
  2017-04-25 11:37   ` Pablo Neira Ayuso
  2017-05-08 17:52 ` [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Pablo Neira Ayuso
  3 siblings, 1 reply; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-20 17:28 UTC (permalink / raw)
  To: netfilter-devel

In some environments where both nodes of a cluster share all the conntracks,
after an initial or manual resync, the conntrack information diverges from
node to node.

I have observed that this is not due to synchronization problems, given the
link between the nodes is very stable and stats show no issues.
So, this could be due to every node of the cluster seeing slightly different
traffic and flow updates, perhaps different timeouts being applied to
the conntracks in every node.
A manual resync (using conntrackd -n) resolves these issues immediately.

This new configuration option tells conntrackd to request a resync
with the other node, similar to what could happen manually using
the 'conntrackd -n' command.

For now, this option is only valid in NOTRACK sync mode.

Example configuration:

[...]
Sync {
        Mode NOTRACK {
                DisableInternalCache on
                DisableExternalCache on
                RequestResync 30
        }
        TCP {
                IPv4_address 127.0.0.1
                IPv4_Destination_Address 127.0.0.1
                Port 3780
                Interface eth0
                SndSocketBuffer 1249280
                RcvSocketBuffer 1249280
                Checksum on
        }
        Options {
                TCPWindowTracking Off
                ExpectationSync On
        }
}
[...]

Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
---
 conntrackd.conf.5     |    9 +++++++++
 include/conntrackd.h  |    1 +
 include/resync.h      |    1 +
 src/read_config_lex.l |    1 +
 src/read_config_yy.y  |    8 +++++++-
 src/resync.c          |   21 +++++++++++++++++++++
 src/run.c             |    3 +++
 7 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/conntrackd.conf.5 b/conntrackd.conf.5
index 4a4f2e2..6ac0fb6 100644
--- a/conntrackd.conf.5
+++ b/conntrackd.conf.5
@@ -195,6 +195,15 @@ messages are directly sent through the dedicated link.
 This option is set off by default.
 
 .TP
+.BI "RequestResync <seconds>"
+Request a complete resync from the other node. This should help resolve
+synchronization issues more easily if they happen in your environment.
+
+Example: RequestResync 60
+
+This option is set off by default.
+
+.TP
 .BI "DisableExternalCache <on|off>"
 Same as in \fBFTFW\fP mode.
 
diff --git a/include/conntrackd.h b/include/conntrackd.h
index 27e43db..4cfb373 100644
--- a/include/conntrackd.h
+++ b/include/conntrackd.h
@@ -111,6 +111,7 @@ struct ct_conf {
 	int event_iterations_limit;
 	int systemd;
 	int running_mode;
+	int request_resync;
 	struct {
 		int error_queue_length;
 	} channelc;
diff --git a/include/resync.h b/include/resync.h
index 5986600..75cd7dd 100644
--- a/include/resync.h
+++ b/include/resync.h
@@ -3,5 +3,6 @@
 
 void resync_req(void);
 void resync_send(int (*do_cache_to_tx)(void *data1, void *data2));
+void resync_run_init(void);
 
 #endif /*_RESYNC_H_ */
diff --git a/src/read_config_lex.l b/src/read_config_lex.l
index a378269..664b818 100644
--- a/src/read_config_lex.l
+++ b/src/read_config_lex.l
@@ -136,6 +136,7 @@ notrack		[N|n][O|o][T|t][R|r][A|a][C|c][K|k]
 "ExpectMax"			{ return T_HELPER_EXPECT_MAX; }
 "ExpectTimeout"			{ return T_HELPER_EXPECT_TIMEOUT; }
 "Systemd"			{ return T_SYSTEMD; }
+"RequestResync"			{ return T_REQUEST_RESYNC; }
 
 {is_on}			{ return T_ON; }
 {is_off}		{ return T_OFF; }
diff --git a/src/read_config_yy.y b/src/read_config_yy.y
index 2c08d4e..0509bd3 100644
--- a/src/read_config_yy.y
+++ b/src/read_config_yy.y
@@ -81,7 +81,7 @@ enum {
 %token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC
 %token T_HELPER T_HELPER_QUEUE_NUM T_HELPER_QUEUE_LEN T_HELPER_POLICY
 %token T_HELPER_EXPECT_TIMEOUT T_HELPER_EXPECT_MAX
-%token T_SYSTEMD
+%token T_SYSTEMD T_REQUEST_RESYNC
 
 %token <string> T_IP T_PATH_VAL
 %token <val> T_NUMBER
@@ -777,6 +777,7 @@ sync_mode_notrack_line: timeout
 		      | purge
 		      | disable_internal_cache
 		      | disable_external_cache
+		      | request_resync
 		      ;
 
 disable_internal_cache: T_DISABLE_INTERNAL_CACHE T_ON
@@ -804,6 +805,11 @@ resend_queue_size: T_RESEND_QUEUE_SIZE T_NUMBER
 	conf.resend_queue_size = $2;
 };
 
+request_resync: T_REQUEST_RESYNC T_NUMBER
+{
+	conf.request_resync = $2;
+};
+
 window_size: T_WINDOWSIZE T_NUMBER
 {
 	conf.window_size = $2;
diff --git a/src/resync.c b/src/resync.c
index dbb2b6f..4310d6b 100644
--- a/src/resync.c
+++ b/src/resync.c
@@ -23,6 +23,9 @@
 #include "queue_tx.h"
 #include "resync.h"
 #include "cache.h"
+#include "alarm.h"
+
+static struct alarm_block	resync_run_alarm;
 
 void resync_req(void)
 {
@@ -38,3 +41,21 @@ void resync_send(int (*do_cache_to_tx)(void *data1, void *data2))
 	cache_iterate(STATE(mode)->internal->exp.data,
 		      NULL, do_cache_to_tx);
 }
+
+static void resync_run(struct alarm_block *a, void *data)
+{
+	resync_req();
+	add_alarm(&resync_run_alarm, CONFIG(request_resync), 0);
+}
+
+void resync_run_init(void)
+{
+	if (CONFIG(request_resync) == 0)
+		return;
+
+	dlog(LOG_NOTICE, "setting up atomatic resync requests every %d "
+	     "seconds", CONFIG(request_resync));
+
+	init_alarm(&resync_run_alarm, NULL,  resync_run);
+	add_alarm(&resync_run_alarm, CONFIG(request_resync), 0);
+}
diff --git a/src/run.c b/src/run.c
index 1fe6cba..4ff2186 100644
--- a/src/run.c
+++ b/src/run.c
@@ -31,6 +31,7 @@
 #include "date.h"
 #include "internal.h"
 #include "systemd.h"
+#include "resync.h"
 
 #include <errno.h>
 #include <signal.h>
@@ -284,6 +285,8 @@ init(void)
 #endif
 	time(&STATE(stats).daemon_start_time);
 
+	resync_run_init();
+
 	dlog(LOG_NOTICE, "initialization completed");
 
 	return 0;


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors
  2017-04-20 17:28 ` [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors Arturo Borrero Gonzalez
@ 2017-04-25 11:34   ` Pablo Neira Ayuso
  2017-04-25 12:40     ` Arturo Borrero Gonzalez
  0 siblings, 1 reply; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-04-25 11:34 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: netfilter-devel

On Thu, Apr 20, 2017 at 07:28:06PM +0200, Arturo Borrero Gonzalez wrote:
> These warnings, if they happen, should help users.
> 
> Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
> ---
>  src/channel.c  |    6 +++++-
>  src/queue_tx.c |   11 +++++++++--
>  2 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/src/channel.c b/src/channel.c
> index acbfa7d..b2f114d 100644
> --- a/src/channel.c
> +++ b/src/channel.c
> @@ -19,6 +19,7 @@
>  #include "channel.h"
>  #include "network.h"
>  #include "queue.h"
> +#include "log.h"
>  
>  static struct channel_ops *ops[CHANNEL_MAX];
>  extern struct channel_ops channel_mcast;
> @@ -161,8 +162,11 @@ static void channel_enqueue_errors(struct channel *c)
>  	struct channel_error *error;
>  
>  	qobj = queue_object_new(Q_ELEM_ERR, sizeof(struct channel_error));
> -	if (qobj == NULL)
> +	if (qobj == NULL) {
> +		dlog(LOG_WARNING, "could not enqueue channel errors, failed to"
> +		     " allocate memory");

Did you ever hit this?

Moreover, we have stats that can be dumped via option. Better use them there?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-04-20 17:28 ` [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option Arturo Borrero Gonzalez
@ 2017-04-25 11:37   ` Pablo Neira Ayuso
  2017-04-25 12:46     ` Arturo Borrero Gonzalez
  0 siblings, 1 reply; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-04-25 11:37 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: netfilter-devel

On Thu, Apr 20, 2017 at 07:28:16PM +0200, Arturo Borrero Gonzalez wrote:
> In some environments where both nodes of a cluster share all the conntracks,
> after an initial or manual resync, the conntrack information diverges from
> node to node.
> 
> I have observed that this is not due to syncronization problems, given the
> link between the nodes is very stable and stats show no issues.
> So, this could be due to every node of the cluster seing slighly different
> traffic and flow updates, perhaps different tiemouts being applied to
> the conntracks in every node.
> A manual resync (using conntrackd -n) resolves these issues inmediately.
> 
> This new configuration option tells conntrackd to request a resync
> with the other node, similar to what could happen manually using
> the 'conntrackd -n' command.
> 
> By now this option is only valid in NOTRACK sync mode.
> 
> Example configuration:
> 
> [...]
> Sync {
>         Mode NOTRACK {
>                 DisableInternalCache on
>                 DisableExternalCache on
>                 RequestResync 30

This looks very similar to the timer-based approach that is already
there. Did you give it a try?

This approach doesn't nicely solve the case where you have an entry
with a large timeout that got out of sync.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors
  2017-04-25 11:34   ` Pablo Neira Ayuso
@ 2017-04-25 12:40     ` Arturo Borrero Gonzalez
  2017-04-25 13:16       ` Pablo Neira Ayuso
  0 siblings, 1 reply; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-25 12:40 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailing list

On 25 April 2017 at 13:34, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Thu, Apr 20, 2017 at 07:28:06PM +0200, Arturo Borrero Gonzalez wrote:
>> These warnings, if they happen, should help users.
>>
>> Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
>> ---
>>  src/channel.c  |    6 +++++-
>>  src/queue_tx.c |   11 +++++++++--
>>  2 files changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/channel.c b/src/channel.c
>> index acbfa7d..b2f114d 100644
>> --- a/src/channel.c
>> +++ b/src/channel.c
>> @@ -19,6 +19,7 @@
>>  #include "channel.h"
>>  #include "network.h"
>>  #include "queue.h"
>> +#include "log.h"
>>
>>  static struct channel_ops *ops[CHANNEL_MAX];
>>  extern struct channel_ops channel_mcast;
>> @@ -161,8 +162,11 @@ static void channel_enqueue_errors(struct channel *c)
>>       struct channel_error *error;
>>
>>       qobj = queue_object_new(Q_ELEM_ERR, sizeof(struct channel_error));
>> -     if (qobj == NULL)
>> +     if (qobj == NULL) {
>> +             dlog(LOG_WARNING, "could not enqueue channel errors, failed to"
>> +                  " allocate memory");
>
> Did you ever hit this?
>

I don't know; there is no way to know in a production system since this happens silently.

Since conntrackd can be of critical importance in some environments, I
guess it doesn't hurt
to be more verbose. This particular memory allocation failure isn't
interesting per se, but it could be related
to other more serious issues on the system.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-04-25 11:37   ` Pablo Neira Ayuso
@ 2017-04-25 12:46     ` Arturo Borrero Gonzalez
  2017-04-25 13:18       ` Pablo Neira Ayuso
  0 siblings, 1 reply; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-25 12:46 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailing list

On 25 April 2017 at 13:37, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Thu, Apr 20, 2017 at 07:28:16PM +0200, Arturo Borrero Gonzalez wrote:
>> In some environments where both nodes of a cluster share all the conntracks,
>> after an initial or manual resync, the conntrack information diverges from
>> node to node.
>>
>> I have observed that this is not due to syncronization problems, given the
>> link between the nodes is very stable and stats show no issues.
>> So, this could be due to every node of the cluster seing slighly different
>> traffic and flow updates, perhaps different tiemouts being applied to
>> the conntracks in every node.
>> A manual resync (using conntrackd -n) resolves these issues inmediately.
>>
>> This new configuration option tells conntrackd to request a resync
>> with the other node, similar to what could happen manually using
>> the 'conntrackd -n' command.
>>
>> By now this option is only valid in NOTRACK sync mode.
>>
>> Example configuration:
>>
>> [...]
>> Sync {
>>         Mode NOTRACK {
>>                 DisableInternalCache on
>>                 DisableExternalCache on
>>                 RequestResync 30
>
> This looks very similar to the timer based approach that it is already
> there. Did you give it a try?
>

Yes. The timer based approach is... timer based (async).

It doesn't fit in an environment where you need to sync events as soon
as they happen.

> This approach doesn't solve nicely the case where you have an entry
> with a large timeout that got out of sync.

My idea is to be able to automatically force-sync nodes every 2 or 3
minutes (in my case).
Users may choose a different time of course. What do you have in mind
for your case, concretely?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors
  2017-04-25 12:40     ` Arturo Borrero Gonzalez
@ 2017-04-25 13:16       ` Pablo Neira Ayuso
  2017-05-02  8:34         ` Arturo Borrero Gonzalez
  0 siblings, 1 reply; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-04-25 13:16 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: Netfilter Development Mailing list

On Tue, Apr 25, 2017 at 02:40:45PM +0200, Arturo Borrero Gonzalez wrote:
> On 25 April 2017 at 13:34, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > On Thu, Apr 20, 2017 at 07:28:06PM +0200, Arturo Borrero Gonzalez wrote:
> >> These warnings, if they happen, should help users.
> >>
> >> Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
> >> ---
> >>  src/channel.c  |    6 +++++-
> >>  src/queue_tx.c |   11 +++++++++--
> >>  2 files changed, 14 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/src/channel.c b/src/channel.c
> >> index acbfa7d..b2f114d 100644
> >> --- a/src/channel.c
> >> +++ b/src/channel.c
> >> @@ -19,6 +19,7 @@
> >>  #include "channel.h"
> >>  #include "network.h"
> >>  #include "queue.h"
> >> +#include "log.h"
> >>
> >>  static struct channel_ops *ops[CHANNEL_MAX];
> >>  extern struct channel_ops channel_mcast;
> >> @@ -161,8 +162,11 @@ static void channel_enqueue_errors(struct channel *c)
> >>       struct channel_error *error;
> >>
> >>       qobj = queue_object_new(Q_ELEM_ERR, sizeof(struct channel_error));
> >> -     if (qobj == NULL)
> >> +     if (qobj == NULL) {
> >> +             dlog(LOG_WARNING, "could not enqueue channel errors, failed to"
> >> +                  " allocate memory");
> >
> > Did you ever hit this?
> >
>
> I don't know, no way to know in a production system since this happen silently.

No problem. I just wanted to know if you're addressing a real issue or
you just found this spot with no log message while passing by.

> Since conntrackd can be of critical importance in some environments I
> guess it doesn't harm to be more verbose. This concrete memory
> allocation failure isn't interesting per se, but it could be related
> to other more serious issues on the system.

Yes, but this is going to fill the logs if it ever happens.

Better add stats:

        /* statistics */
        struct {
                uint64_t        msg_rcv_malformed;
                uint32_t        msg_rcv_bad_version;
                uint32_t        msg_rcv_bad_payload;
                uint32_t        msg_rcv_bad_header;
                uint32_t        msg_rcv_bad_type;
                uint32_t        msg_rcv_truncated;
                uint32_t        msg_rcv_bad_size;
                uint32_t        msg_snd_malformed;
                uint64_t        msg_rcv_lost;
                uint64_t        msg_rcv_before;
        } error;

A quick glance at the code to see how we're globally dealing with lack
of memory would be good. There's little we can do in that situation,
and in my experience this most likely points to a memory leak.

So better to follow a less aggressive way than filling the logs, OK? We
indeed have a way to report this via the existing -s options.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-04-25 12:46     ` Arturo Borrero Gonzalez
@ 2017-04-25 13:18       ` Pablo Neira Ayuso
  2017-04-26 11:32         ` Arturo Borrero Gonzalez
  0 siblings, 1 reply; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-04-25 13:18 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: Netfilter Development Mailing list

On Tue, Apr 25, 2017 at 02:46:52PM +0200, Arturo Borrero Gonzalez wrote:
> On 25 April 2017 at 13:37, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > On Thu, Apr 20, 2017 at 07:28:16PM +0200, Arturo Borrero Gonzalez wrote:
> >> In some environments where both nodes of a cluster share all the conntracks,
> >> after an initial or manual resync, the conntrack information diverges from
> >> node to node.
> >>
> >> I have observed that this is not due to syncronization problems, given the
> >> link between the nodes is very stable and stats show no issues.
> >> So, this could be due to every node of the cluster seing slighly different
> >> traffic and flow updates, perhaps different tiemouts being applied to
> >> the conntracks in every node.
> >> A manual resync (using conntrackd -n) resolves these issues inmediately.
> >>
> >> This new configuration option tells conntrackd to request a resync
> >> with the other node, similar to what could happen manually using
> >> the 'conntrackd -n' command.
> >>
> >> By now this option is only valid in NOTRACK sync mode.
> >>
> >> Example configuration:
> >>
> >> [...]
> >> Sync {
> >>         Mode NOTRACK {
> >>                 DisableInternalCache on
> >>                 DisableExternalCache on
> >>                 RequestResync 30
> >
> > This looks very similar to the timer based approach that it is already
> > there. Did you give it a try?
> >
> 
> Yes. The timer based approach is... timer based (async).
> 
> It doesn't fit in an environment where you need to sync events as soon
> as they happen.

IIRC the timer based works like this:

1) If an event occurs, a sync message is sent.
2) After some time, we send a message to tell the other peer the entry
   is still there.
3) If no message is received, then the entry expires.

> > This approach doesn't solve nicely the case where you have an entry
> > with a large timeout that got out of sync.
> 
> My idea is to be able to automatically force-sync nodes every 2 o 3
> minutes (in my case).

I see. Just wanted to know why the existing timer based doesn't fit
well for you.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-04-25 13:18       ` Pablo Neira Ayuso
@ 2017-04-26 11:32         ` Arturo Borrero Gonzalez
  2017-05-01  9:13           ` Pablo Neira Ayuso
  0 siblings, 1 reply; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-26 11:32 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailing list

On 25 April 2017 at 15:18, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>>
>> Yes. The timer based approach is... timer based (async).
>>
>> It doesn't fit in an environment where you need to sync events as soon
>> as they happen.
>
> IIRC the timer based works like this:
>
> 1) If event occurs, sync message is send.
> 2) After some time, we send a message to tell the other peer the entry
>    is still there.
> 3) If no message is received, then the entry expires.
>

the ALARM mode requires to commit the external cache instead of the
conns being directly injected into the kernel.

I think the new RequestResync method (or whatever other alternative)
provides a good tradeoff
between methods and increases general usefulness of conntrackd.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-04-26 11:32         ` Arturo Borrero Gonzalez
@ 2017-05-01  9:13           ` Pablo Neira Ayuso
  2017-05-02  8:18             ` Arturo Borrero Gonzalez
  0 siblings, 1 reply; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-05-01  9:13 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: Netfilter Development Mailing list

On Wed, Apr 26, 2017 at 01:32:38PM +0200, Arturo Borrero Gonzalez wrote:
> On 25 April 2017 at 15:18, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> >>
> >> Yes. The timer based approach is... timer based (async).
> >>
> >> It doesn't fit in an environment where you need to sync events as soon
> >> as they happen.
> >
> > IIRC the timer based works like this:
> >
> > 1) If event occurs, sync message is send.
> > 2) After some time, we send a message to tell the other peer the entry
> >    is still there.
> > 3) If no message is received, then the entry expires.
> >
> 
> the ALARM mode requires to commit the external cache instead of the
> conns being directly injected into the kernel.

You may want to disable the external cache with the alarm mode. The
alarm mode only needs the internal cache though, but that shouldn't be
much of a problem.

With the alarm mode, you will skip spikes in CPU consumption since
resync is expensive.  With a very large table, this results in some
sort of lazy busy polling.

> I think the new RequestResync method (or whatever other alternative)
> provides a good tradeoff between methods and increases general
> usefulness of conntrackd.

I'm trying to help here if I can give something better ;-)

Look, you should at least combine this new RequestResync with
CommitTimeout. Even if you don't explicitly request a commit command,
this sets the timeout for the entries that are pushed into the kernel.

So, if you set:

        RequestResync 30
        CommitTimeout 180

connections we don't get any information from for 180 seconds will
expire.

BTW, how are you measuring this improvement? Is it that you get fewer of
the log error messages that you reported before, or so?

Thanks!

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-05-01  9:13           ` Pablo Neira Ayuso
@ 2017-05-02  8:18             ` Arturo Borrero Gonzalez
  2017-05-08 17:47               ` Pablo Neira Ayuso
  0 siblings, 1 reply; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-05-02  8:18 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailing list

On 1 May 2017 at 11:13, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>>
>> the ALARM mode requires to commit the external cache instead of the
>> conns being directly injected into the kernel.
>
> You may want to disable the external cache with the alarm mode. The
> alarm mode only needs the internal cache though, but that shouldn't be
> much of a problem.
>
> With the alarm mode, you will skip spikes in CPU consumption since
> resync is expensive.  With a very large table, this results in some
> sort of lazy busy polling.
>

I do the equivalent of this RequestResync by hand (i.e. using conntrackd -n) and
it seems to work fine, see below.

>> I think the new RequestResync method (or whatever other alternative)
>> provides a good tradeoff between methods and increases general
>> usefulness of conntrackd.
>
> I'm trying to help here if I can give something better ;-)
>
> Look, you should at least combine this new RequestResync with
> CommitTimeout. Even if you don't explicitly request a commit command,
> this sets the timeout for the entries that are pushed into the kernel.
>
> So, if you set:
>
>         RequestResync 30
>         CommitTimeout 180
>
> connections we don't get any information from for 180 seconds will
> expire.
>

It seems that CommitTimeout can't be combined with
DisableExternalCache, see the evaluate() function.

However a patch to enable this seems easy. I guess we could extend a
bit external_inject_ct_new() to allow reading the commit_timeout
instead of using 0 (similar to what cache_ct_commit_step() does,
right?)

I can add a new preparatory patch to the series to enable this.

> BTW, how are you measuring this improvement? Is that you get less logs
> error messages that you reported before or so?
>

What I detect is that after the initial startup/sync, the amount of
conntracks in each node diverges.
After 10 minutes, the conntracks in each node are quite different, i.e:

aborrero@node1:~ $ sudo conntrack -C
7885

aborrero@node2:~ $ sudo conntrack -C
17813

A manual 'conntrackd -n' seems to solve the problem:

aborrero@node1:~ $ sudo conntrackd -n ; sudo conntrack -C
18583

aborrero@node2:~ $ sudo conntrackd -n ; sudo conntrack -C
18473

I can understand that each node sees different traffic (is a
multi-master symmetric configuration) but still,
according to my conntrackd setup, I understand that the numbers
shouldn't show that big divergence.

Then, in this scenario, if node2 fails over to node1, there are 10k
entries missing in node1, connections that will be presumably lost and
dropped by the stateful configuration of nftables.

I currently solve this by means of scripts and cron calls which is a
bit ugly, given how easy it could be for conntrackd to resync by itself.

You may ask, what kind of traffic does each node see? In my current
setup, node1 sees all the IPv4 traffic and node2 sees all the IPv6
traffic (or reverse). In case of failover, a single node can see all
the traffic.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors
  2017-04-25 13:16       ` Pablo Neira Ayuso
@ 2017-05-02  8:34         ` Arturo Borrero Gonzalez
  2017-05-02 10:03           ` Pablo Neira Ayuso
  2017-05-02 10:09           ` Pablo Neira Ayuso
  0 siblings, 2 replies; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-05-02  8:34 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Netfilter Development Mailing list

On 25 April 2017 at 15:16, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>
> Yes, but this is going to full the logs if ever happen.
>
> Better add stats:
>
>         /* statistics */
>         struct {
>                 uint64_t        msg_rcv_malformed;
>                 uint32_t        msg_rcv_bad_version;
>                 uint32_t        msg_rcv_bad_payload;
>                 uint32_t        msg_rcv_bad_header;
>                 uint32_t        msg_rcv_bad_type;
>                 uint32_t        msg_rcv_truncated;
>                 uint32_t        msg_rcv_bad_size;
>                 uint32_t        msg_snd_malformed;
>                 uint64_t        msg_rcv_lost;
>                 uint64_t        msg_rcv_before;
>         } error;
>
> A quick glance at the code to see how we're globaling deal with lack
> of memory would be good. There's little we can do in that situation,
> and in my experience this most likely point to a memory leak.
>
> So better follow a less agressive way than filling the logs, OK? We
> indeed have a way to report this via the existing -s options.

Ok, then I can drop this patch from the series and add a couple more
stats later.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors
  2017-05-02  8:34         ` Arturo Borrero Gonzalez
@ 2017-05-02 10:03           ` Pablo Neira Ayuso
  2017-05-02 10:09           ` Pablo Neira Ayuso
  1 sibling, 0 replies; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-05-02 10:03 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: Netfilter Development Mailing list

On Tue, May 02, 2017 at 10:34:12AM +0200, Arturo Borrero Gonzalez wrote:
> On 25 April 2017 at 15:16, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> >
> > Yes, but this is going to full the logs if ever happen.
> >
> > Better add stats:
> >
> >         /* statistics */
> >         struct {
> >                 uint64_t        msg_rcv_malformed;
> >                 uint32_t        msg_rcv_bad_version;
> >                 uint32_t        msg_rcv_bad_payload;
> >                 uint32_t        msg_rcv_bad_header;
> >                 uint32_t        msg_rcv_bad_type;
> >                 uint32_t        msg_rcv_truncated;
> >                 uint32_t        msg_rcv_bad_size;
> >                 uint32_t        msg_snd_malformed;
> >                 uint64_t        msg_rcv_lost;
> >                 uint64_t        msg_rcv_before;
> >         } error;
> >
> > A quick glance at the code to see how we're globaling deal with lack
> > of memory would be good. There's little we can do in that situation,
> > and in my experience this most likely point to a memory leak.
> >
> > So better follow a less agressive way than filling the logs, OK? We
> > indeed have a way to report this via the existing -s options.
> 
> Ok, then I can drop this patch from the series and add later a couple
> more of stats.

Great. Thanks Arturo.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors
  2017-05-02  8:34         ` Arturo Borrero Gonzalez
  2017-05-02 10:03           ` Pablo Neira Ayuso
@ 2017-05-02 10:09           ` Pablo Neira Ayuso
  1 sibling, 0 replies; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-05-02 10:09 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: Netfilter Development Mailing list

On Tue, May 02, 2017 at 10:34:12AM +0200, Arturo Borrero Gonzalez wrote:
> On 25 April 2017 at 15:16, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> >
> > Yes, but this is going to full the logs if ever happen.
> >
> > Better add stats:
> >
> >         /* statistics */
> >         struct {
> >                 uint64_t        msg_rcv_malformed;
> >                 uint32_t        msg_rcv_bad_version;
> >                 uint32_t        msg_rcv_bad_payload;
> >                 uint32_t        msg_rcv_bad_header;
> >                 uint32_t        msg_rcv_bad_type;
> >                 uint32_t        msg_rcv_truncated;
> >                 uint32_t        msg_rcv_bad_size;
> >                 uint32_t        msg_snd_malformed;
> >                 uint64_t        msg_rcv_lost;
> >                 uint64_t        msg_rcv_before;
> >         } error;
> >
> > A quick glance at the code to see how we're globaling deal with lack
> > of memory would be good. There's little we can do in that situation,
> > and in my experience this most likely point to a memory leak.
> >
> > So better follow a less agressive way than filling the logs, OK? We
> > indeed have a way to report this via the existing -s options.
> 
> Ok, then I can drop this patch from the series and add later a couple
> more of stats.

Please do. Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option
  2017-05-02  8:18             ` Arturo Borrero Gonzalez
@ 2017-05-08 17:47               ` Pablo Neira Ayuso
  0 siblings, 0 replies; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-05-08 17:47 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: Netfilter Development Mailing list

On Tue, May 02, 2017 at 10:18:55AM +0200, Arturo Borrero Gonzalez wrote:
> On 1 May 2017 at 11:13, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> >>
> >> the ALARM mode requires to commit the external cache instead of the
> >> conns being directly injected into the kernel.
> >
> > You may want to disable the external cache with the alarm mode. The
> > alarm mode only needs the internal cache though, but that shouldn't be
> > much of a problem.
> >
> > With the alarm mode, you will skip spikes in CPU consumption since
> > resync is expensive.  With a very large table, this results in some
> > sort of lazy busy polling.
> >
> 
> I do the equivalent of this RequestResync by hand (i.e. using conntrackd -n) and
> it seems to work fine, see below.

OK.

> >> I think the new RequestResync method (or whatever other alternative)
> >> provides a good tradeoff between methods and increases general
> >> usefulness of conntrackd.
> >
> > I'm trying to help here if I can give something better ;-)
> >
> > Look, you should at least combine this new RequestResync with
> > CommitTimeout. Even if you don't explicitly request a commit command,
> > this sets the timeout for the entries that are pushed into the kernel.
> >
> > So, if you set:
> >
> >         RequestResync 30
> >         CommitTimeout 180
> >
> > connections we don't get any information from for 180 seconds will
> > expire.
> >
> 
> It seems that CommitTimeout can't be combined with
> DisableExternalCache, see the evaluate() function.
>
> However a patch to enable this seems easy. I guess we could extend a
> bit external_inject_ct_new() to allow reading the commit_timeout
> instead of using 0 (similar to what cache_ct_commit_step() does,
> right?)
> 
> I can add a new previous patch to the series to enable this.
> 
> > BTW, how are you measuring this improvement? Is that you get less logs
> > error messages that you reported before or so?
> >
> 
> What I detect is that after the initial startup/sync, the amount of
> conntracks in each node diverges.
> After 10 minutes, the conntracks in each node are quite different, i.e:
> 
> aborrero@node1:~ $ sudo conntrack -C
> 7885
> 
> aborrero@node2:~ $ sudo conntrack -C
> 17813
> 
> A manual 'conntrackd -n' seems to solve the problem:
> 
> aborrero@node1:~ $ sudo conntrackd -n ; sudo conntrack -C
> 18583
> 
> aborrero@node2:~ $ sudo conntrackd -n ; sudo conntrack -C
> 18473
> 
> I can understand that each node sees different traffic (is a
> multi-master symmetric configuration) but still,
> according to my conntrackd setup, I understand that the numbers
> shouldn't show that big divergence.
>
> Then, in this scenario, if node2 failover to node1, there are 10k
> entries missing in node1, connections that will be presumably lost and
> dropped by the stateful configuration of nftables.
> 
> I currently solve this by means of scripts and cron calls which is a
> bit ugly, given how easy could be for conntrackd to resync by himself.
> 
> You may ask, what kind of traffic does each node see? In my current
> setup, node1 sees all the IPv4 traffic and node2 sees all the IPv6
> traffic (or reverse). In case of failover, a sigle node can see all
> the traffic.

OK, so there is no asymmetric path at all as node1 sees IPv4 traffic
coming both in original and reply direction.

This is strange, there is probably a more fundamental bug here, I
would rather we not paper over this with a new option.

I'm going to reproduce this in my testbed and get back to you.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions
  2017-04-20 17:28 [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Arturo Borrero Gonzalez
                   ` (2 preceding siblings ...)
  2017-04-20 17:28 ` [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option Arturo Borrero Gonzalez
@ 2017-05-08 17:52 ` Pablo Neira Ayuso
  3 siblings, 0 replies; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-05-08 17:52 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: netfilter-devel

On Thu, Apr 20, 2017 at 07:28:00PM +0200, Arturo Borrero Gonzalez wrote:
> They are shared by both sync-ftfw and sync-notrack.

Applied this 1/4 patch, thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [conntrack-tools PATCH 3/4] conntrackd: factorize resync operations
  2017-04-20 17:28 ` [conntrack-tools PATCH 3/4] conntrackd: factorize resync operations Arturo Borrero Gonzalez
@ 2017-05-08 17:52   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 20+ messages in thread
From: Pablo Neira Ayuso @ 2017-05-08 17:52 UTC (permalink / raw)
  To: Arturo Borrero Gonzalez; +Cc: netfilter-devel

On Thu, Apr 20, 2017 at 07:28:11PM +0200, Arturo Borrero Gonzalez wrote:
> Resync operations factorization. There are two:
>  * resync_send	--> conntrackd -B (send bulk resync)
>  * resync_req	--> conntrackd -n (request resync)

Applied this 3/4 patch too.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions
@ 2017-04-20 16:40 Arturo Borrero Gonzalez
  0 siblings, 0 replies; 20+ messages in thread
From: Arturo Borrero Gonzalez @ 2017-04-20 16:40 UTC (permalink / raw)
  To: netfilter-devel

They are shared by both sync-ftfw and sync-notrack.

Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
---
 include/Makefile.am |    2 +-
 include/queue_tx.h  |    7 ++++++
 src/Makefile.am     |    2 +-
 src/queue_tx.c      |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/sync-ftfw.c     |   37 +------------------------------
 src/sync-notrack.c  |   37 +------------------------------
 6 files changed, 71 insertions(+), 74 deletions(-)
 create mode 100644 include/queue_tx.h
 create mode 100644 src/queue_tx.c

diff --git a/include/Makefile.am b/include/Makefile.am
index e81463a..84fd608 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
 		 network.h filter.h queue.h vector.h cidr.h \
 		 traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
 		 process.h origin.h internal.h external.h date.h nfct.h \
-		 helper.h myct.h stack.h systemd.h
+		 helper.h myct.h stack.h systemd.h queue_tx.h
 
diff --git a/include/queue_tx.h b/include/queue_tx.h
new file mode 100644
index 0000000..e29b1f0
--- /dev/null
+++ b/include/queue_tx.h
@@ -0,0 +1,7 @@
+#ifndef _QUEUE_TX_H_
+#define _QUEUE_TX_H_
+
+void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to);
+void tx_queue_add_ctlmsg2(uint32_t flags);
+
+#endif /* _QUEUE_TX_H_ */
diff --git a/src/Makefile.am b/src/Makefile.am
index 144c52c..39c7315 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -37,7 +37,7 @@ endif
 
 nfct_LDFLAGS = -export-dynamic @LAZY_LDFLAGS@
 
-conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
+conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c queue_tx.c rbtree.c \
 		    local.c log.c mcast.c udp.c netlink.c vector.c \
 		    filter.c fds.c event.c process.c origin.c date.c \
 		    cache.c cache-ct.c cache-exp.c \
diff --git a/src/queue_tx.c b/src/queue_tx.c
new file mode 100644
index 0000000..0c99163
--- /dev/null
+++ b/src/queue_tx.c
@@ -0,0 +1,60 @@
+/*
+ * (C) 2006-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 by Vyatta Inc. <http://www.vyatta.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdint.h>
+#include "queue_tx.h"
+#include "queue.h"
+#include "conntrackd.h"
+#include "network.h"
+
+void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
+{
+	struct queue_object *qobj;
+	struct nethdr_ack *ack;
+
+	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
+	if (qobj == NULL)
+		return;
+
+	ack		= (struct nethdr_ack *)qobj->data;
+	ack->type 	= NET_T_CTL;
+	ack->flags	= flags;
+	ack->from	= from;
+	ack->to		= to;
+
+	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
+		queue_object_free(qobj);
+}
+
+void tx_queue_add_ctlmsg2(uint32_t flags)
+{
+	struct queue_object *qobj;
+	struct nethdr *ctl;
+
+	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
+	if (qobj == NULL)
+		return;
+
+	ctl		= (struct nethdr *)qobj->data;
+	ctl->type 	= NET_T_CTL;
+	ctl->flags	= flags;
+
+	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
+		queue_object_free(qobj);
+}
diff --git a/src/sync-ftfw.c b/src/sync-ftfw.c
index aa6838a..ce5270b 100644
--- a/src/sync-ftfw.c
+++ b/src/sync-ftfw.c
@@ -20,6 +20,7 @@
 #include "conntrackd.h"
 #include "sync.h"
 #include "queue.h"
+#include "queue_tx.h"
 #include "network.h"
 #include "alarm.h"
 #include "log.h"
@@ -95,42 +96,6 @@ static void nethdr_set_hello(struct nethdr *net)
 	}
 }
 
-static void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
-{
-	struct queue_object *qobj;
-	struct nethdr_ack *ack;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ack		= (struct nethdr_ack *)qobj->data;
-	ack->type 	= NET_T_CTL;
-	ack->flags	= flags;
-	ack->from	= from;
-	ack->to		= to;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
-static void tx_queue_add_ctlmsg2(uint32_t flags)
-{
-	struct queue_object *qobj;
-	struct nethdr *ctl;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ctl		= (struct nethdr *)qobj->data;
-	ctl->type 	= NET_T_CTL;
-	ctl->flags	= flags;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
 /* this function is called from the alarm framework */
 static void do_alive_alarm(struct alarm_block *a, void *data)
 {
diff --git a/src/sync-notrack.c b/src/sync-notrack.c
index 7ade3a7..5b6814d 100644
--- a/src/sync-notrack.c
+++ b/src/sync-notrack.c
@@ -20,6 +20,7 @@
 #include "conntrackd.h"
 #include "sync.h"
 #include "queue.h"
+#include "queue_tx.h"
 #include "network.h"
 #include "log.h"
 #include "cache.h"
@@ -56,25 +57,6 @@ static struct cache_extra cache_notrack_extra = {
 	.destroy	= cache_notrack_del
 };
 
-static void tx_queue_add_ctlmsg(uint32_t flags, uint32_t from, uint32_t to)
-{
-	struct queue_object *qobj;
-	struct nethdr_ack *ack;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ack		= (struct nethdr_ack *)qobj->data;
-        ack->type	= NET_T_CTL;
-	ack->flags	= flags;
-	ack->from	= from;
-	ack->to		= to;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
 static int do_cache_to_tx(void *data1, void *data2)
 {
 	struct cache_object *obj = data2;
@@ -228,23 +210,6 @@ static void notrack_enqueue(struct cache_object *obj, int query)
 		cache_object_get(obj);
 }
 
-static void tx_queue_add_ctlmsg2(uint32_t flags)
-{
-	struct queue_object *qobj;
-	struct nethdr *ctl;
-
-	qobj = queue_object_new(Q_ELEM_CTL, sizeof(struct nethdr_ack));
-	if (qobj == NULL)
-		return;
-
-	ctl		= (struct nethdr *)qobj->data;
-	ctl->type	= NET_T_CTL;
-	ctl->flags	= flags;
-
-	if (queue_add(STATE_SYNC(tx_queue), &qobj->qnode) < 0)
-		queue_object_free(qobj);
-}
-
 static void do_alive_alarm(struct alarm_block *a, void *data)
 {
 	tx_queue_add_ctlmsg2(NET_F_ALIVE);


^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-05-08 17:53 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-20 17:28 [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Arturo Borrero Gonzalez
2017-04-20 17:28 ` [conntrack-tools PATCH 2/4] conntrackd: warn users about queue allocation errors Arturo Borrero Gonzalez
2017-04-25 11:34   ` Pablo Neira Ayuso
2017-04-25 12:40     ` Arturo Borrero Gonzalez
2017-04-25 13:16       ` Pablo Neira Ayuso
2017-05-02  8:34         ` Arturo Borrero Gonzalez
2017-05-02 10:03           ` Pablo Neira Ayuso
2017-05-02 10:09           ` Pablo Neira Ayuso
2017-04-20 17:28 ` [conntrack-tools PATCH 3/4] conntrackd: factorize resync operations Arturo Borrero Gonzalez
2017-05-08 17:52   ` Pablo Neira Ayuso
2017-04-20 17:28 ` [conntrack-tools PATCH 4/4] conntrackd: introduce RequestResync option Arturo Borrero Gonzalez
2017-04-25 11:37   ` Pablo Neira Ayuso
2017-04-25 12:46     ` Arturo Borrero Gonzalez
2017-04-25 13:18       ` Pablo Neira Ayuso
2017-04-26 11:32         ` Arturo Borrero Gonzalez
2017-05-01  9:13           ` Pablo Neira Ayuso
2017-05-02  8:18             ` Arturo Borrero Gonzalez
2017-05-08 17:47               ` Pablo Neira Ayuso
2017-05-08 17:52 ` [conntrack-tools PATCH 1/4] conntrackd: factorice tx_queue functions Pablo Neira Ayuso
  -- strict thread matches above, loose matches on Subject: below --
2017-04-20 16:40 Arturo Borrero Gonzalez

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.