All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] netfilter: arptables: add mcmangle target
@ 2009-01-28 14:58 Pablo Neira Ayuso
  2009-01-28 14:58 ` [PATCH 2/3] netfilter: xtables: add PKTTYPE target Pablo Neira Ayuso
                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-01-28 14:58 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch adds the mcmangle target for arptables which allows
altering the source hardware address in ARP with a multicast
hardware address. This target can be used to make a switch flood
packets to the ports that use the same MAC multicast address. This
is useful to deploy load-sharing clusters in environments in which
the switch does not provide a way to flood packets to several
ports.

Since all the nodes receive the same packets, each node decides whether
it handles a packet based on a hashing approach (see the `cluster'
iptables match that comes with this patchset).

Theoretically, the use of the reserved VRRP hardware address should
be fine for this, however, switches generally treat this hardware
address space as normal unicast hardware address. Thus, in practise,
it is not possible to have two nodes with the same VRRP hardware
address.

Please, note that this target violates RFC 1812 (section 3.3.2) since
an ethernet device must not use a multicast link address.

An example of the use of this target:

arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6 \
	--mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
arptables -I INPUT -i eth0 --h-length 6 --destination-mac \
	01:00:5e:00:01:01 -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

Where 00:zz:yy:xx:5a:27 is the original hardware address of this
node. Note that the mcmangle target registers an entry in the multicast
list that is required to get this working:

$ cat /proc/net/dev_mcast | grep eth0 | head -1
2    eth0            1     0     01005e000101

You need the PKTTYPE iptables target (included in this patchset) to
set skb->pkt_type to PACKET_HOST. Otherwise, you would only be able to
ICMP ping nodes in the network ;).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---

 include/linux/netfilter_arp/arpt_mcmangle.h |   14 ++++
 net/ipv4/netfilter/Kconfig                  |   12 +++
 net/ipv4/netfilter/Makefile                 |    1 
 net/ipv4/netfilter/arpt_mcmangle.c          |   98 +++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter_arp/arpt_mcmangle.h
 create mode 100644 net/ipv4/netfilter/arpt_mcmangle.c

diff --git a/include/linux/netfilter_arp/arpt_mcmangle.h b/include/linux/netfilter_arp/arpt_mcmangle.h
new file mode 100644
index 0000000..d14a1ab
--- /dev/null
+++ b/include/linux/netfilter_arp/arpt_mcmangle.h
@@ -0,0 +1,14 @@
+#ifndef _ARPT_MCMANGLE_H
+#define _ARPT_MCMANGLE_H
+#include <linux/netfilter_arp/arp_tables.h>
+
+struct net_device;
+
+struct arpt_mcmangle	/* targinfo of the arptables "mcmangle" target */
+{
+	char ifname[IFNAMSIZ];	/* name of the device looked up by checkentry */
+	char mc_devaddr[ETH_ALEN];	/* address copied into the ARP payload; checkentry requires the group bit in byte 0 */
+	struct net_device  __attribute__((aligned(8))) *dev;	/* kernel side: device reference stored by checkentry, released by destroy */
+};
+
+#endif /* _ARPT_MCMANGLE_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3816e1d..50f38b2 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -392,6 +392,18 @@ config IP_NF_ARP_MANGLE
 	  Allows altering the ARP packet payload: source and destination
 	  hardware and network addresses.
 
+config IP_NF_ARP_MCMANGLE
+	tristate "ARP multicast address mangling"
+	help
+	  Allows replacing the source unicast hardware address in ARP messages
+	  with a multicast hardware address. This target is useful to make a
+	  switch flood to all ports whose ethernet device has a multicast
+	  hardware address. Please note that this target violates RFC 1812
+	  (section 3.3.2) since an ethernet device must not use a multicast
+	  link address.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 endif # IP_NF_ARPTABLES
 
 endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5f9b650..5c4cc3e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 # generic ARP tables
 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
 obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+obj-$(CONFIG_IP_NF_ARP_MCMANGLE) += arpt_mcmangle.o
 
 # just filtering instance of ARP tables for now
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
diff --git a/net/ipv4/netfilter/arpt_mcmangle.c b/net/ipv4/netfilter/arpt_mcmangle.c
new file mode 100644
index 0000000..c82367a
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mcmangle.c
@@ -0,0 +1,98 @@
+/*
+ * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter_arp/arpt_mcmangle.h>
+
+/*
+ * Overwrite the hardware address that follows the fixed ARP header (the
+ * sender hardware address) with the configured multicast address.
+ *
+ * Returns NF_DROP when the packet cannot be made writable, when its
+ * hardware address length exceeds the configured address, or when the
+ * address field would run past the packet data; NF_ACCEPT otherwise.
+ */
+static unsigned int
+arpt_mcmangle_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct arpt_mcmangle *mangle = par->targinfo;
+	const struct arphdr *arp;
+	unsigned char *arpptr;
+	int hln;
+
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
+
+	arp = arp_hdr(skb);
+	arpptr = skb_network_header(skb) + sizeof(*arp);
+	hln = arp->ar_hln;
+
+	/*
+	 * mc_devaddr holds only ETH_ALEN bytes. Copying more than that would
+	 * read past it and write whatever follows in the targinfo (including
+	 * the kernel-internal device pointer) into the packet.
+	 */
+	if (hln > sizeof(mangle->mc_devaddr) ||
+	    (arpptr + hln > skb_tail_pointer(skb))) {
+		return NF_DROP;
+	}
+	memcpy(arpptr, mangle->mc_devaddr, hln);
+
+	return NF_ACCEPT;
+}
+
+/*
+ * Validate a new mcmangle rule.
+ *
+ * The configured address must have the group (multicast) bit set and the
+ * named device must exist in init_net. On success the address is added to
+ * the device's multicast list and a reference to the device is stored in
+ * mangle->dev. Returns false, holding no reference, on any failure.
+ */
+static bool
+arpt_mcmangle_checkentry(const struct xt_tgchk_param *par)
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	struct net_device *dev;
+
+	if (!(mangle->mc_devaddr[0] & 0x01)) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong multicast address\n");
+		return false;
+	}
+	dev = dev_get_by_name(&init_net, mangle->ifname);
+	if (dev == NULL) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong `%s' interface\n",
+			mangle->ifname);
+		return false;
+	}
+	if (dev_mc_add(dev, mangle->mc_devaddr, ETH_ALEN, 0) < 0) {
+		printk(KERN_ERR "arpt_mcmangle: cannot set multicast "
+				"address\n");
+		/* The rule is rejected: release the reference taken above */
+		dev_put(dev);
+		return false;
+	}
+	/* Publish the device only once everything has succeeded */
+	mangle->dev = dev;
+	return true;
+}
+
+static void
+arpt_mcmangle_destroy(const struct xt_tgdtor_param *par)	/* undo what checkentry set up */
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	dev_mc_delete(mangle->dev, mangle->mc_devaddr, ETH_ALEN, 0);	/* remove the multicast list entry added by checkentry */
+	dev_put(mangle->dev);	/* release the reference taken by dev_get_by_name() */
+}
+
+static struct xt_target arpt_mcmangle_reg __read_mostly = {	/* registration record of the "mcmangle" target */
+	.name		= "mcmangle",
+	.family		= NFPROTO_ARP,
+	.target		= arpt_mcmangle_tg,
+	.checkentry	= arpt_mcmangle_checkentry,
+	.destroy	= arpt_mcmangle_destroy,
+	.targetsize	= sizeof(struct arpt_mcmangle),
+	.hooks		= (1 << NF_ARP_OUT),	/* restricted to the ARP output hook */
+	.me		= THIS_MODULE,
+};
+
+static int __init arpt_mcmangle_init(void)	/* module load: register the target */
+{
+	return xt_register_target(&arpt_mcmangle_reg);
+}
+
+static void __exit arpt_mcmangle_fini(void)	/* module unload: unregister the target */
+{
+	xt_unregister_target(&arpt_mcmangle_reg);
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arptables: arp multicast mangle target");
+module_init(arpt_mcmangle_init);
+module_exit(arpt_mcmangle_fini);


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-01-28 14:58 [PATCH 1/3] netfilter: arptables: add mcmangle target Pablo Neira Ayuso
@ 2009-01-28 14:58 ` Pablo Neira Ayuso
  2009-01-28 16:11   ` Jan Engelhardt
  2009-02-09 15:13   ` Patrick McHardy
  2009-01-28 14:58 ` [PATCH 3/3] netfilter: xtables: add cluster match Pablo Neira Ayuso
  2009-02-09 15:11 ` [PATCH 1/3] netfilter: arptables: add mcmangle target Patrick McHardy
  2 siblings, 2 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-01-28 14:58 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch adds the PKTTYPE target which can be used to mangle the
skbuff packet type field. This target is useful in conjunction with
the arptables mcmangle target to get TCP working again when a
multicast hardware address is used. An example of its use:

iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
	-j PKTTYPE --to-pkt-type unicast

Given the following arptables rule-set:

arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6 \
	--mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
arptables -I INPUT --h-length 6 --destination-mac 01:00:5e:00:01:01 \
	-j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

See arptables mcmangle target for further information.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---

 include/linux/netfilter/xt_PKTTYPE.h |    8 ++++
 net/netfilter/Kconfig                |   18 ++++++++++
 net/netfilter/Makefile               |    1 +
 net/netfilter/xt_PKTTYPE.c           |   61 ++++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_PKTTYPE.h
 create mode 100644 net/netfilter/xt_PKTTYPE.c

diff --git a/include/linux/netfilter/xt_PKTTYPE.h b/include/linux/netfilter/xt_PKTTYPE.h
new file mode 100644
index 0000000..cc67cbf
--- /dev/null
+++ b/include/linux/netfilter/xt_PKTTYPE.h
@@ -0,0 +1,8 @@
+#ifndef _XT_PKTTYPE_TARGET_H
+#define _XT_PKTTYPE_TARGET_H
+
+struct xt_pkttype_target_info {	/* targinfo of the "PKTTYPE" target */
+	u_int8_t pkt_type;	/* value the target stores into skb->pkt_type */
+};
+
+#endif /* _XT_PKTTYPE_TARGET_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 25dcef9..9ed1ccf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -489,6 +489,24 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
 	  TCP options from TCP packets.
 
+config NETFILTER_XT_TARGET_PKTTYPE
+	tristate  '"PKTTYPE" target support'
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	depends on NETFILTER_ADVANCED
+	help
+	  The PKTTYPE target allows you to change the link layer packet type.
+	  This target is useful if you have set up a multicast MAC address (via
+	  arptables) for a given interface and you want the packets to reach
+	  the layer 4 stack (which would drop packets tagged as multicast
+	  by the link layer).
+
+	  This target can be used in conjunction with arptables and the cluster
+	  match to set up a cluster of stateful firewalls which are connected
+	  through a switch.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index da3d909..dd43ba9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_PKTTYPE) += xt_PKTTYPE.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
diff --git a/net/netfilter/xt_PKTTYPE.c b/net/netfilter/xt_PKTTYPE.c
new file mode 100644
index 0000000..db68dc4
--- /dev/null
+++ b/net/netfilter/xt_PKTTYPE.c
@@ -0,0 +1,61 @@
+/*
+ * (C) 2008 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as 
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/xt_PKTTYPE.h>
+
+static unsigned int
+xt_pkttype_tg(struct sk_buff *skb, const struct xt_target_param *par)	/* target handler: rewrite the packet type of skb */
+{
+	const struct xt_pkttype_target_info *info = par->targinfo;
+
+	skb->pkt_type = info->pkt_type;	/* NOTE(review): no range check on pkt_type is visible in this file - confirm it is validated elsewhere */
+
+	return XT_CONTINUE;	/* non-terminating target */
+}
+
+/*
+ * Registration records for the "PKTTYPE" target: one per layer 3 family,
+ * both restricted to the mangle table. The family is spelled with the
+ * NFPROTO_* netfilter constants rather than the socket AF_* ones.
+ */
+static struct xt_target xt_pkttype_target[] __read_mostly = {
+	{
+		.family		= NFPROTO_IPV4,
+		.name		= "PKTTYPE",
+		.table		= "mangle",
+		.target		= xt_pkttype_tg,
+		.targetsize	= sizeof(struct xt_pkttype_target_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.family		= NFPROTO_IPV6,
+		.name		= "PKTTYPE",
+		.table		= "mangle",
+		.target		= xt_pkttype_tg,
+		.targetsize	= sizeof(struct xt_pkttype_target_info),
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init xt_pkttype_tg_init(void)	/* module load: register both family variants */
+{
+	return xt_register_targets(xt_pkttype_target,
+				   ARRAY_SIZE(xt_pkttype_target));
+}
+
+static void __exit xt_pkttype_tg_fini(void)	/* module unload: unregister both family variants */
+{
+	xt_unregister_targets(xt_pkttype_target, ARRAY_SIZE(xt_pkttype_target));
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: xt_PKTTYPE target");
+MODULE_ALIAS("ipt_PKTTYPE");
+MODULE_ALIAS("ip6t_PKTTYPE");
+module_init(xt_pkttype_tg_init);
+module_exit(xt_pkttype_tg_fini);


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 3/3] netfilter: xtables: add cluster match
  2009-01-28 14:58 [PATCH 1/3] netfilter: arptables: add mcmangle target Pablo Neira Ayuso
  2009-01-28 14:58 ` [PATCH 2/3] netfilter: xtables: add PKTTYPE target Pablo Neira Ayuso
@ 2009-01-28 14:58 ` Pablo Neira Ayuso
  2009-01-28 16:07   ` Jan Engelhardt
  2009-02-09 15:25   ` Patrick McHardy
  2009-02-09 15:11 ` [PATCH 1/3] netfilter: arptables: add mcmangle target Patrick McHardy
  2 siblings, 2 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-01-28 14:58 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch adds the iptables cluster match. This match can be used
to deploy gateway and back-end load-sharing clusters. Assuming that
all the nodes see all packets (see arptables mcmangle target and
PKTTYPE iptables targets on how to do that), the cluster match
decides if this node has to handle a packet given:

	jhash(source IP) % nodeID == 0

For related connections, the master conntrack is used. An example of
its use for a gateway cluster, in one of the cluster nodes:

iptables -I PREROUTING -t mangle -i eth1 -m cluster \
	--cluster-total-nodes 2 --cluster-local-node 1 \
	--cluster-proc-name eth1 -j MARK --set-mark 0xffff
iptables -A PREROUTING -t mangle -i eth1 \
	-m mark ! --mark 0xffff -j DROP
iptables -A PREROUTING -t mangle -i eth2 -m cluster \
	--cluster-total-nodes 2 --cluster-local-node 1 \
	--cluster-proc-name eth2 -j MARK --set-mark 0xffff
iptables -A PREROUTING -t mangle -i eth2 \
	-m mark ! --mark 0xffff -j DROP

And the following rule-set to make all nodes see all the packets:

arptables -I OUTPUT -o eth1 -j mcmangle --h-length 6 \
	--mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth1
arptables -I INPUT -i eth1 --h-length 6 \
	--destination-mac 01:00:5e:00:01:01 \
	-j mangle --mangle-mac-d 00:zz:yy:xx:5a:27
arptables -I OUTPUT -o eth2 -j mcmangle --h-length 6 \
	--mc-mangle-mac 01:00:5e:00:01:02 --mc-mangle-dev eth2
arptables -I INPUT -i eth2 --h-length 6 \
	--destination-mac 01:00:5e:00:01:02 \
	-j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
	-j PKTTYPE --to-pkt-type unicast

In the case of TCP connections, pickup facility has to be disabled
to avoid marking TCP ACK packets coming in the reply direction as
valid.

echo 0 > /proc/sys/net/netfilter/nf_conntrack_tcp_loose

The match also provides a /proc entry under:

/proc/sys/net/netfilter/cluster/$PROC_NAME

where PROC_NAME is set via --cluster-proc-name. This is useful to
include possible cluster reconfigurations via fail-over scripts.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---

 include/linux/netfilter/xt_cluster.h |   21 ++
 net/netfilter/Kconfig                |   17 ++
 net/netfilter/Makefile               |    1 
 net/netfilter/xt_cluster.c           |  333 ++++++++++++++++++++++++++++++++++
 4 files changed, 372 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_cluster.h
 create mode 100644 net/netfilter/xt_cluster.c

diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h
new file mode 100644
index 0000000..85ab07f
--- /dev/null
+++ b/include/linux/netfilter/xt_cluster.h
@@ -0,0 +1,21 @@
+#ifndef _XT_CLUSTER_MATCH_H
+#define _XT_CLUSTER_MATCH_H
+
+struct proc_dir_entry;
+
+/*
+ * Bits of xt_cluster_match_info.flags. Each value must be a non-zero bit:
+ * the match tests it with `flags & XT_CLUSTER_F_INV', which can never be
+ * true for a value of 0.
+ */
+enum xt_cluster_flags {
+	XT_CLUSTER_F_INV = (1 << 0),	/* invert the result of the match */
+};
+
+/*
+ * matchinfo of the "cluster" match. Every member uses a fixed-width type
+ * so that the layout does not depend on the ABI.
+ */
+struct xt_cluster_match_info {
+	u_int16_t		total_nodes;	/* number of nodes in the cluster */
+	u_int16_t		node_id;	/* id of the local node, counted from 1 */
+	u_int32_t		hash_seed;	/* seed handed to jhash */
+	char			proc_name[16];	/* name of the per-rule proc entry */
+	u_int32_t		flags;		/* XT_CLUSTER_F_* bits */
+
+	/* Used internally by the kernel */
+	void			*data __attribute__((aligned(8)));
+};
+
+#endif /* _XT_CLUSTER_MATCH_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9ed1ccf..33d37d9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -872,6 +872,23 @@ config NETFILTER_XT_MATCH_U32
 
 	  Details and examples are in the kernel module source.
 
+config NETFILTER_XT_MATCH_CLUSTER
+	tristate '"cluster" match support'
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option allows you to build work-load-sharing clusters of
+	  network servers/stateful firewalls without having a dedicated
+	  load-balancing router/server/switch. Basically, this match returns
+	  true when the packet must be handled by this cluster node. Thus,
+	  all nodes see all packets and this match decides which node handles
+	  what packets. The work-load sharing algorithm is based on source
+	  address hashing.
+
+	  If you say Y here, try `iptables -m cluster --help` for
+	  more information. See the PKTTYPE target and the mcmangle arptables
+	  target on how to make your nodes see all packets. You can also have
+	  a look at man iptables(8) for some examples on the usage.
+
 endif # NETFILTER_XTABLES
 
 endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index dd43ba9..29049e4 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -90,6 +90,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 
 # IPVS
 obj-$(CONFIG_IP_VS) += ipvs/
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
new file mode 100644
index 0000000..998ba11
--- /dev/null
+++ b/net/netfilter/xt_cluster.c
@@ -0,0 +1,333 @@
+/*
+ * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as 
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/bitops.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/xt_cluster.h>
+
+struct xt_cluster_internal {	/* kernel-side state referenced by xt_cluster_match_info.data */
+	unsigned long		node_mask;	/* bit (id - 1) is set for each node id this host handles */
+	struct proc_dir_entry	*proc;	/* proc entry created for the rule */
+	atomic_t		use;	/* reference count; the state is freed when it drops to zero */
+};
+
+static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct)	/* IPv4 source address of the original-direction tuple */
+{
+	return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+}
+
+static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct)	/* pointer to the IPv6 source address of the original-direction tuple */
+{
+	return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6;
+}
+
+static inline u_int32_t
+xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info)	/* hash one IPv4 address with the rule's seed */
+{
+	return jhash_1word(ip, info->hash_seed);
+}
+
+static inline u_int32_t
+xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info)	/* hash one IPv6 address with the rule's seed */
+{
+	return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed);	/* jhash2 takes its length in 32-bit words */
+}
+
+/*
+ * Map a conntrack onto a bucket in [0, info->total_nodes): hash the source
+ * address of its original direction with the rule's seed, then scale the
+ * 32-bit hash by the number of nodes.
+ */
+static inline u_int32_t
+xt_cluster_hash(const struct nf_conn *ct,
+		const struct xt_cluster_match_info *info)
+{
+	const u_int16_t l3num = nf_ct_l3num(ct);
+	u_int32_t h;
+
+	if (l3num == AF_INET) {
+		h = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info);
+	} else if (l3num == AF_INET6) {
+		h = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info);
+	} else {
+		/* Unexpected layer 3 protocol: warn and fall back to hash 0 */
+		WARN_ON(1);
+		h = 0;
+	}
+	return (((u64)h * info->total_nodes) >> 32);
+}
+
+/*
+ * Match handler: decide whether this node is responsible for the packet.
+ *
+ * Packets without a conntrack entry, or attached to the untracked
+ * conntrack, never match. Otherwise the connection (or its master, for
+ * related connections) is hashed to a bucket and the packet matches when
+ * the corresponding bit is set in the node mask, optionally inverted by
+ * XT_CLUSTER_F_INV.
+ */
+static bool
+xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_cluster_match_info *info = par->matchinfo;
+	const struct xt_cluster_internal *internal = info->data;
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned long hash;
+	bool inv = !!(info->flags & XT_CLUSTER_F_INV);
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return false;
+
+	if (ct == &nf_conntrack_untracked)
+		return false;
+
+	if (ct->master)
+		hash = xt_cluster_hash(ct->master, info);
+	else
+		hash = xt_cluster_hash(ct, info);
+
+	/*
+	 * Normalise to 0/1 before the XOR: test_bit() is only relied upon to
+	 * return zero or non-zero, and a non-zero value other than 1 XORed
+	 * with `inv' would still convert to true.
+	 */
+	return !!test_bit(hash, &internal->node_mask) ^ inv;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file start callback. The file holds a single record, the node mask,
+ * which is handed out at position 0; any other position resets *pos and
+ * ends the sequence.
+ */
+static void *xt_cluster_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct xt_cluster_internal *data = s->private;
+
+	if (*pos != 0) {
+		*pos = 0;
+		return NULL;
+	}
+	return &data->node_mask;
+}
+
+static void *xt_cluster_seq_next(struct seq_file *s, void *v, loff_t *pos)	/* seq_file next callback */
+{
+	(*pos)++;
+	return NULL;	/* there is never a second record */
+}
+
+static void xt_cluster_seq_stop(struct seq_file *s, void *v) {}	/* seq_file stop callback: nothing to do */
+
+/* seq_file show callback: print the node mask handed out by start as hex. */
+static int xt_cluster_seq_show(struct seq_file *s, void *v)
+{
+	const unsigned long *node_mask = v;
+
+	seq_printf(s, "0x%.8lx\n", *node_mask);
+	return 0;
+}
+
+/* seq_file callbacks for the per-rule proc entry; never modified at run time. */
+static const struct seq_operations xt_cluster_seq_ops = {
+	.start	= xt_cluster_seq_start,
+	.next	= xt_cluster_seq_next,
+	.stop	= xt_cluster_seq_stop,
+	.show	= xt_cluster_seq_show
+};
+
+#define XT_CLUSTER_PROC_WRITELEN	10
+
+/*
+ * Write handler of the per-rule proc entry.
+ *
+ * Accepts "+<id>" to add node <id> to the node mask and "-<id>" to remove
+ * it; <id> is decimal and counted from 1. At most XT_CLUSTER_PROC_WRITELEN
+ * bytes of the input are looked at.
+ *
+ * Returns @size on success, -EFAULT if the user buffer cannot be read and
+ * -EIO for a malformed command or an id outside the mask.
+ */
+static ssize_t
+xt_cluster_write_proc(struct file *file, const char __user *input,
+		      size_t size, loff_t *ofs)
+{
+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct xt_cluster_internal *info = pde->data;
+	char buffer[XT_CLUSTER_PROC_WRITELEN+1];
+	unsigned int new_node_id;
+	size_t len;
+
+	/* Never read more than the caller actually supplied */
+	len = min_t(size_t, size, XT_CLUSTER_PROC_WRITELEN);
+	if (copy_from_user(buffer, input, len))
+		return -EFAULT;
+	/* Terminate so the number parser cannot run into stack garbage */
+	buffer[len] = '\0';
+
+	switch (*buffer) {
+	case '+':
+		new_node_id = simple_strtoul(buffer+1, NULL, 10);
+		if (!new_node_id || new_node_id > sizeof(info->node_mask)*8)
+			return -EIO;
+		printk(KERN_NOTICE "cluster: adding node %u\n", new_node_id);
+		set_bit(new_node_id-1, &info->node_mask);
+		break;
+	case '-':
+		new_node_id = simple_strtoul(buffer+1, NULL, 10);
+		if (!new_node_id || new_node_id > sizeof(info->node_mask)*8)
+			return -EIO;
+		printk(KERN_NOTICE "cluster: deleting node %u\n", new_node_id);
+		clear_bit(new_node_id-1, &info->node_mask);
+		break;
+	default:
+		return -EIO;
+	}
+
+	return size;
+}
+
+/*
+ * Open handler of the per-rule proc entry: set up the seq_file and hand it
+ * the data pointer of the proc entry, which the seq callbacks read as a
+ * struct xt_cluster_internal.
+ */
+static int xt_cluster_open_proc(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &xt_cluster_seq_ops);
+	if (!ret) {
+		struct seq_file *seq = file->private_data;
+		const struct proc_dir_entry *pde = PDE(inode);
+		struct xt_cluster_internal *internal = pde->data;
+
+		seq->private = internal;
+	}
+	return ret;
+}
+
+static struct proc_dir_entry *proc_cluster;	/* the "cluster" proc directory created at module init */
+static const struct file_operations xt_cluster_proc_fops = {	/* file operations of the per-rule proc entries */
+	.owner		= THIS_MODULE,
+	.open		= xt_cluster_open_proc,
+	.release	= seq_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.write		= xt_cluster_write_proc,
+};
+
+/*
+ * Return true when @dir already has a child entry called @name.
+ * NOTE(review): the child list is walked without any lock taken in this
+ * function - confirm that the callers serialise against proc changes.
+ */
+static bool
+xt_cluster_proc_entry_exist(struct proc_dir_entry *dir, const char *name)
+{
+	struct proc_dir_entry *entry = dir->subdir;
+
+	while (entry != NULL) {
+		if (!strcmp(entry->name, name))
+			return true;
+		entry = entry->next;
+	}
+	return false;
+}
+
+/*
+ * Create the proc entry of a rule under the "cluster" directory, attaching
+ * the rule's internal state (info->data, which must already be set) as the
+ * entry's data.
+ *
+ * Returns false when the name is empty or not NUL-terminated, when an
+ * entry with that name already exists, or when the entry cannot be
+ * created.
+ */
+static bool xt_cluster_proc_init(struct xt_cluster_match_info *info)
+{
+	struct xt_cluster_internal *internal = info->data;
+	size_t len;
+
+	BUG_ON(info->data == NULL);
+
+	/*
+	 * proc_name is part of the rule blob: make sure it is a proper,
+	 * non-empty string before handing it to string functions.
+	 */
+	len = strnlen(info->proc_name, sizeof(info->proc_name));
+	if (len == 0 || len == sizeof(info->proc_name)) {
+		printk(KERN_ERR "xt_cluster: invalid proc entry name\n");
+		return false;
+	}
+	if (xt_cluster_proc_entry_exist(proc_cluster, info->proc_name)) {
+		printk(KERN_ERR "xt_cluster: proc entry `%s' "
+				"already exists\n", info->proc_name);
+		return false;
+	}
+	internal->proc = proc_create_data(info->proc_name,
+					  S_IWUSR|S_IRUSR,
+					  proc_cluster,
+					  &xt_cluster_proc_fops,
+					  info->data);
+	if (!internal->proc) {
+		printk(KERN_ERR "xt_cluster: cannot create proc entry `%s'\n",
+				info->proc_name);
+		return false;
+	}
+	return true;
+}
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Allocate and initialise the kernel-side state of a rule and store it in
+ * info->data. With CONFIG_PROC_FS the per-rule proc entry is created too.
+ *
+ * Returns false, leaving info->data NULL, when the allocation or the proc
+ * entry creation fails.
+ */
+static bool xt_cluster_internal_init(struct xt_cluster_match_info *info)
+{
+	struct xt_cluster_internal *data;
+
+	data = kzalloc(sizeof(struct xt_cluster_internal), GFP_KERNEL);
+	if (!data) {
+		printk(KERN_ERR "xt_cluster: OOM\n");
+		return false;
+	}
+	/*
+	 * Initialise everything before the proc entry makes the state
+	 * reachable from user space. The shift is done on an unsigned long
+	 * so that ids above 31 fit on 64-bit; node_id is assumed to be in
+	 * the range 1..BITS_PER_LONG here.
+	 */
+	atomic_set(&data->use, 1);
+	data->node_mask = 1UL << (info->node_id - 1);
+	info->data = data;
+
+#ifdef CONFIG_PROC_FS
+	if (!xt_cluster_proc_init(info)) {
+		/* Do not leave a pointer to freed memory in the rule */
+		info->data = NULL;
+		kfree(data);
+		return false;
+	}
+#endif
+	return true;
+}
+
+/*
+ * Validate a new cluster rule and set up (or take another reference on)
+ * its kernel-side state.
+ *
+ * total_nodes must be between 1 and BITS_PER_LONG, because hash buckets
+ * are used as bit numbers in a single unsigned long mask, and node_id must
+ * be between 1 and total_nodes.
+ *
+ * NOTE(review): a non-NULL info->data is trusted and dereferenced as
+ * existing state. info comes from the rule blob - confirm that this
+ * pointer cannot be chosen by user space.
+ */
+static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_cluster_match_info *info = par->matchinfo;
+	struct xt_cluster_internal *data = info->data;
+
+	if (info->total_nodes == 0 || info->total_nodes > BITS_PER_LONG) {
+		printk(KERN_ERR "xt_cluster: the total number of nodes must "
+				"be between 1 and %d\n", BITS_PER_LONG);
+		return false;
+	}
+
+	if (info->node_id == 0) {
+		printk(KERN_ERR "xt_cluster: the id of this node must be "
+				"at least 1\n");
+		return false;
+	}
+
+	if (info->node_id > info->total_nodes) {
+		printk(KERN_ERR "xt_cluster: the id of this node cannot be "
+				"higher than the total number of nodes\n");
+		return false;
+	}
+
+	if (!info->data) {
+		if (!xt_cluster_internal_init(info))
+			return false;
+	} else
+		atomic_inc(&data->use);
+
+	return true;
+}
+
+static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par)	/* drop one reference on the rule's state */
+{
+	struct xt_cluster_match_info *info = par->matchinfo;
+	struct xt_cluster_internal *data = info->data;
+
+	if (atomic_dec_and_test(&data->use)) {	/* last reference: tear everything down */
+#ifdef CONFIG_PROC_FS
+		remove_proc_entry(info->proc_name, proc_cluster);
+#endif
+		kfree(info->data);
+	}
+}
+
+/*
+ * Registration records for the "cluster" match, one per layer 3 family.
+ * The family is spelled with the NFPROTO_* netfilter constants rather than
+ * the socket AF_* ones.
+ */
+static struct xt_match xt_cluster_match[] __read_mostly = {
+	{
+		.name		= "cluster",
+		.family		= NFPROTO_IPV4,
+		.match		= xt_cluster_mt,
+		.checkentry	= xt_cluster_mt_checkentry,
+		.destroy	= xt_cluster_mt_destroy,
+		.matchsize	= sizeof(struct xt_cluster_match_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "cluster",
+		.family		= NFPROTO_IPV6,
+		.match		= xt_cluster_mt,
+		.checkentry	= xt_cluster_mt_checkentry,
+		.destroy	= xt_cluster_mt_destroy,
+		.matchsize	= sizeof(struct xt_cluster_match_info),
+		.me		= THIS_MODULE,
+	},
+};
+
+/*
+ * Module load: create the "cluster" proc directory (with CONFIG_PROC_FS)
+ * and register the matches. The directory is removed again when the
+ * registration fails, so a failed load leaves nothing behind.
+ *
+ * Returns 0 on success, -ENOMEM when the directory cannot be created, or
+ * the error returned by xt_register_matches().
+ */
+static int __init xt_cluster_mt_init(void)
+{
+	int ret;
+
+#ifdef CONFIG_PROC_FS
+	proc_cluster = proc_mkdir("cluster", proc_net_netfilter);
+	if (!proc_cluster)
+		return -ENOMEM;
+#endif
+	ret = xt_register_matches(xt_cluster_match,
+				  ARRAY_SIZE(xt_cluster_match));
+	if (ret < 0) {
+#ifdef CONFIG_PROC_FS
+		remove_proc_entry("cluster", proc_net_netfilter);
+#endif
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit xt_cluster_mt_fini(void)	/* module unload */
+{
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("cluster", proc_net_netfilter);	/* remove the directory created at init */
+#endif
+	xt_unregister_matches(xt_cluster_match, ARRAY_SIZE(xt_cluster_match));
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: xt_cluster match");
+MODULE_ALIAS("ipt_cluster");
+MODULE_ALIAS("ip6t_cluster");
+module_init(xt_cluster_mt_init);
+module_exit(xt_cluster_mt_fini);


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] netfilter: xtables: add cluster match
  2009-01-28 14:58 ` [PATCH 3/3] netfilter: xtables: add cluster match Pablo Neira Ayuso
@ 2009-01-28 16:07   ` Jan Engelhardt
  2009-01-28 16:38     ` Pablo Neira Ayuso
  2009-02-09 15:25   ` Patrick McHardy
  1 sibling, 1 reply; 27+ messages in thread
From: Jan Engelhardt @ 2009-01-28 16:07 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, kaber


On Wednesday 2009-01-28 15:58, Pablo Neira Ayuso wrote:

>This patch adds the iptables cluster match. This match can be used
>to deploy gateway and back-end load-sharing clusters. Assuming that
>all the nodes see all packets
> (see arptables mcmangle target and PKTTYPE iptables targets on how
>to do that), the cluster match decides if this node has to handle a
>packet given:

Is not this what CLUSTERIP is essentially supposed to do?

>
>	jhash(source IP) % nodeID == 0

>+struct xt_cluster_match_info {
>+	u_int16_t		total_nodes;
>+	u_int16_t		node_id;
>+	u_int32_t		hash_seed;
>+	char			proc_name[16];
>+	unsigned int		flags;

Ouch, don't use unfixated types like unsigned int.

>+config NETFILTER_XT_MATCH_CLUSTER
>+	tristate '"cluster" match support'
>+	depends on NETFILTER_ADVANCED
>+	---help---
>+	  This option allows you to build work-load-sharing clusters of
>+	  network servers/stateful firewalls without having a dedicated
>+	  load-balancing router/server/switch. Basically, this match returns
>+	  true when the packet must be handled by this cluster node. Thus,
>+	  all nodes see all packets and this match decides which node handles
>+	  what packets. The work-load sharing algorithm is based on source
>+	  address hashing.
>+
>+	  If you say Y here, try `iptables -m cluster --help` for
>+	  more information. See the PKTTYPE target and the mcmangle arptables
>+	  target on how to make your nodes see all packets. You can also have
>+	  a look at man iptables(8) for some examples on the usage.
>+
> endif # NETFILTER_XTABLES

Please keep the list alphabetical.

> endmenu
>diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
>index dd43ba9..29049e4 100644
>--- a/net/netfilter/Makefile
>+++ b/net/netfilter/Makefile
>@@ -90,6 +90,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
> obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
> obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
> obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
>+obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o

Similarly.

>+struct xt_cluster_internal {
>+	unsigned long		node_mask;

In the back of my head I seem to remember a cpumask_t.

>+	struct proc_dir_entry	*proc;
>+	atomic_t		use;
>+};
>+
>+#ifdef CONFIG_PROC_FS
>+static void *xt_cluster_seq_start(struct seq_file *s, loff_t *pos)
>+{
>+	if (*pos == 0) {
>+		struct xt_cluster_internal *data =
>+			(struct xt_cluster_internal *)s->private;

Redundant cast ;)

>+static int xt_cluster_seq_show(struct seq_file *s, void *v)
>+{
>+	unsigned long *mask = (unsigned long *)v;

Same.

>+static struct seq_operations xt_cluster_seq_ops = {
static const struct ...

>+#define XT_CLUSTER_PROC_WRITELEN	10
>+
>+static ssize_t 
>+xt_cluster_write_proc(struct file *file, const char __user *input,
>+		      size_t size, loff_t *ofs)
>+{
>+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
>+        struct xt_cluster_internal *info = pde->data;

  ^^

>+	char buffer[XT_CLUSTER_PROC_WRITELEN+1];
>+	unsigned int new_node_id;
>+
>+	if (copy_from_user(buffer, input, XT_CLUSTER_PROC_WRITELEN))
>+		return -EFAULT;
>+
>+	switch(*buffer) {
>+	case '+':
>+		new_node_id = simple_strtoul(buffer+1, NULL, 10);

Perhaps strict_strtoul would be preferred?

>+	case '-':
>+		new_node_id = simple_strtoul(buffer+1, NULL, 10);

>+static struct xt_match xt_cluster_match[] __read_mostly = {
>+	{
>+		.name		= "cluster",
>+		.family		= AF_INET,
>+		.match		= xt_cluster_mt,
>+		.checkentry	= xt_cluster_mt_checkentry,
>+		.destroy	= xt_cluster_mt_destroy,
>+		.matchsize	= sizeof(struct xt_cluster_match_info),
>+		.me		= THIS_MODULE,
>+	},
>+	{
>+		.name		= "cluster",
>+		.family		= AF_INET6,
>+		.match		= xt_cluster_mt,
>+		.checkentry	= xt_cluster_mt_checkentry,
>+		.destroy	= xt_cluster_mt_destroy,
>+		.matchsize	= sizeof(struct xt_cluster_match_info),
>+		.me		= THIS_MODULE,
>+	},
>+};

First, NFPROTO_IPV4 instead of AF_INET, and NFPROTO_IPV6 instead of
AF_INET6.
Since these are the same, too, they can be combined with
.family = NFPROTO_UNSPEC.

>+static int __init xt_cluster_mt_init(void)
>+{
>+	int ret;
>+
>+#ifdef CONFIG_PROC_FS
>+	proc_cluster = proc_mkdir("cluster", proc_net_netfilter);
>+	if (!proc_cluster)
>+		return -ENOMEM;
>+#endif
>+	ret = xt_register_matches(xt_cluster_match,
>+				  ARRAY_SIZE(xt_cluster_match));
>+	if (ret < 0)
>+		return ret;

When register_match fails, you still have to remove the proc directory.

>+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
>+MODULE_LICENSE("GPL");
>+MODULE_DESCRIPTION("Xtables: xt_cluster match");

A better short-description would be nice (the user could have probably 
guessed already that xt_cluster.ko is a cluster match..),
maybe something like "cluster multicast filtering"?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-01-28 14:58 ` [PATCH 2/3] netfilter: xtables: add PKTTYPE target Pablo Neira Ayuso
@ 2009-01-28 16:11   ` Jan Engelhardt
  2009-01-28 16:51     ` Pablo Neira Ayuso
  2009-02-09 15:13   ` Patrick McHardy
  1 sibling, 1 reply; 27+ messages in thread
From: Jan Engelhardt @ 2009-01-28 16:11 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, kaber


On Wednesday 2009-01-28 15:58, Pablo Neira Ayuso wrote:

>This patch adds the PKTTYPE target which can be used to mangle the
>skbuff packet type field. This target is useful in conjunction with
>the arptables mcmangle target to TCP working again when a
>multicast hardware address is used. An example of its use:
>
>iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
>	-j PKTTYPE --to-pkt-type unicast

Are not packets not destined for 224/4 - assuming standard route setups -
unicast anyway?

>@@ -489,6 +489,24 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
> 	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
> 	  TCP options from TCP packets.
> 
>+config NETFILTER_XT_TARGET_PKTTYPE

Please adhere to alphabetical ordering. (Andrew Morton mentioned it
time and again that it causes the least merge conflicts where
conflicts would happen if multiple people always add to the last
item.) Also, searching for "PKTTYPE"/("cluster") in `make menuconfig`
in the otherwise-sorted Kconfig/Makefile list could prove confusing
if it was out-of-order.

>+	  If you want to compile it as a module, say M here and read
>+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.

A lovely paragraph, but I think it has lost its purpose given that
NETFILTER_ADVANCED can now be general-tagged "if unsure, pick N" and
the rest will be automagic.

>+++ b/net/netfilter/Makefile
>@@ -55,6 +55,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
> obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
> obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
> obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
>+obj-$(CONFIG_NETFILTER_XT_TARGET_PKTTYPE) += xt_PKTTYPE.o
> 
>+static struct xt_target xt_pkttype_target[] __read_mostly = {
>+	{
>+		.family		= AF_INET,
>+		.name		= "PKTTYPE",
>+		.table		= "mangle",
>+		.target		= xt_pkttype_tg,
>+		.targetsize	= sizeof(struct xt_pkttype_target_info),
>+		.me		= THIS_MODULE,
>+	},
>+	{
>+		.family		= AF_INET6,
>+		.name		= "PKTTYPE",
>+		.table		= "mangle",
>+		.target		= xt_pkttype_tg,
>+		.targetsize	= sizeof(struct xt_pkttype_target_info),
>+		.me		= THIS_MODULE,
>+	},
>+};

Combine to NFPROTO_UNSPEC.

>+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
>+MODULE_LICENSE("GPL");
>+MODULE_DESCRIPTION("Xtables: xt_PKTTYPE target");

I propose
MODULE_DESCRIPTION("Xtables: set skbuff packet type");
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] netfilter: xtables: add cluster match
  2009-01-28 16:07   ` Jan Engelhardt
@ 2009-01-28 16:38     ` Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-01-28 16:38 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel, kaber

Jan Engelhardt wrote:
> On Wednesday 2009-01-28 15:58, Pablo Neira Ayuso wrote:
> 
>> This patch adds the iptables cluster match. This match can be used
>> to deploy gateway and back-end load-sharing clusters. Assuming that
>> all the nodes see all packets
>> (see arptables mcmangle target and PKTTYPE iptables targets on how
>> to do that), the cluster match decides if this node has to handle a
>> packet given:
> 
> Is not this what CLUSTERIP is essentially supposed to do?

No, you can't deploy load-sharing setups for gateways as the -d
parameter is mandatory; CLUSTERIP only works for back-end nodes and has
a couple of significant problems. Indeed, this stuff supersedes CLUSTERIP.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-01-28 16:11   ` Jan Engelhardt
@ 2009-01-28 16:51     ` Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-01-28 16:51 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel, kaber

Jan Engelhardt wrote:
> On Wednesday 2009-01-28 15:58, Pablo Neira Ayuso wrote:
> 
>> This patch adds the PKTTYPE target which can be used to mangle the
>> skbuff packet type field. This target is useful in conjunction with
>> the arptables mcmangle target to TCP working again when a
>> multicast hardware address is used. An example of its use:
>>
>> iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
>> 	-j PKTTYPE --to-pkt-type unicast
> 
> Are not packets not destined for 224/4 - assuming standard route setups -
> unicast anyway?

Not if you use the mcmangle arptables target to use a multicast hardware
address instead of the normal unicast one; in that case, the link layer sets
them to PACKET_MULTICAST. Indeed, the only use that I can find for this
target at the moment is to combine it with mcmangle.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] netfilter: arptables: add mcmangle target
  2009-01-28 14:58 [PATCH 1/3] netfilter: arptables: add mcmangle target Pablo Neira Ayuso
  2009-01-28 14:58 ` [PATCH 2/3] netfilter: xtables: add PKTTYPE target Pablo Neira Ayuso
  2009-01-28 14:58 ` [PATCH 3/3] netfilter: xtables: add cluster match Pablo Neira Ayuso
@ 2009-02-09 15:11 ` Patrick McHardy
  2009-02-09 23:13   ` Pablo Neira Ayuso
  2 siblings, 1 reply; 27+ messages in thread
From: Patrick McHardy @ 2009-02-09 15:11 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Pablo Neira Ayuso wrote:
> This patch adds the mcmangle target for arptables which allows
> altering the source hardware address in ARP with a multicast
> hardware address. This target can be used to make a switch flood
> packets to the ports that use the same MAC multicast address. This
> is useful to deploy load-sharing clusters in environments in which
> the switch does not provide a way to flood packets to several
> ports.

Great name. Took me a bit to realize the connection to "multicast" :)

> Since all the nodes receives the same packets, each decides if
> it handles the packet based on hashing approach (See the `cluster'
> iptables match that comes with this patchset).
> 
> Theoretically, the use of the reserved VRRP hardware address should
> be fine for this, however, switches generally treat this hardware
> address space as normal unicast hardware address. Thus, in practise,
> it is not possible to have two nodes with the same VRRP hardware
> address.
> 
> Please, note that this target violates RFC 1812 (section 3.3.2) since
> an ethernet device must not use a multicast link address.
> 
> An example of the use of this target:
> 
> arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6 \
> 	--mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
> arptables -I INPUT -i eth0 --h-length 6 --destination-mac \
> 	01:00:5e:00:01:01 -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

Wouldn't it be more generically usable if it was a simple "set
either source or destination mac address to any value" target?
In fact that's what it seems to be (or is there a multicast check?),
so perhaps we should call it S/DNAT for consistency with iptables
and ebtables?

I'm wondering though why the device is needed as a parameter ..
ah I see:

+	if (dev_mc_add(dev, mangle->mc_devaddr, ETH_ALEN, 0) < 0) {
+		printk(KERN_ERR "arpt_mcmangle: cannot set multicast "
+				"address\n");
+		return false;
+	}

Continuing the idea of a generic ARP address mangling target,
this would have to be done in userspace using SIOCADDMULTI.

I would also prefer that approach because the multicast mangling
seems a bit like a hack which only works when the requesting
host accepts a multicast MAC address in the ARP reply.



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-01-28 14:58 ` [PATCH 2/3] netfilter: xtables: add PKTTYPE target Pablo Neira Ayuso
  2009-01-28 16:11   ` Jan Engelhardt
@ 2009-02-09 15:13   ` Patrick McHardy
  2009-02-09 23:15     ` Pablo Neira Ayuso
  1 sibling, 1 reply; 27+ messages in thread
From: Patrick McHardy @ 2009-02-09 15:13 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Pablo Neira Ayuso wrote:
> This patch adds the PKTTYPE target which can be used to mangle the
> skbuff packet type field. This target is useful in conjunction with
> the arptables mcmangle target to TCP working again when a
> multicast hardware address is used. An example of its use:
> 
> iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
> 	-j PKTTYPE --to-pkt-type unicast
> 
> Given the following arptables rule-set:
> 
> arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6
> 	\ --mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
> arptables -I INPUT --h-length 6 --destination-mac 01:00:5e:00:01:01
> 	\ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27
> 
> See arptables mcmangle target for further information.

That one refers to this patch :) So you're actually communicating
using TCP and multicast? Why don't you use UDP, which works fine
using multicast without pkttype mangling?




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] netfilter: xtables: add cluster match
  2009-01-28 14:58 ` [PATCH 3/3] netfilter: xtables: add cluster match Pablo Neira Ayuso
  2009-01-28 16:07   ` Jan Engelhardt
@ 2009-02-09 15:25   ` Patrick McHardy
  2009-02-09 23:23     ` Pablo Neira Ayuso
  1 sibling, 1 reply; 27+ messages in thread
From: Patrick McHardy @ 2009-02-09 15:25 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Pablo Neira Ayuso wrote:
> This patch adds the iptables cluster match. This match can be used
> to deploy gateway and back-end load-sharing clusters. Assuming that
> all the nodes see all packets (see arptables mcmangle target and
> PKTTYPE iptables targets on how to do that), the cluster match
> decides if this node has to handle a packet given:
> 
> 	jhash(source IP) % nodeID == 0
> 
> For related connections, the master conntrack is used. An example of
> its use for a gateway cluster, in one of the cluster nodes:
> 
> iptables -I PREROUTING -t mangle -i eth1 -m cluster \
> 	--cluster-total-nodes 2 --cluster-local-node 1 \
> 	--cluster-proc-name eth1 -j MARK --set-mark 0xffff
> iptables -A PREROUTING -t mangle -i eth1 \
> 	-m mark ! --mark 0xffff -j DROP
> iptables -A PREROUTING -t mangle -i eth2 -m cluster \
> 	--cluster-total-nodes 2 --cluster-local-node 1 \
> 	--cluster-proc-name eth2 -j MARK --set-mark 0xffff
> iptables -A PREROUTING -t mangle -i eth2 \

While it's simple and probably gives good distribution, using the
source IP like this has the major disadvantage that when one node
fails, all nodes need to change rulesets and the existing mapping
of connections becomes at least partially invalid.

Also using conntrack data implies that this can't be used in the
raw table, which seems like the best place to drop foreign packets.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] netfilter: arptables: add mcmangle target
  2009-02-09 15:11 ` [PATCH 1/3] netfilter: arptables: add mcmangle target Patrick McHardy
@ 2009-02-09 23:13   ` Pablo Neira Ayuso
  2009-02-10 11:16     ` Pablo Neira Ayuso
  0 siblings, 1 reply; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-09 23:13 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

Patrick McHardy wrote:
> Great name. Took me a bit to realize the connection to "multicast" :)

I thought about mcnugget target, but it seems to be already copyrighted ;)

> Continuing the idea of a generic ARP address mangling target,
> this would have to be done in userspace using SIOCADDMULTI.
> 
> I would also prefer that approach because the multicast mangling
> seems a bit like a hack which only works when the requesting
> host accepts a multicast MAC address in the ARP reply.

Indeed, I didn't know about SIOCADDMULTI, great. I'll do it like you
have proposed. Thanks.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-09 15:13   ` Patrick McHardy
@ 2009-02-09 23:15     ` Pablo Neira Ayuso
  2009-02-10 14:03       ` Patrick McHardy
  0 siblings, 1 reply; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-09 23:15 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> This patch adds the PKTTYPE target which can be used to mangle the
>> skbuff packet type field. This target is useful in conjunction with
>> the arptables mcmangle target to TCP working again when a
>> multicast hardware address is used. An example of its use:
>>
>> iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
>>     -j PKTTYPE --to-pkt-type unicast
>>
>> Given the following arptables rule-set:
>>
>> arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6
>>     \ --mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
>> arptables -I INPUT --h-length 6 --destination-mac 01:00:5e:00:01:01
>>     \ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27
>>
>> See arptables mcmangle target for further information.
> 
> That one refers to this patch :) So you're actually communicating
> using TCP and multicast? Why don't you use UDP, which works fine
> using multicast without pkttype mangling?

If the netdevice uses multicast MAC address, the link layer sets skbuff
pkttype to PACKET_MULTICAST and TCP (among others) doesn't like this.
This target is required to make TCP work again if a multicast MAC
address is used.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] netfilter: xtables: add cluster match
  2009-02-09 15:25   ` Patrick McHardy
@ 2009-02-09 23:23     ` Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-09 23:23 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> This patch adds the iptables cluster match. This match can be used
>> to deploy gateway and back-end load-sharing clusters. Assuming that
>> all the nodes see all packets (see arptables mcmangle target and
>> PKTTYPE iptables targets on how to do that), the cluster match
>> decides if this node has to handle a packet given:
>>
>>     jhash(source IP) % nodeID == 0
>>
>> For related connections, the master conntrack is used. An example of
>> its use for a gateway cluster, in one of the cluster nodes:
>>
>> iptables -I PREROUTING -t mangle -i eth1 -m cluster \
>>     --cluster-total-nodes 2 --cluster-local-node 1 \
>>     --cluster-proc-name eth1 -j MARK --set-mark 0xffff
>> iptables -A PREROUTING -t mangle -i eth1 \
>>     -m mark ! --mark 0xffff -j DROP
>> iptables -A PREROUTING -t mangle -i eth2 -m cluster \
>>     --cluster-total-nodes 2 --cluster-local-node 1 \
>>     --cluster-proc-name eth2 -j MARK --set-mark 0xffff
>> iptables -A PREROUTING -t mangle -i eth2 \
> 
> While its simple and probably gives good distribution, using the
> source IP like this has the major disadvantage that when one node
> fails, all nodes need to change rulesets and the existing mapping
> of connections becomes at least partially invalid.

But this is the same problem that people have in fault-tolerant
primary-backup stateful firewall setups. That's why the conntrack-tools
are there, just in case that you want fault-tolerant stateful firewalls ;).

> Also using conntrack data implies that this can't be used in the
> raw table, which seems like the best place to drop foreign packets.

Indeed, but this relies on the conntrack data to hash the same tuple for
all the packets.

Look at the numbers, they are not that bad, I'm getting ~45% more
performance while having my two firewalls running (before one was idle
in my primary-backup setup).

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] netfilter: arptables: add mcmangle target
  2009-02-09 23:13   ` Pablo Neira Ayuso
@ 2009-02-10 11:16     ` Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-10 11:16 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Great name. Took me a bit to realize the connection to "multicast" :)
> 
> I thought about mcnugget target, but it seems to be already copyrighted ;)
> 
>> Continuing the idea of a generic ARP address mangling target,
>> this would have to be done in userspace using SIOCADDMULTI.
>>
>> I would also prefer that approach because the multicast mangling
>> seems a bit like a hack which only works when the requesting
>> host accepts a multicast MAC address in the ARP reply.
> 
> Indeed, I didn't know about SIOCADDMULTI, great. I'll do it like you
> have proposed. Thanks.

Hey, just to let you know ;). The 'ip' tool already allows adding static
multicast MAC addresses via SIOCADDMULTI. The following commands do the
trick:

ip maddr add 01:00:5e:00:01:01 dev eth1
arptables -I OUTPUT -o eth1 --h-length 6 \
-j mangle --mangle-mac-s 01:00:5e:00:01:01
arptables -I INPUT -i eth1 --h-length 6 --destination-mac \
01:00:5e:00:01:01 -j mangle --mangle-mac-d $REAL_HWADDR1

Still the PKTTYPE iptables target that I posted is needed to make TCP
and friends work in a device that uses a multicast MAC.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-09 23:15     ` Pablo Neira Ayuso
@ 2009-02-10 14:03       ` Patrick McHardy
  2009-02-10 14:18         ` Jozsef Kadlecsik
  0 siblings, 1 reply; 27+ messages in thread
From: Patrick McHardy @ 2009-02-10 14:03 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Pablo Neira Ayuso wrote:
>>> This patch adds the PKTTYPE target which can be used to mangle the
>>> skbuff packet type field. This target is useful in conjunction with
>>> the arptables mcmangle target to TCP working again when a
>>> multicast hardware address is used. An example of its use:
>>>
>>> iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
>>>     -j PKTTYPE --to-pkt-type unicast
>>>
>>> Given the following arptables rule-set:
>>>
>>> arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6
>>>     \ --mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
>>> arptables -I INPUT --h-length 6 --destination-mac 01:00:5e:00:01:01
>>>     \ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27
>>>
>>> See arptables mcmangle target for further information.
>> That one refers to this patch :) So you're actually communicating
>> using TCP and multicast? Why don't you use UDP, which works fine
>> using multicast without pkttype mangling?
> 
> If the netdevice uses multicast MAC address, the link layer sets skbuff
> pkttype to PACKET_MULTICAST and TCP (among others) doesn't like this.
> This target is required to make TCP work again if a multicast MAC
> address is used.

Yes, I know, I'm just wondering why you're using TCP at all for
synchronizing. Its not for traffic from the Internet I assume
since the node it ends up on is unknown to the outside anyways.

It really seems pretty hackish to add netfilter modules to work
around valid checks in the stack. I'd prefer if we can come up
with a nicer way that offers you the same functionality.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-10 14:03       ` Patrick McHardy
@ 2009-02-10 14:18         ` Jozsef Kadlecsik
  2009-02-10 14:22           ` Patrick McHardy
  0 siblings, 1 reply; 27+ messages in thread
From: Jozsef Kadlecsik @ 2009-02-10 14:18 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Pablo Neira Ayuso, netfilter-devel

On Tue, 10 Feb 2009, Patrick McHardy wrote:

> Pablo Neira Ayuso wrote:
> > Patrick McHardy wrote:
> > > Pablo Neira Ayuso wrote:
> > > > This patch adds the PKTTYPE target which can be used to mangle the
> > > > skbuff packet type field. This target is useful in conjunction with
> > > > the arptables mcmangle target to TCP working again when a
> > > > multicast hardware address is used. An example of its use:
> > > > 
> > > > iptables -I PREROUTING ! -s 224.0.0.0/4 -t mangle \
> > > >     -j PKTTYPE --to-pkt-type unicast
> > > > 
> > > > Given the following arptables rule-set:
> > > > 
> > > > arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6
> > > >     \ --mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
> > > > arptables -I INPUT --h-length 6 --destination-mac 01:00:5e:00:01:01
> > > >     \ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27
> > > > 
> > > > See arptables mcmangle target for further information.
> > > That one refers to this patch :) So you're actually communicating
> > > using TCP and multicast? Why don't you use UDP, which works fine
> > > using multicast without pkttype mangling?
> > 
> > If the netdevice uses multicast MAC address, the link layer sets skbuff
> > pkttype to PACKET_MULTICAST and TCP (among others) doesn't like this.
> > This target is required to make TCP work again if a multicast MAC
> > address is used.
> 
> Yes, I know, I'm just wondering why you're using TCP at all for
> synchronizing. Its not for traffic from the Internet I assume
> since the node it ends up on is unknown to the outside anyways.

No, that's not the synchronizing traffic, but the "normal" TCP traffic to 
be filtered by the firewalls, which have got multicast MAC addresses on 
their interfaces.
 
Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlec@mail.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
          H-1525 Budapest 114, POB. 49, Hungary

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-10 14:18         ` Jozsef Kadlecsik
@ 2009-02-10 14:22           ` Patrick McHardy
  2009-02-10 18:12             ` Pablo Neira Ayuso
  0 siblings, 1 reply; 27+ messages in thread
From: Patrick McHardy @ 2009-02-10 14:22 UTC (permalink / raw)
  To: Jozsef Kadlecsik; +Cc: Pablo Neira Ayuso, netfilter-devel

Jozsef Kadlecsik wrote:
> On Tue, 10 Feb 2009, Patrick McHardy wrote:
> 
>> Yes, I know, I'm just wondering why you're using TCP at all for
>> synchronizing. Its not for traffic from the Internet I assume
>> since the node it ends up on is unknown to the outside anyways.
> 
> No, that's not the syncronizing traffic, but the "normal" TCP traffic to 
> be filtered by the firewalls, which have got multicast MAC addresses on 
> their interfaces.

Multicast traffic is accepted for forwarding just fine; it's just
local TCP delivery that's refusing it. So it can't be forwarded
traffic.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-10 14:22           ` Patrick McHardy
@ 2009-02-10 18:12             ` Pablo Neira Ayuso
  2009-02-11 12:26               ` Patrick McHardy
  0 siblings, 1 reply; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-10 18:12 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Jozsef Kadlecsik, netfilter-devel

Patrick McHardy wrote:
> Jozsef Kadlecsik wrote:
>> On Tue, 10 Feb 2009, Patrick McHardy wrote:
>>
>>> Yes, I know, I'm just wondering why you're using TCP at all for
>>> synchronizing. Its not for traffic from the Internet I assume
>>> since the node it ends up on is unknown to the outside anyways.
>>
>> No, that's not the syncronizing traffic, but the "normal" TCP traffic
>> to be filtered by the firewalls, which have got multicast MAC
>> addresses on their interfaces.
> 
> Multicast traffic is accepted for forwarding just fine, its just
> local TCP delivery thats refusing it. So it can't be forwarded
> traffic.

You usually have some administration facility (like ssh) that would
break. Please, think that this can be also used to replace CLUSTERIP (to
be used in back-end servers, not only stateful firewalls).

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-10 18:12             ` Pablo Neira Ayuso
@ 2009-02-11 12:26               ` Patrick McHardy
  2009-02-11 14:19                 ` Pablo Neira Ayuso
  0 siblings, 1 reply; 27+ messages in thread
From: Patrick McHardy @ 2009-02-11 12:26 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Jozsef Kadlecsik, netfilter-devel

Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Jozsef Kadlecsik wrote:
>>> On Tue, 10 Feb 2009, Patrick McHardy wrote:
>>>
>>>> Yes, I know, I'm just wondering why you're using TCP at all for
>>>> synchronizing. Its not for traffic from the Internet I assume
>>>> since the node it ends up on is unknown to the outside anyways.
>>> No, that's not the syncronizing traffic, but the "normal" TCP traffic
>>> to be filtered by the firewalls, which have got multicast MAC
>>> addresses on their interfaces.
>> Multicast traffic is accepted for forwarding just fine, its just
>> local TCP delivery thats refusing it. So it can't be forwarded
>> traffic.
> 
> You usually have some administration facility (like ssh) that would
> break. Please, think that this can be also used to replace CLUSTERIP (to
> be used in back-end servers, not only stateful firewalls).

I see. Still, this module has only one purpose, which is to circumvent
valid checks to make a different hack (although a nicer one) work.
Perhaps simply move it into the cluster match (yes yes, I know). That
way we can also avoid a bit of the new-module overhead.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-11 12:26               ` Patrick McHardy
@ 2009-02-11 14:19                 ` Pablo Neira Ayuso
  2009-02-11 14:35                   ` Patrick McHardy
  0 siblings, 1 reply; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-11 14:19 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Jozsef Kadlecsik, netfilter-devel

Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> Patrick McHardy wrote:
>>> Jozsef Kadlecsik wrote:
>>>> On Tue, 10 Feb 2009, Patrick McHardy wrote:
>>>>
>>>>> Yes, I know, I'm just wondering why you're using TCP at all for
>>>>> synchronizing. Its not for traffic from the Internet I assume
>>>>> since the node it ends up on is unknown to the outside anyways.
>>>> No, that's not the syncronizing traffic, but the "normal" TCP traffic
>>>> to be filtered by the firewalls, which have got multicast MAC
>>>> addresses on their interfaces.
>>> Multicast traffic is accepted for forwarding just fine, its just
>>> local TCP delivery thats refusing it. So it can't be forwarded
>>> traffic.
>>
>> You usually have some administration facility (like ssh) that would
>> break. Please, think that this can be also used to replace CLUSTERIP (to
>> be used in back-end servers, not only stateful firewalls).
> 
> I see. Still, this module has only one purpose, which is to circumvent
> valid checks to make a different hack (although a nicer one) work.
> Perhaps simply move it into the cluster match (yes yes, I know). That
> way we can also avoid a bit of the new-module overhead.

Then, the cluster match would become the target match, since skbuff
cannot be modified from matches (they are passed as const). Becoming a
target means less flexibility :(

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-11 14:19                 ` Pablo Neira Ayuso
@ 2009-02-11 14:35                   ` Patrick McHardy
  2009-02-11 14:51                     ` Pablo Neira Ayuso
  0 siblings, 1 reply; 27+ messages in thread
From: Patrick McHardy @ 2009-02-11 14:35 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Jozsef Kadlecsik, netfilter-devel

Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Pablo Neira Ayuso wrote:
>>> Patrick McHardy wrote:
>>>> Jozsef Kadlecsik wrote:
>>>>> On Tue, 10 Feb 2009, Patrick McHardy wrote:
>>>>>
>>>>>> Yes, I know, I'm just wondering why you're using TCP at all for
>>>>>> synchronizing. Its not for traffic from the Internet I assume
>>>>>> since the node it ends up on is unknown to the outside anyways.
>>>>> No, that's not the syncronizing traffic, but the "normal" TCP traffic
>>>>> to be filtered by the firewalls, which have got multicast MAC
>>>>> addresses on their interfaces.
>>>> Multicast traffic is accepted for forwarding just fine, its just
>>>> local TCP delivery thats refusing it. So it can't be forwarded
>>>> traffic.
>>> You usually have some administration facility (like ssh) that would
>>> break. Please, think that this can be also used to replace CLUSTERIP (to
>>> be used in back-end servers, not only stateful firewalls).
>> I see. Still, this module has only one purpose, which is to circumvent
>> valid checks to make a different hack (although a nicer one) work.
>> Perhaps simply move it into the cluster match (yes yes, I know). That
>> way we can also avoid a bit of the new-module overhead.
> 
> Then, the cluster match would become the target match, since skbuff
> cannot be modified from matches (they are passed as const). Becoming a
> target means less flexibility :(

Well, a cast should "fix" that :) But feel free to suggest a
better method that doesn't need to expose this as a standalone
feature.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-11 14:35                   ` Patrick McHardy
@ 2009-02-11 14:51                     ` Pablo Neira Ayuso
  2009-02-11 14:54                       ` Patrick McHardy
  0 siblings, 1 reply; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-11 14:51 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Jozsef Kadlecsik, netfilter-devel

Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> Patrick McHardy wrote:
>>> Pablo Neira Ayuso wrote:
>>>> Patrick McHardy wrote:
>>>>> Jozsef Kadlecsik wrote:
>>>>>> On Tue, 10 Feb 2009, Patrick McHardy wrote:
>>>>>>
>>>>>>> Yes, I know, I'm just wondering why you're using TCP at all for
>>>>>>> synchronizing. Its not for traffic from the Internet I assume
>>>>>>> since the node it ends up on is unknown to the outside anyways.
>>>>>> No, that's not the syncronizing traffic, but the "normal" TCP traffic
>>>>>> to be filtered by the firewalls, which have got multicast MAC
>>>>>> addresses on their interfaces.
>>>>> Multicast traffic is accepted for forwarding just fine, its just
>>>>> local TCP delivery thats refusing it. So it can't be forwarded
>>>>> traffic.
>>>> You usually have some administration facility (like ssh) that would
>>>> break. Please, think that this can be also used to replace CLUSTERIP
>>>> (to
>>>> be used in back-end servers, not only stateful firewalls).
>>> I see. Still, this module has only one purpose, which is to circumvent
>>> valid checks to make a different hack (although a nicer one) work.
>>> Perhaps simply move it into the cluster match (yes yes, I know). That
>>> way we can also avoid a bit of the new-module overhead.
>>
>> Then, the cluster match would become the target match, since skbuff
>> cannot be modified from matches (they are passed as const). Becoming a
>> target means less flexibility :(
> 
> Well, a cast should "fix" that :) But feel free to suggest a
> better method that doesn't need to expose this as a standalone
> feature.

Hm, I forgot another point that is the fact that the PKTTYPE target is
not always required. Actually, if the switch can flood the same packets
to a set of nodes that are part of the cluster via "port mirroring" or
if the switch interprets the VRRP reserved MAC address range correctly
(I did not find any yet), the nodes in the cluster would not need to use
a multicast MAC address; in that case, the PKTTYPE target would not
be required.

Well, I'll do the hackish solution if that relieves the possible extra
bloat of one target that indeed only has this purpose ;). I'll send you
a new version of the patch asap.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] netfilter: xtables: add PKTTYPE target
  2009-02-11 14:51                     ` Pablo Neira Ayuso
@ 2009-02-11 14:54                       ` Patrick McHardy
  0 siblings, 0 replies; 27+ messages in thread
From: Patrick McHardy @ 2009-02-11 14:54 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Jozsef Kadlecsik, netfilter-devel

Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Well, a cast should "fix" that :) But feel free to suggest a
>> better method that doesn't need to expose this as a standalone
>> feature.
> 
> Hm, I forgot another point that is the fact that the PKTTYPE target is
> not always required. Actually, if the switch can flood the same packets
> to a set of nodes that are part of the cluster via "port mirroring" or
> if the switch interprets the VRRP reserved MAC address range correctly
> (I did not find any yet), the nodes in the cluster would not need to use
> a multicast MAC address; in that case, the PKTTYPE target would not be
> required.
> 
> Well, I'll do the hackish solution if that relieves the possible extra
> bloat of one target that indeed only has this purpose ;). I'll send you
> a new version of the patch asap.

Yeah, it's a bit hackish too. Ideally we find a cleaner way, but the
best I could come up with (use MACVLAN to make the multicast address
handled as unicast) isn't any cleaner itself.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/3] netfilter: arptables: add mcmangle target
@ 2009-02-06  7:41 Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-06  7:41 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch adds the mcmangle target for arptables which allows
altering the source hardware address in ARP with a multicast
hardware address. This target can be used to make a switch flood
packets to the ports that use the same MAC multicast address. This
is useful to deploy load-sharing clusters in environments in which
the switch does not provide a way to flood packets to several
ports.

Since all the nodes receive the same packets, each one decides whether
it handles a packet based on a hashing approach (see the `cluster'
iptables match that comes with this patchset).

Theoretically, the use of the reserved VRRP hardware address should
be fine for this, however, switches generally treat this hardware
address space as normal unicast hardware address. Thus, in practise,
it is not possible to have two nodes with the same VRRP hardware
address.

Please, note that this target violates RFC 1812 (section 3.3.2) since
an ethernet device must not use a multicast link address.

An example of the use of this target:

arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6 \
	--mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
arptables -I INPUT -i eth0 --h-length 6 --destination-mac \
	01:00:5e:00:01:01 -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

Where 00:zz:yy:xx:5a:27 is the original hardware address of this
node. Note that the mcmangle target registers an entry in the multicast
list that is required to get this working:

$ cat /proc/net/dev_mcast | grep eth0 | head -1
2    eth0            1     0     01005e000101

You need the PKTTYPE iptables target (included in this patchset) to
set skb->pkt_type to PACKET_HOST. Otherwise, you would only be able to
ICMP ping nodes in the network ;).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---

 include/linux/netfilter_arp/arpt_mcmangle.h |   14 ++++
 net/ipv4/netfilter/Kconfig                  |   12 +++
 net/ipv4/netfilter/Makefile                 |    1 
 net/ipv4/netfilter/arpt_mcmangle.c          |   98 +++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter_arp/arpt_mcmangle.h
 create mode 100644 net/ipv4/netfilter/arpt_mcmangle.c

diff --git a/include/linux/netfilter_arp/arpt_mcmangle.h b/include/linux/netfilter_arp/arpt_mcmangle.h
new file mode 100644
index 0000000..d14a1ab
--- /dev/null
+++ b/include/linux/netfilter_arp/arpt_mcmangle.h
@@ -0,0 +1,14 @@
+#ifndef _ARPT_MCMANGLE_H
+#define _ARPT_MCMANGLE_H
+#include <linux/netfilter_arp/arp_tables.h>
+
+struct net_device;
+
+struct arpt_mcmangle
+{
+	char ifname[IFNAMSIZ];
+	char mc_devaddr[ETH_ALEN];
+	struct net_device  __attribute__((aligned(8))) *dev;
+};
+
+#endif /* _ARPT_MCMANGLE_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3816e1d..50f38b2 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -392,6 +392,18 @@ config IP_NF_ARP_MANGLE
 	  Allows altering the ARP packet payload: source and destination
 	  hardware and network addresses.
 
+config IP_NF_ARP_MCMANGLE
+	tristate "ARP multicast address mangling"
+	help
+	  Allows altering the source unicast hardware address in ARP messages
+	  with a multicast hardware address. This target is useful to make a
+	  switch flood to all ports whose ethernet device have a multicast
+	  hardware address. Please, see that this target violates RFC 1812
+	  (section 3.3.2) since an ethernet device must not use a multicast
+	  link address.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 endif # IP_NF_ARPTABLES
 
 endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5f9b650..5c4cc3e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 # generic ARP tables
 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
 obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+obj-$(CONFIG_IP_NF_ARP_MCMANGLE) += arpt_mcmangle.o
 
 # just filtering instance of ARP tables for now
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
diff --git a/net/ipv4/netfilter/arpt_mcmangle.c b/net/ipv4/netfilter/arpt_mcmangle.c
new file mode 100644
index 0000000..3d981d1
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mcmangle.c
@@ -0,0 +1,98 @@
+/*
+ * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter_arp/arpt_mcmangle.h>
+
+static unsigned int
+arpt_mcmangle_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct arpt_mcmangle *mangle = par->targinfo;
+	const struct arphdr *arp;
+	unsigned char *arpptr;
+	int hln;
+
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
+
+	arp = arp_hdr(skb);
+	arpptr = skb_network_header(skb) + sizeof(*arp);
+	hln = arp->ar_hln;
+
+	/* We assume that pln and hln were checked in the match */
+	if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+	    (arpptr + hln > skb_tail_pointer(skb))) {
+		return NF_DROP;
+	}
+	memcpy(arpptr, mangle->mc_devaddr, hln);
+
+	return NF_ACCEPT;
+}
+
+static bool
+arpt_mcmangle_checkentry(const struct xt_tgchk_param *par)
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	struct net_device *dev;
+
+	if (!(mangle->mc_devaddr[0] & 0x01)) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong multicast address\n");
+		return false;
+	}
+	dev = dev_get_by_name(&init_net, mangle->ifname);
+	if (dev == NULL) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong `%s' interface\n",
+			mangle->ifname);
+		return false;
+	}
+	mangle->dev = dev;
+	if (dev_mc_add(dev, mangle->mc_devaddr, ETH_ALEN, 0) < 0) {
+		printk(KERN_ERR "arpt_mcmangle: cannot set multicast "
+				"address\n");
+		return false;
+	}
+	return true;
+}
+
+static void
+arpt_mcmangle_destroy(const struct xt_tgdtor_param *par)
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	dev_mc_delete(mangle->dev, mangle->mc_devaddr, ETH_ALEN, 0);
+	dev_put(mangle->dev);
+}
+
+static struct xt_target arpt_mcmangle_reg __read_mostly = {
+	.name		= "mcmangle",
+	.family		= NFPROTO_ARP,
+	.target		= arpt_mcmangle_tg,
+	.checkentry	= arpt_mcmangle_checkentry,
+	.destroy	= arpt_mcmangle_destroy,
+	.targetsize	= sizeof(struct arpt_mcmangle),
+	.hooks		= (1 << NF_ARP_OUT),
+	.me		= THIS_MODULE,
+};
+
+static int __init arpt_mcmangle_init(void)
+{
+	return xt_register_target(&arpt_mcmangle_reg);
+}
+
+static void __exit arpt_mcmangle_fini(void)
+{
+	xt_unregister_target(&arpt_mcmangle_reg);
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arptables: arp multicast mangle target");
+module_init(arpt_mcmangle_init);
+module_exit(arpt_mcmangle_fini);


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] netfilter: arptables: add mcmangle target
  2009-02-05 17:26 Pablo Neira Ayuso
@ 2009-02-05 17:41 ` Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-05 17:41 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

Damn crap, the smtp server that I'm using is broken. I'll retry later.

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/3] netfilter: arptables: add mcmangle target
@ 2009-02-05 17:26 Pablo Neira Ayuso
  2009-02-05 17:41 ` Pablo Neira Ayuso
  0 siblings, 1 reply; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-05 17:26 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch adds the mcmangle target for arptables which allows
altering the source hardware address in ARP with a multicast
hardware address. This target can be used to make a switch flood
packets to the ports that use the same MAC multicast address. This
is useful to deploy load-sharing clusters in environments in which
the switch does not provide a way to flood packets to several
ports.

Since all the nodes receive the same packets, each one decides whether
it handles a packet based on a hashing approach (see the `cluster'
iptables match that comes with this patchset).

Theoretically, the use of the reserved VRRP hardware address should
be fine for this, however, switches generally treat this hardware
address space as normal unicast hardware address. Thus, in practise,
it is not possible to have two nodes with the same VRRP hardware
address.

Please, note that this target violates RFC 1812 (section 3.3.2) since
an ethernet device must not use a multicast link address.

An example of the use of this target:

arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6 \
	--mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
arptables -I INPUT -i eth0 --h-length 6 --destination-mac \
	01:00:5e:00:01:01 -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

Where 00:zz:yy:xx:5a:27 is the original hardware address of this
node. Note that the mcmangle target registers an entry in the multicast
list that is required to get this working:

$ cat /proc/net/dev_mcast | grep eth0 | head -1
2    eth0            1     0     01005e000101

You need the PKTTYPE iptables target (included in this patchset) to
set skb->pkt_type to PACKET_HOST. Otherwise, you would only be able to
ICMP ping nodes in the network ;).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---

 include/linux/netfilter_arp/arpt_mcmangle.h |   14 ++++
 net/ipv4/netfilter/Kconfig                  |   12 +++
 net/ipv4/netfilter/Makefile                 |    1 
 net/ipv4/netfilter/arpt_mcmangle.c          |   98 +++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter_arp/arpt_mcmangle.h
 create mode 100644 net/ipv4/netfilter/arpt_mcmangle.c

diff --git a/include/linux/netfilter_arp/arpt_mcmangle.h b/include/linux/netfilter_arp/arpt_mcmangle.h
new file mode 100644
index 0000000..d14a1ab
--- /dev/null
+++ b/include/linux/netfilter_arp/arpt_mcmangle.h
@@ -0,0 +1,14 @@
+#ifndef _ARPT_MCMANGLE_H
+#define _ARPT_MCMANGLE_H
+#include <linux/netfilter_arp/arp_tables.h>
+
+struct net_device;
+
+struct arpt_mcmangle
+{
+	char ifname[IFNAMSIZ];
+	char mc_devaddr[ETH_ALEN];
+	struct net_device  __attribute__((aligned(8))) *dev;
+};
+
+#endif /* _ARPT_MCMANGLE_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3816e1d..50f38b2 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -392,6 +392,18 @@ config IP_NF_ARP_MANGLE
 	  Allows altering the ARP packet payload: source and destination
 	  hardware and network addresses.
 
+config IP_NF_ARP_MCMANGLE
+	tristate "ARP multicast address mangling"
+	help
+	  Allows altering the source unicast hardware address in ARP messages
+	  with a multicast hardware address. This target is useful to make a
+	  switch flood to all ports whose ethernet device have a multicast
+	  hardware address. Please, see that this target violates RFC 1812
+	  (section 3.3.2) since an ethernet device must not use a multicast
+	  link address.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 endif # IP_NF_ARPTABLES
 
 endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5f9b650..5c4cc3e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 # generic ARP tables
 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
 obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+obj-$(CONFIG_IP_NF_ARP_MCMANGLE) += arpt_mcmangle.o
 
 # just filtering instance of ARP tables for now
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
diff --git a/net/ipv4/netfilter/arpt_mcmangle.c b/net/ipv4/netfilter/arpt_mcmangle.c
new file mode 100644
index 0000000..3d981d1
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mcmangle.c
@@ -0,0 +1,98 @@
+/*
+ * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter_arp/arpt_mcmangle.h>
+
+static unsigned int
+arpt_mcmangle_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct arpt_mcmangle *mangle = par->targinfo;
+	const struct arphdr *arp;
+	unsigned char *arpptr;
+	int hln;
+
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
+
+	arp = arp_hdr(skb);
+	arpptr = skb_network_header(skb) + sizeof(*arp);
+	hln = arp->ar_hln;
+
+	/* We assume that pln and hln were checked in the match */
+	if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+	    (arpptr + hln > skb_tail_pointer(skb))) {
+		return NF_DROP;
+	}
+	memcpy(arpptr, mangle->mc_devaddr, hln);
+
+	return NF_ACCEPT;
+}
+
+static bool
+arpt_mcmangle_checkentry(const struct xt_tgchk_param *par)
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	struct net_device *dev;
+
+	if (!(mangle->mc_devaddr[0] & 0x01)) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong multicast address\n");
+		return false;
+	}
+	dev = dev_get_by_name(&init_net, mangle->ifname);
+	if (dev == NULL) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong `%s' interface\n",
+			mangle->ifname);
+		return false;
+	}
+	mangle->dev = dev;
+	if (dev_mc_add(dev, mangle->mc_devaddr, ETH_ALEN, 0) < 0) {
+		printk(KERN_ERR "arpt_mcmangle: cannot set multicast "
+				"address\n");
+		return false;
+	}
+	return true;
+}
+
+static void
+arpt_mcmangle_destroy(const struct xt_tgdtor_param *par)
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	dev_mc_delete(mangle->dev, mangle->mc_devaddr, ETH_ALEN, 0);
+	dev_put(mangle->dev);
+}
+
+static struct xt_target arpt_mcmangle_reg __read_mostly = {
+	.name		= "mcmangle",
+	.family		= NFPROTO_ARP,
+	.target		= arpt_mcmangle_tg,
+	.checkentry	= arpt_mcmangle_checkentry,
+	.destroy	= arpt_mcmangle_destroy,
+	.targetsize	= sizeof(struct arpt_mcmangle),
+	.hooks		= (1 << NF_ARP_OUT),
+	.me		= THIS_MODULE,
+};
+
+static int __init arpt_mcmangle_init(void)
+{
+	return xt_register_target(&arpt_mcmangle_reg);
+}
+
+static void __exit arpt_mcmangle_fini(void)
+{
+	xt_unregister_target(&arpt_mcmangle_reg);
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arptables: arp multicast mangle target");
+module_init(arpt_mcmangle_init);
+module_exit(arpt_mcmangle_fini);


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 1/3] netfilter: arptables: add mcmangle target
@ 2009-02-05 17:23 Pablo Neira Ayuso
  0 siblings, 0 replies; 27+ messages in thread
From: Pablo Neira Ayuso @ 2009-02-05 17:23 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch adds the mcmangle target for arptables which allows
altering the source hardware address in ARP with a multicast
hardware address. This target can be used to make a switch flood
packets to the ports that use the same MAC multicast address. This
is useful to deploy load-sharing clusters in environments in which
the switch does not provide a way to flood packets to several
ports.

Since all the nodes receive the same packets, each one decides whether
it handles a packet based on a hashing approach (see the `cluster'
iptables match that comes with this patchset).

Theoretically, the use of the reserved VRRP hardware address should
be fine for this, however, switches generally treat this hardware
address space as normal unicast hardware address. Thus, in practise,
it is not possible to have two nodes with the same VRRP hardware
address.

Please, note that this target violates RFC 1812 (section 3.3.2) since
an ethernet device must not use a multicast link address.

An example of the use of this target:

arptables -I OUTPUT -o eth0 -j mcmangle --h-length 6 \
	--mc-mangle-mac 01:00:5e:00:01:01 --mc-mangle-dev eth0
arptables -I INPUT -i eth0 --h-length 6 --destination-mac \
	01:00:5e:00:01:01 -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27

Where 00:zz:yy:xx:5a:27 is the original hardware address of this
node. Note that the mcmangle target registers an entry in the multicast
list that is required to get this working:

$ cat /proc/net/dev_mcast | grep eth0 | head -1
2    eth0            1     0     01005e000101

You need the PKTTYPE iptables target (included in this patchset) to
set skb->pkt_type to PACKET_HOST. Otherwise, you would only be able to
ICMP ping nodes in the network ;).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---

 include/linux/netfilter_arp/arpt_mcmangle.h |   14 ++++
 net/ipv4/netfilter/Kconfig                  |   12 +++
 net/ipv4/netfilter/Makefile                 |    1 
 net/ipv4/netfilter/arpt_mcmangle.c          |   98 +++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter_arp/arpt_mcmangle.h
 create mode 100644 net/ipv4/netfilter/arpt_mcmangle.c

diff --git a/include/linux/netfilter_arp/arpt_mcmangle.h b/include/linux/netfilter_arp/arpt_mcmangle.h
new file mode 100644
index 0000000..d14a1ab
--- /dev/null
+++ b/include/linux/netfilter_arp/arpt_mcmangle.h
@@ -0,0 +1,14 @@
+#ifndef _ARPT_MCMANGLE_H
+#define _ARPT_MCMANGLE_H
+#include <linux/netfilter_arp/arp_tables.h>
+
+struct net_device;
+
+struct arpt_mcmangle
+{
+	char ifname[IFNAMSIZ];
+	char mc_devaddr[ETH_ALEN];
+	struct net_device  __attribute__((aligned(8))) *dev;
+};
+
+#endif /* _ARPT_MCMANGLE_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3816e1d..50f38b2 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -392,6 +392,18 @@ config IP_NF_ARP_MANGLE
 	  Allows altering the ARP packet payload: source and destination
 	  hardware and network addresses.
 
+config IP_NF_ARP_MCMANGLE
+	tristate "ARP multicast address mangling"
+	help
+	  Allows altering the source unicast hardware address in ARP messages
+	  with a multicast hardware address. This target is useful to make a
+	  switch flood to all ports whose ethernet device have a multicast
+	  hardware address. Please, see that this target violates RFC 1812
+	  (section 3.3.2) since an ethernet device must not use a multicast
+	  link address.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 endif # IP_NF_ARPTABLES
 
 endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5f9b650..5c4cc3e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 # generic ARP tables
 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
 obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+obj-$(CONFIG_IP_NF_ARP_MCMANGLE) += arpt_mcmangle.o
 
 # just filtering instance of ARP tables for now
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
diff --git a/net/ipv4/netfilter/arpt_mcmangle.c b/net/ipv4/netfilter/arpt_mcmangle.c
new file mode 100644
index 0000000..3d981d1
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mcmangle.c
@@ -0,0 +1,98 @@
+/*
+ * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter_arp/arpt_mcmangle.h>
+
+static unsigned int
+arpt_mcmangle_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct arpt_mcmangle *mangle = par->targinfo;
+	const struct arphdr *arp;
+	unsigned char *arpptr;
+	int hln;
+
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
+
+	arp = arp_hdr(skb);
+	arpptr = skb_network_header(skb) + sizeof(*arp);
+	hln = arp->ar_hln;
+
+	/* We assume that pln and hln were checked in the match */
+	if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+	    (arpptr + hln > skb_tail_pointer(skb))) {
+		return NF_DROP;
+	}
+	memcpy(arpptr, mangle->mc_devaddr, hln);
+
+	return NF_ACCEPT;
+}
+
+static bool
+arpt_mcmangle_checkentry(const struct xt_tgchk_param *par)
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	struct net_device *dev;
+
+	if (!(mangle->mc_devaddr[0] & 0x01)) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong multicast address\n");
+		return false;
+	}
+	dev = dev_get_by_name(&init_net, mangle->ifname);
+	if (dev == NULL) {
+		printk(KERN_WARNING "arpt_mcmangle: wrong `%s' interface\n",
+			mangle->ifname);
+		return false;
+	}
+	mangle->dev = dev;
+	if (dev_mc_add(dev, mangle->mc_devaddr, ETH_ALEN, 0) < 0) {
+		printk(KERN_ERR "arpt_mcmangle: cannot set multicast "
+				"address\n");
+		return false;
+	}
+	return true;
+}
+
+static void
+arpt_mcmangle_destroy(const struct xt_tgdtor_param *par)
+{
+	struct arpt_mcmangle *mangle = par->targinfo;
+	dev_mc_delete(mangle->dev, mangle->mc_devaddr, ETH_ALEN, 0);
+	dev_put(mangle->dev);
+}
+
+static struct xt_target arpt_mcmangle_reg __read_mostly = {
+	.name		= "mcmangle",
+	.family		= NFPROTO_ARP,
+	.target		= arpt_mcmangle_tg,
+	.checkentry	= arpt_mcmangle_checkentry,
+	.destroy	= arpt_mcmangle_destroy,
+	.targetsize	= sizeof(struct arpt_mcmangle),
+	.hooks		= (1 << NF_ARP_OUT),
+	.me		= THIS_MODULE,
+};
+
+static int __init arpt_mcmangle_init(void)
+{
+	return xt_register_target(&arpt_mcmangle_reg);
+}
+
+static void __exit arpt_mcmangle_fini(void)
+{
+	xt_unregister_target(&arpt_mcmangle_reg);
+}
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arptables: arp multicast mangle target");
+module_init(arpt_mcmangle_init);
+module_exit(arpt_mcmangle_fini);


^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2009-02-11 14:55 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-28 14:58 [PATCH 1/3] netfilter: arptables: add mcmangle target Pablo Neira Ayuso
2009-01-28 14:58 ` [PATCH 2/3] netfilter: xtables: add PKTTYPE target Pablo Neira Ayuso
2009-01-28 16:11   ` Jan Engelhardt
2009-01-28 16:51     ` Pablo Neira Ayuso
2009-02-09 15:13   ` Patrick McHardy
2009-02-09 23:15     ` Pablo Neira Ayuso
2009-02-10 14:03       ` Patrick McHardy
2009-02-10 14:18         ` Jozsef Kadlecsik
2009-02-10 14:22           ` Patrick McHardy
2009-02-10 18:12             ` Pablo Neira Ayuso
2009-02-11 12:26               ` Patrick McHardy
2009-02-11 14:19                 ` Pablo Neira Ayuso
2009-02-11 14:35                   ` Patrick McHardy
2009-02-11 14:51                     ` Pablo Neira Ayuso
2009-02-11 14:54                       ` Patrick McHardy
2009-01-28 14:58 ` [PATCH 3/3] netfilter: xtables: add cluster match Pablo Neira Ayuso
2009-01-28 16:07   ` Jan Engelhardt
2009-01-28 16:38     ` Pablo Neira Ayuso
2009-02-09 15:25   ` Patrick McHardy
2009-02-09 23:23     ` Pablo Neira Ayuso
2009-02-09 15:11 ` [PATCH 1/3] netfilter: arptables: add mcmangle target Patrick McHardy
2009-02-09 23:13   ` Pablo Neira Ayuso
2009-02-10 11:16     ` Pablo Neira Ayuso
2009-02-05 17:23 Pablo Neira Ayuso
2009-02-05 17:26 Pablo Neira Ayuso
2009-02-05 17:41 ` Pablo Neira Ayuso
2009-02-06  7:41 Pablo Neira Ayuso

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.