* [PATCH nf-next-2.6] netfilter: add xt_cpu match @ 2010-07-22 14:03 Eric Dumazet 2010-07-22 14:19 ` Jan Engelhardt 0 siblings, 1 reply; 11+ messages in thread From: Eric Dumazet @ 2010-07-22 14:03 UTC (permalink / raw) To: Patrick McHardy; +Cc: Netfilter Development Mailinglist, netdev This match is a bit strange, being packet content agnostic... Still, in some situations a CPU match permits a better spreading of connections, or select targets only for a given cpu. With Remote Packet Steering or multiqueue NIC and appropriate IRQ affinities, we can distribute trafic on available cpus, per session. (all RX packets for a given flow is handled by a given cpu) Some legacy applications being not SMP friendly, one way to scale a server is to run multiple copies of them. Instead of randomly choosing an instance, we can use the cpu number as a key so that softirq handler for a whole instance is running on a single cpu, maximizing cache effects in TCP/UDP stacks. Using NAT for example, a four ways machine might run four copies of server application, using a separate listening port for each instance, but still presenting an unique external port : iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \ -j REDIRECT --to-port 8080 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \ -j REDIRECT --to-port 8081 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \ -j REDIRECT --to-port 8082 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \ -j REDIRECT --to-port 8083 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> --- include/linux/netfilter/Kbuild | 1 include/linux/netfilter/xt_cpu.h | 8 +++ net/netfilter/Kconfig | 9 ++++ net/netfilter/Makefile | 1 net/netfilter/xt_cpu.c | 65 +++++++++++++++++++++++++++++ 5 files changed, 84 insertions(+) diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index bb103f4..5c39a56 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -34,6 
+34,7 @@ header-y += xt_helper.h header-y += xt_length.h header-y += xt_limit.h header-y += xt_mac.h +header-y += xt_cpu.h header-y += xt_mark.h header-y += xt_multiport.h header-y += xt_osf.h diff --git a/include/linux/netfilter/xt_cpu.h b/include/linux/netfilter/xt_cpu.h index e69de29..fdf4202 100644 --- a/include/linux/netfilter/xt_cpu.h +++ b/include/linux/netfilter/xt_cpu.h @@ -0,0 +1,8 @@ +#ifndef _XT_CPU_H +#define _XT_CPU_H + +struct xt_cpu_info { + unsigned int cpu; + int invert; +}; +#endif /*_XT_MAC_H*/ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa2f106..85b07bd 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -754,6 +754,15 @@ config NETFILTER_XT_MATCH_MAC To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CPU + tristate '"cpu" match support' + depends on NETFILTER_ADVANCED + help + CPU matching allows you to match packets based on the CPU + currently handling the packet. + + To compile it as a module, choose M here. If unsure, say N. 
+ config NETFILTER_XT_MATCH_MARK tristate '"mark" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index e28420a..0fe7efd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index e69de29..23d5a76 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c @@ -0,0 +1,65 @@ +/* Kernel module to match running CPU */ + +/* + * Might be used to distribute connections on several daemons, if + * RPS (Remote Packet Steering) is enabled or NIC is multiqueue capable, + * each RX queue IRQ affined to one CPU (1:1 mapping) + * + * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 -j REDIRECT --to-port 8080 + * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 -j REDIRECT --to-port 8081 + * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 -j REDIRECT --to-port 8082 + * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 -j REDIRECT --to-port 8083 + * + */ + +/* (C) 2010 Eric Dumazet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/xt_cpu.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); +MODULE_DESCRIPTION("Xtables: CPU match"); + +/* + * Yes, packet content is not interesting for us, we only take care + * of cpu handling this packet + */ +static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + bool ret; + + ret = info->cpu == smp_processor_id(); + ret ^= info->invert; + return ret; +} + +static struct xt_match cpu_mt_reg __read_mostly = { + .name = "cpu", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = cpu_mt, + .matchsize = sizeof(struct xt_cpu_info), + .me = THIS_MODULE, +}; + +static int __init cpu_mt_init(void) +{ + return xt_register_match(&cpu_mt_reg); +} + +static void __exit cpu_mt_exit(void) +{ + xt_unregister_match(&cpu_mt_reg); +} + +module_init(cpu_mt_init); +module_exit(cpu_mt_exit); ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next-2.6] netfilter: add xt_cpu match 2010-07-22 14:03 [PATCH nf-next-2.6] netfilter: add xt_cpu match Eric Dumazet @ 2010-07-22 14:19 ` Jan Engelhardt 2010-07-22 15:18 ` Eric Dumazet 0 siblings, 1 reply; 11+ messages in thread From: Jan Engelhardt @ 2010-07-22 14:19 UTC (permalink / raw) To: Eric Dumazet; +Cc: Patrick McHardy, Netfilter Development Mailinglist, netdev On Thursday 2010-07-22 16:03, Eric Dumazet wrote: >This match is a bit strange, being packet content agnostic... >+/* >+ * Yes, packet content is not interesting for us, we only take care >+ * of cpu handling this packet >+ */ That is not so strange after all, we have many packet agnostic matches: xt_time, xt_condition, xt_IDLETIMER, xt_iface. So this little comment looks a bit redundant. Or it seems that academia can't come up with enough new protocols in time that we have to resort to do -m coffeemaker :) >@@ -0,0 +1,8 @@ >+#ifndef _XT_CPU_H >+#define _XT_CPU_H >+ >+struct xt_cpu_info { >+ unsigned int cpu; >+ int invert; >+}; >+#endif /*_XT_MAC_H*/ Please take a read in "Writing Netfilter Modules" e-book :-) It will tell you that types other than fixed ones are a no-no. >diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile >index e28420a..0fe7efd 100644 >--- a/net/netfilter/Makefile >+++ b/net/netfilter/Makefile >@@ -79,6 +79,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o > obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o > obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o > obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o >+obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o > obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o > obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o > obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o Try to keep it alphabetic (KConfig too). 
>+ * >+ * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 -j REDIRECT --to-port 8080 >+ * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 -j REDIRECT --to-port 8081 >+ * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 -j REDIRECT --to-port 8082 >+ * iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 -j REDIRECT --to-port 8083 >+ * >+ */ Well the commands you already have presented in the commit log, and the most efficient place for these is actually the manpage. >+static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) >+{ >+ const struct xt_cpu_info *info = par->matchinfo; >+ bool ret; >+ >+ ret = info->cpu == smp_processor_id(); >+ ret ^= info->invert; >+ return ret; >+} Looks simple enough that it could do it in a single line, return (info->cpu == smp_processor_id()) ^ !!info->invert; ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next-2.6] netfilter: add xt_cpu match 2010-07-22 14:19 ` Jan Engelhardt @ 2010-07-22 15:18 ` Eric Dumazet 2010-07-22 15:39 ` Jan Engelhardt 2010-07-23 11:00 ` Patrick McHardy 0 siblings, 2 replies; 11+ messages in thread From: Eric Dumazet @ 2010-07-22 15:18 UTC (permalink / raw) To: Jan Engelhardt; +Cc: Patrick McHardy, Netfilter Development Mailinglist, netdev Le jeudi 22 juillet 2010 à 16:19 +0200, Jan Engelhardt a écrit : > On Thursday 2010-07-22 16:03, Eric Dumazet wrote: > > >This match is a bit strange, being packet content agnostic... > >+/* > >+ * Yes, packet content is not interesting for us, we only take care > >+ * of cpu handling this packet > >+ */ > > That is not so strange after all, we have many packet agnostic matches: > xt_time, xt_condition, xt_IDLETIMER, xt_iface. > So this little comment looks a bit redundant. > > Or it seems that academia can't come up with enough new protocols in time that > we have to resort to do -m coffeemaker :) > > >@@ -0,0 +1,8 @@ > >+#ifndef _XT_CPU_H > >+#define _XT_CPU_H > >+ > >+struct xt_cpu_info { > >+ unsigned int cpu; > >+ int invert; > >+}; > >+#endif /*_XT_MAC_H*/ > > Please take a read in "Writing Netfilter Modules" e-book :-) > It will tell you that types other than fixed ones are a no-no. Ok, let's do that, but I doubt sizeof(int) can be different than 4 on a Linux 2.6 host right now. I prefer not doing the !!info->invert, and do the check only once. Thanks [PATCH nf-next-2.6] netfilter: add xt_cpu match In some situations a CPU match permits a better spreading of connections, or select targets only for a given cpu. With Remote Packet Steering or multiqueue NIC and appropriate IRQ affinities, we can distribute trafic on available cpus, per session. (all RX packets for a given flow is handled by a given cpu) Some legacy applications being not SMP friendly, one way to scale a server is to run multiple copies of them. 
Instead of randomly choosing an instance, we can use the cpu number as a key so that softirq handler for a whole instance is running on a single cpu, maximizing cache effects in TCP/UDP stacks. Using NAT for example, a four ways machine might run four copies of server application, using a separate listening port for each instance, but still presenting an unique external port : iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \ -j REDIRECT --to-port 8080 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \ -j REDIRECT --to-port 8081 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \ -j REDIRECT --to-port 8082 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \ -j REDIRECT --to-port 8083 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> --- include/linux/netfilter/Kbuild | 3 - include/linux/netfilter/xt_cpu.h | 11 +++++ net/netfilter/Kconfig | 9 ++++ net/netfilter/Makefile | 1 net/netfilter/xt_cpu.c | 63 +++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index bb103f4..1041a1d 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -19,12 +19,13 @@ header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h +header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h -header-y += xt_cluster.h +header-y += xt_cpu.h header-y += xt_dccp.h header-y += xt_dscp.h header-y += xt_esp.h diff --git a/include/linux/netfilter/xt_cpu.h b/include/linux/netfilter/xt_cpu.h index e69de29..93c7f11 100644 --- a/include/linux/netfilter/xt_cpu.h +++ b/include/linux/netfilter/xt_cpu.h @@ -0,0 +1,11 @@ +#ifndef _XT_CPU_H +#define _XT_CPU_H + +#include <linux/types.h> + +struct xt_cpu_info { + __u32 cpu; + __u32 invert; +}; + +#endif /*_XT_CPU_H*/ diff --git 
a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa2f106..523e8d0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -647,6 +647,15 @@ config NETFILTER_XT_MATCH_CONNTRACK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CPU + tristate '"cpu" match support' + depends on NETFILTER_ADVANCED + help + CPU matching allows you to match packets based on the CPU + currently handling the packet. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index e28420a..6da84c3 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index e69de29..b39db8a 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c @@ -0,0 +1,63 @@ +/* Kernel module to match running CPU */ + +/* + * Might be used to distribute connections on several daemons, if + * RPS (Remote Packet Steering) is enabled or NIC is multiqueue capable, + * each RX queue IRQ affined to one CPU (1:1 mapping) + * + */ + +/* (C) 2010 Eric Dumazet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/xt_cpu.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); +MODULE_DESCRIPTION("Xtables: CPU match"); + +static int cpu_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + return 0; +} + +static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + return (info->cpu == smp_processor_id()) ^ info->invert; +} + +static struct xt_match cpu_mt_reg __read_mostly = { + .name = "cpu", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cpu_mt_check, + .match = cpu_mt, + .matchsize = sizeof(struct xt_cpu_info), + .me = THIS_MODULE, +}; + +static int __init cpu_mt_init(void) +{ + return xt_register_match(&cpu_mt_reg); +} + +static void __exit cpu_mt_exit(void) +{ + xt_unregister_match(&cpu_mt_reg); +} + +module_init(cpu_mt_init); +module_exit(cpu_mt_exit); -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next-2.6] netfilter: add xt_cpu match 2010-07-22 15:18 ` Eric Dumazet @ 2010-07-22 15:39 ` Jan Engelhardt 2010-07-22 16:24 ` Eric Dumazet 2010-07-23 11:00 ` Patrick McHardy 1 sibling, 1 reply; 11+ messages in thread From: Jan Engelhardt @ 2010-07-22 15:39 UTC (permalink / raw) To: Eric Dumazet; +Cc: Patrick McHardy, Netfilter Development Mailinglist, netdev On Thursday 2010-07-22 17:18, Eric Dumazet wrote: >Le jeudi 22 juillet 2010 à 16:19 +0200, Jan Engelhardt a écrit : >> On Thursday 2010-07-22 16:03, Eric Dumazet wrote: >> >> >This match is a bit strange, being packet content agnostic... >> >+/* >> >+ * Yes, packet content is not interesting for us, we only take care >> >+ * of cpu handling this packet >> >+ */ >> >> That is not so strange after all, we have many packet agnostic matches: >> xt_time, xt_condition, xt_IDLETIMER, xt_iface. >> So this little comment looks a bit redundant. >> >> Or it seems that academia can't come up with enough new protocols in time that >> we have to resort to do -m coffeemaker :) >> >> >@@ -0,0 +1,8 @@ >> >+#ifndef _XT_CPU_H >> >+#define _XT_CPU_H >> >+ >> >+struct xt_cpu_info { >> >+ unsigned int cpu; >> >+ int invert; >> >+}; >> >+#endif /*_XT_MAC_H*/ >> >> Please take a read in "Writing Netfilter Modules" e-book :-) >> It will tell you that types other than fixed ones are a no-no. > >Ok, let's do that, but I doubt sizeof(int) can be different than 4 on a >Linux 2.6 host right now. Never say never. "long" already bit people in the past, and now we have that CONFIG_COMPAT stuff. If invert is the only flag, perhaps it makes sense to use __u8 for it. >I prefer not doing the !!info->invert, and do the check only once. 
>+static int cpu_mt_check(const struct xt_mtchk_param *par) >+{ >+ const struct xt_cpu_info *info = par->matchinfo; >+ >+ if (info->invert & ~1) >+ return -EINVAL; >+ return 0; >+} >+ >+static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) >+{ >+ const struct xt_cpu_info *info = par->matchinfo; >+ >+ return (info->cpu == smp_processor_id()) ^ info->invert; >+} That works nicely indeed. Do you anticipate any future flags? ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next-2.6] netfilter: add xt_cpu match 2010-07-22 15:39 ` Jan Engelhardt @ 2010-07-22 16:24 ` Eric Dumazet 0 siblings, 0 replies; 11+ messages in thread From: Eric Dumazet @ 2010-07-22 16:24 UTC (permalink / raw) To: Jan Engelhardt; +Cc: Patrick McHardy, Netfilter Development Mailinglist, netdev Le jeudi 22 juillet 2010 à 17:39 +0200, Jan Engelhardt a écrit : > Never say never. "long" already bit people in the past, and now we > have that CONFIG_COMPAT stuff. > I know the "long" problem pretty well; I received one of the first Alpha machines ever built in the world (DEC 3000 AXP, with a fast 133 MHz cpu ;) ), before I began to use Linux :) > If invert is the only flag, perhaps it makes sense to use __u8 > for it. > Quite frankly, it brings more problems than plain u32 - Possible security problems (padding bytes). Not applicable to iptables. - Some arches have slow byte/short accesses (21064 for example :) ) "int" is the natural type, fast on all arches. - Given alignment requirements of iptables rules, using fewer than 32 bits here saves no RAM. But I don't care that much. I even see that the compiler doesn't want to use an XOR instruction : 00000018 <cpu_mt>: 18: 55 push %ebp 19: 8b 42 04 mov 0x4(%edx),%eax 1c: 64 8b 15 00 00 00 00 mov %fs:0x0,%edx 23: 89 e5 mov %esp,%ebp 25: 5d pop %ebp 26: 39 10 cmp %edx,(%eax) 28: 0f 94 c2 sete %dl 2b: 0f b6 d2 movzbl %dl,%edx 2e: 3b 50 04 cmp 0x4(%eax),%edx 31: 0f 95 c0 setne %al 34: c3 ret -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH nf-next-2.6] netfilter: add xt_cpu match 2010-07-22 15:18 ` Eric Dumazet 2010-07-22 15:39 ` Jan Engelhardt @ 2010-07-23 11:00 ` Patrick McHardy 2010-07-23 13:43 ` [PATCH iptables] extension: " Eric Dumazet 1 sibling, 1 reply; 11+ messages in thread From: Patrick McHardy @ 2010-07-23 11:00 UTC (permalink / raw) To: Eric Dumazet; +Cc: Jan Engelhardt, Netfilter Development Mailinglist, netdev Am 22.07.2010 17:18, schrieb Eric Dumazet: > [PATCH nf-next-2.6] netfilter: add xt_cpu match > > In some situations a CPU match permits a better spreading of > connections, or select targets only for a given cpu. > > With Remote Packet Steering or multiqueue NIC and appropriate IRQ > affinities, we can distribute trafic on available cpus, per session. > (all RX packets for a given flow is handled by a given cpu) > > Some legacy applications being not SMP friendly, one way to scale a > server is to run multiple copies of them. > > Instead of randomly choosing an instance, we can use the cpu number as a > key so that softirq handler for a whole instance is running on a single > cpu, maximizing cache effects in TCP/UDP stacks. > > Using NAT for example, a four ways machine might run four copies of > server application, using a separate listening port for each instance, > but still presenting an unique external port : > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \ > -j REDIRECT --to-port 8080 > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \ > -j REDIRECT --to-port 8081 > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \ > -j REDIRECT --to-port 8082 > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \ > -j REDIRECT --to-port 8083 > Applied, thanks Eric. ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH iptables] extension: add xt_cpu match 2010-07-23 11:00 ` Patrick McHardy @ 2010-07-23 13:43 ` Eric Dumazet 2010-07-23 14:13 ` Patrick McHardy 2010-07-23 16:46 ` Jan Engelhardt 0 siblings, 2 replies; 11+ messages in thread From: Eric Dumazet @ 2010-07-23 13:43 UTC (permalink / raw) To: Patrick McHardy; +Cc: Jan Engelhardt, Netfilter Development Mailinglist, netdev Patrick, Here is iptables extension for xt_cpu match. I put same changelog than kernel one, tell me if its ok or not ;) Thanks [PATCH iptables] extension: add xt_cpu match Kernel 2.6.36 supports xt_cpu match In some situations a CPU match permits a better spreading of connections, or select targets only for a given cpu. With Remote Packet Steering or multiqueue NIC and appropriate IRQ affinities, we can distribute trafic on available cpus, per session. (all RX packets for a given flow are handled by a given cpu) Some legacy applications being not SMP friendly, one way to scale a server is to run multiple copies of them. Instead of randomly choosing an instance, we can use the cpu number as a key so that softirq handler for a whole instance is running on a single cpu, maximizing cache effects in TCP/UDP stacks. 
Using NAT for example, a four ways machine might run four copies of server application, using a separate listening port for each instance, but still presenting an unique external port : iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \ -j REDIRECT --to-port 8080 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \ -j REDIRECT --to-port 8081 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \ -j REDIRECT --to-port 8082 iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \ -j REDIRECT --to-port 8083 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> --- extensions/libxt_cpu.c | 98 +++++++++++++++++++++++++++++ extensions/libxt_cpu.man | 16 ++++ include/linux/netfilter/xt_cpu.h | 11 +++ 3 files changed, 125 insertions(+) diff --git a/extensions/libxt_cpu.c b/extensions/libxt_cpu.c index e69de29..869998d 100644 --- a/extensions/libxt_cpu.c +++ b/extensions/libxt_cpu.c @@ -0,0 +1,98 @@ +/* Shared library add-on to iptables to add CPU match support. */ +#include <stdio.h> +#include <netdb.h> +#include <string.h> +#include <stdlib.h> +#include <getopt.h> +#include <xtables.h> +#include <linux/netfilter/xt_cpu.h> + +static void cpu_help(void) +{ + printf( +"cpu match options:\n" +"[!] 
--cpu number Match CPU number\n"); +} + +static const struct option cpu_opts[] = { + { "cpu", 1, NULL, '1' }, + { .name = NULL } +}; + +static void +parse_cpu(const char *s, struct xt_cpu_info *info) +{ + unsigned int cpu; + char *end; + + if (!xtables_strtoui(s, &end, &cpu, 0, UINT32_MAX)) + xtables_param_act(XTF_BAD_VALUE, "cpu", "--cpu", s); + + if (*end != '\0') + xtables_param_act(XTF_BAD_VALUE, "cpu", "--cpu", s); + + info->cpu = cpu; +} + +static int +cpu_parse(int c, char **argv, int invert, unsigned int *flags, + const void *entry, struct xt_entry_match **match) +{ + struct xt_cpu_info *cpuinfo = (struct xt_cpu_info *)(*match)->data; + + switch (c) { + case '1': + xtables_check_inverse(optarg, &invert, &optind, 0, argv); + parse_cpu(optarg, cpuinfo); + if (invert) + cpuinfo->invert = 1; + *flags = 1; + break; + + default: + return 0; + } + + return 1; +} + +static void cpu_check(unsigned int flags) +{ + if (!flags) + xtables_error(PARAMETER_PROBLEM, + "You must specify `--cpu'"); +} + +static void +cpu_print(const void *ip, const struct xt_entry_match *match, int numeric) +{ + const struct xt_cpu_info *info = (void *)match->data; + + printf("cpu %s%u ", info->invert ? "! ":"", info->cpu); +} + +static void cpu_save(const void *ip, const struct xt_entry_match *match) +{ + const struct xt_cpu_info *info = (void *)match->data; + + printf("%s--cpu %u ", info->invert ? "! 
":"", info->cpu); +} + +static struct xtables_match cpu_match = { + .family = NFPROTO_UNSPEC, + .name = "cpu", + .version = XTABLES_VERSION, + .size = XT_ALIGN(sizeof(struct xt_cpu_info)), + .userspacesize = XT_ALIGN(sizeof(struct xt_cpu_info)), + .help = cpu_help, + .parse = cpu_parse, + .final_check = cpu_check, + .print = cpu_print, + .save = cpu_save, + .extra_opts = cpu_opts, +}; + +void _init(void) +{ + xtables_register_match(&cpu_match); +} diff --git a/extensions/libxt_cpu.man b/extensions/libxt_cpu.man index e69de29..f42ac7a 100644 --- a/extensions/libxt_cpu.man +++ b/extensions/libxt_cpu.man @@ -0,0 +1,16 @@ +.TP +[\fB!\fP] \fB\-\-cpu\fP \fInumber\fP + +Match cpu handling this packet. cpus are numbered from 0 to NR_CPUS-1 +Can be used in combination with RPS (Remote Packet Steering) or +multiqueue NICS to spread network traffic on different queues. +.PP +Example: +.PP +iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 0 + \-j REDIRECT \-\-to\-port 8080 +.PP +iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 1 + \-j REDIRECT \-\-to\-port 8081 +.PP +Available since linux 2.6.36 diff --git a/include/linux/netfilter/xt_cpu.h b/include/linux/netfilter/xt_cpu.h index e69de29..93c7f11 100644 --- a/include/linux/netfilter/xt_cpu.h +++ b/include/linux/netfilter/xt_cpu.h @@ -0,0 +1,11 @@ +#ifndef _XT_CPU_H +#define _XT_CPU_H + +#include <linux/types.h> + +struct xt_cpu_info { + __u32 cpu; + __u32 invert; +}; + +#endif /*_XT_CPU_H*/ ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH iptables] extension: add xt_cpu match 2010-07-23 13:43 ` [PATCH iptables] extension: " Eric Dumazet @ 2010-07-23 14:13 ` Patrick McHardy 2010-07-23 16:46 ` Jan Engelhardt 1 sibling, 0 replies; 11+ messages in thread From: Patrick McHardy @ 2010-07-23 14:13 UTC (permalink / raw) To: Eric Dumazet; +Cc: Jan Engelhardt, Netfilter Development Mailinglist, netdev On 23.07.2010 15:43, Eric Dumazet wrote: > extension: add xt_cpu match > > Kernel 2.6.36 supports xt_cpu match > > In some situations a CPU match permits a better spreading of > connections, or select targets only for a given cpu. > > With Remote Packet Steering or multiqueue NIC and appropriate IRQ > affinities, we can distribute trafic on available cpus, per session. > (all RX packets for a given flow are handled by a given cpu) > > Some legacy applications being not SMP friendly, one way to scale a > server is to run multiple copies of them. > > Instead of randomly choosing an instance, we can use the cpu number as a > key so that softirq handler for a whole instance is running on a single > cpu, maximizing cache effects in TCP/UDP stacks. > > Using NAT for example, a four ways machine might run four copies of > server application, using a separate listening port for each instance, > but still presenting an unique external port : > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \ > -j REDIRECT --to-port 8080 > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \ > -j REDIRECT --to-port 8081 > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \ > -j REDIRECT --to-port 8082 > > iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \ > -j REDIRECT --to-port 8083 > Applied to the iptables-next branch, thanks Eric. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH iptables] extension: add xt_cpu match 2010-07-23 13:43 ` [PATCH iptables] extension: " Eric Dumazet 2010-07-23 14:13 ` Patrick McHardy @ 2010-07-23 16:46 ` Jan Engelhardt 2010-07-23 17:30 ` Eric Dumazet 1 sibling, 1 reply; 11+ messages in thread From: Jan Engelhardt @ 2010-07-23 16:46 UTC (permalink / raw) To: Eric Dumazet; +Cc: Patrick McHardy, Netfilter Development Mailinglist, netdev On Friday 2010-07-23 15:43, Eric Dumazet wrote: >+ >+static const struct option cpu_opts[] = { >+ { "cpu", 1, NULL, '1' }, >+ { .name = NULL } >+}; I will never understand that sort of style mix logic. Why the C99 initializer only on the sentinel? { {.name = "cpu", .has_arg = true, .val = '1'}, {NULL}, }; >+cpu_print(const void *ip, const struct xt_entry_match *match, int numeric) >+{ >+ const struct xt_cpu_info *info = (void *)match->data; >+ >+ printf("cpu %s%u ", info->invert ? "! ":"", info->cpu); >+} >+ >+static void cpu_save(const void *ip, const struct xt_entry_match *match) >+{ >+ const struct xt_cpu_info *info = (void *)match->data; >+ >+ printf("%s--cpu %u ", info->invert ? "! ":"", info->cpu); >+} Using if (info->invert) would save the empty string. >diff --git a/extensions/libxt_cpu.man b/extensions/libxt_cpu.man >index e69de29..f42ac7a 100644 >--- a/extensions/libxt_cpu.man >+++ b/extensions/libxt_cpu.man >@@ -0,0 +1,16 @@ >+.TP >+[\fB!\fP] \fB\-\-cpu\fP \fInumber\fP >+ >+Match cpu handling this packet. cpus are numbered from 0 to NR_CPUS-1 Unwanted blank line. >+Can be used in combination with RPS (Remote Packet Steering) or >+multiqueue NICS to spread network traffic on different queues. >+.PP >+Example: >+.PP >+iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 0 >+ \-j REDIRECT \-\-to\-port 8080 Unwanted indent. >+.PP >+iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 1 >+ \-j REDIRECT \-\-to\-port 8081 >+.PP >+Available since linux 2.6.36 Linux. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH iptables] extension: add xt_cpu match 2010-07-23 16:46 ` Jan Engelhardt @ 2010-07-23 17:30 ` Eric Dumazet 2010-07-23 17:53 ` Jan Engelhardt 0 siblings, 1 reply; 11+ messages in thread From: Eric Dumazet @ 2010-07-23 17:30 UTC (permalink / raw) To: Jan Engelhardt; +Cc: Patrick McHardy, Netfilter Development Mailinglist, netdev Le vendredi 23 juillet 2010 à 18:46 +0200, Jan Engelhardt a écrit : > On Friday 2010-07-23 15:43, Eric Dumazet wrote: > >+ > >+static const struct option cpu_opts[] = { > >+ { "cpu", 1, NULL, '1' }, > >+ { .name = NULL } > >+}; > > I will never understand that sort of style mix logic. Why the > C99 initializer only on the sentinel? > > { > {.name = "cpu", .has_arg = true, .val = '1'}, > {NULL}, > }; > copy/paste from another module ? > >+cpu_print(const void *ip, const struct xt_entry_match *match, int numeric) > >+{ > >+ const struct xt_cpu_info *info = (void *)match->data; > >+ > >+ printf("cpu %s%u ", info->invert ? "! ":"", info->cpu); > >+} > >+ > >+static void cpu_save(const void *ip, const struct xt_entry_match *match) > >+{ > >+ const struct xt_cpu_info *info = (void *)match->data; > >+ > >+ printf("%s--cpu %u ", info->invert ? "! ":"", info->cpu); > >+} > > Using if (info->invert) would save the empty string. > Not sure what you mean. You want to save an empty string (1 byte long), and add multiple printf() calls ? > >diff --git a/extensions/libxt_cpu.man b/extensions/libxt_cpu.man > >index e69de29..f42ac7a 100644 > >--- a/extensions/libxt_cpu.man > >+++ b/extensions/libxt_cpu.man > >@@ -0,0 +1,16 @@ > >+.TP > >+[\fB!\fP] \fB\-\-cpu\fP \fInumber\fP > >+ > >+Match cpu handling this packet. cpus are numbered from 0 to NR_CPUS-1 > > Unwanted blank line. > > >+Can be used in combination with RPS (Remote Packet Steering) or > >+multiqueue NICS to spread network traffic on different queues. 
> >+.PP > >+Example: > >+.PP > >+iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 0 > >+ \-j REDIRECT \-\-to\-port 8080 > > Unwanted indent. > > >+.PP > >+iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 1 > >+ \-j REDIRECT \-\-to\-port 8081 > >+.PP > >+Available since linux 2.6.36 > > Linux. OK ;) I'll provide a cleanup patch, not only to xt_cpu but all other iptables modules that dont meet your coding style requirements ;) Thanks -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH iptables] extension: add xt_cpu match 2010-07-23 17:30 ` Eric Dumazet @ 2010-07-23 17:53 ` Jan Engelhardt 0 siblings, 0 replies; 11+ messages in thread From: Jan Engelhardt @ 2010-07-23 17:53 UTC (permalink / raw) To: Eric Dumazet; +Cc: Patrick McHardy, Netfilter Development Mailinglist, netdev On Friday 2010-07-23 19:30, Eric Dumazet wrote: >> >+ >> >+static const struct option cpu_opts[] = { >> >+ { "cpu", 1, NULL, '1' }, >> >+ { .name = NULL } >> >+}; >> >> I will never understand that sort of style mix logic. Why the >> C99 initializer only on the sentinel? >> >> { >> {.name = "cpu", .has_arg = true, .val = '1'}, >> {NULL}, >> }; >> > >copy/paste from another module ? > > >> >diff --git a/extensions/libxt_cpu.man b/extensions/libxt_cpu.man >> >index e69de29..f42ac7a 100644 >> >--- a/extensions/libxt_cpu.man >> >+++ b/extensions/libxt_cpu.man >> >@@ -0,0 +1,16 @@ >> >+.TP >> >+[\fB!\fP] \fB\-\-cpu\fP \fInumber\fP >> >+ >> >+Match cpu handling this packet. cpus are numbered from 0 to NR_CPUS-1 >> >> Unwanted blank line. >> >> >+Can be used in combination with RPS (Remote Packet Steering) or >> >+multiqueue NICS to spread network traffic on different queues. >> >+.PP >> >+Example: >> >+.PP >> >+iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 0 >> >+ \-j REDIRECT \-\-to\-port 8080 >> >> Unwanted indent. >> >> >+.PP >> >+iptables \-t nat \-A PREROUTING \-p tcp \-\-dport 80 \-m cpu \-\-cpu 1 >> >+ \-j REDIRECT \-\-to\-port 8081 >> >+.PP >> >+Available since linux 2.6.36 >> >> Linux. > > >OK ;) > >I'll provide a cleanup patch, not only to xt_cpu but all other iptables >modules that dont meet your coding style requirements ;) Well nah I'm already on it myself, given Patrick has already imported the patches. ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2010-07-23 17:53 UTC | newest] Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2010-07-22 14:03 [PATCH nf-next-2.6] netfilter: add xt_cpu match Eric Dumazet 2010-07-22 14:19 ` Jan Engelhardt 2010-07-22 15:18 ` Eric Dumazet 2010-07-22 15:39 ` Jan Engelhardt 2010-07-22 16:24 ` Eric Dumazet 2010-07-23 11:00 ` Patrick McHardy 2010-07-23 13:43 ` [PATCH iptables] extension: " Eric Dumazet 2010-07-23 14:13 ` Patrick McHardy 2010-07-23 16:46 ` Jan Engelhardt 2010-07-23 17:30 ` Eric Dumazet 2010-07-23 17:53 ` Jan Engelhardt
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.