All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
@ 2011-07-01 14:44 Rainer Weikusat
  2011-07-18 16:21 ` Patrick McHardy
  0 siblings, 1 reply; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-01 14:44 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel, linux-kernel

From: Rainer Weikusat <rweikusat@mobileactivedefense.com>

Presently, the nfnetlink_log.c file contains only very nominal support
for network namespaces: While it is possible to create sockets which
should theoretically receive NFLOG originated messages in arbitrary
network namespaces, there is only one table of nfulnl_instance
structures in the kernel and all log messages sent via __nfulnl_send
are forced into the init_net namespace so that only sockets created
in this namespace will ever actually receive log data. Likewise, the
nfulnl_rcv_nl_event notification callback won't destroy logging
instances created by processes in other network namespace upon process
death. The patch included below changes the code to use a logging
instance table per network namespace, to send messages generated from
within a specific namespace to sockets also belonging to this
namespace and to destroy logging instances created from other network
namespaces than init_net when cleaning up after a logging process
terminated. It doesn't touch the code dealing with nfnetlink_log /proc
files which thus remain restricted to the init_net namespace because
this isn't really needed in order to get per-namespace logging and
would require changes to other files, in particular, nf_log.c

Signed-Off-By: Rainer Weikusat <rweikusat@mobileactivedefense.com>
---
This is a feature needed for the main product of my present employer
and the patch is published here in the hope that it is more generally
useful as well. A more thorough change of the logging infrastructure
is unforunately way beyond the amount of time I'm allowed to spend on
this.

diff -prNu nf-2.6/net/netfilter/nfnetlink_log.c nf-2.6.patched//net/netfilter/nfnetlink_log.c
--- nf-2.6/net/netfilter/nfnetlink_log.c	2011-07-01 14:08:21.833369919 +0100
+++ nf-2.6.patched//net/netfilter/nfnetlink_log.c	2011-07-01 14:57:01.277536330 +0100
@@ -39,6 +39,12 @@
 #include "../bridge/br_private.h"
 #endif
 
+#ifdef CONFIG_NET_NS
+#define NET_NS 1
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#endif
+
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
 #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
@@ -47,6 +53,18 @@
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
 
+#define INSTANCE_BUCKETS	16
+
+struct nfulnl_instances {
+	spinlock_t lock;
+	atomic_t global_seq;
+	struct hlist_head table[INSTANCE_BUCKETS];
+	unsigned hash_init;
+#ifdef NET_NS
+	struct net *net;
+#endif
+};
+
 struct nfulnl_instance {
 	struct hlist_node hlist;	/* global list of instances */
 	spinlock_t lock;
@@ -67,14 +85,92 @@ struct nfulnl_instance {
 	u_int16_t flags;
 	u_int8_t copy_mode;
 	struct rcu_head rcu;
+#ifdef NET_NS
+	struct nfulnl_instances *instances;
+#endif
 };
 
-static DEFINE_SPINLOCK(instances_lock);
-static atomic_t global_seq;
+#ifndef NET_NS
+static struct nfulnl_instances instances;
 
-#define INSTANCE_BUCKETS	16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
-static unsigned int hash_init;
+static inline struct nfulnl_instances *
+instances_via_inst(struct nfulnl_instance *inst)
+{
+	(void)inst;
+	return &instances;
+}
+
+static inline struct nfulnl_instances *
+instances_via_netlink_notify(struct netlink_notify *n)
+{
+	(void)n;
+	return &instances;
+}
+
+static inline struct nfulnl_instances *
+instances_via_skb(struct sk_buff const *skb)
+{
+	(void)skb;
+	return &instances;
+}
+
+static inline struct net *inst_net(struct nfulnl_instance *inst)
+{
+	(void)inst;
+	return &init_net;
+}
+#else
+static int nfulnl_net_id;
+
+static inline struct nfulnl_instances *instances_via_net(struct net *net)
+{
+	return net_generic(net, nfulnl_net_id);
+}
+
+static inline struct nfulnl_instances *
+instances_via_inst(struct nfulnl_instance *inst)
+{
+	return inst->instances;
+}
+
+static inline struct nfulnl_instances *
+instances_via_netlink_notify(struct netlink_notify *n)
+{
+	return instances_via_net(n->net);
+}
+
+static struct nfulnl_instances *instances_via_skb(struct sk_buff const *skb)
+{
+	struct sock *sk;
+	struct net_device *dev;
+	struct net *net;
+
+	net = NULL;
+
+	sk = skb->sk;
+	if (sk)
+		net = sock_net(sk);
+
+	if (!net) {
+		dev = skb->dev;
+		if (dev)
+			net = dev_net(dev);
+	}
+
+	if (!net) {
+		PRINTR(KERN_WARNING "%s: could determine net ns for %p\n",
+		       __func__, skb);
+		return NULL;
+	}
+
+	return instances_via_net(net);
+}
+
+static inline struct net *inst_net(struct nfulnl_instance *inst)
+{
+	return instances_via_inst(inst)->net;
+}
+#endif
 
 static inline u_int8_t instance_hashfn(u_int16_t group_num)
 {
@@ -82,13 +178,13 @@ static inline u_int8_t instance_hashfn(u
 }
 
 static struct nfulnl_instance *
-__instance_lookup(u_int16_t group_num)
+__instance_lookup(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct hlist_head *head;
 	struct hlist_node *pos;
 	struct nfulnl_instance *inst;
 
-	head = &instance_table[instance_hashfn(group_num)];
+	head = &instances->table[instance_hashfn(group_num)];
 	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
@@ -103,12 +199,15 @@ instance_get(struct nfulnl_instance *ins
 }
 
 static struct nfulnl_instance *
-instance_lookup_get(u_int16_t group_num)
+instance_lookup_get(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct nfulnl_instance *inst;
 
+	if (!instances)
+		return NULL;
+
 	rcu_read_lock_bh();
-	inst = __instance_lookup(group_num);
+	inst = __instance_lookup(instances, group_num);
 	if (inst && !atomic_inc_not_zero(&inst->use))
 		inst = NULL;
 	rcu_read_unlock_bh();
@@ -132,13 +231,14 @@ instance_put(struct nfulnl_instance *ins
 static void nfulnl_timer(unsigned long data);
 
 static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int pid)
+instance_create(struct nfulnl_instances *instances,
+		u_int16_t group_num, int pid)
 {
 	struct nfulnl_instance *inst;
 	int err;
 
-	spin_lock_bh(&instances_lock);
-	if (__instance_lookup(group_num)) {
+	spin_lock_bh(&instances->lock);
+	if (__instance_lookup(instances, group_num)) {
 		err = -EEXIST;
 		goto out_unlock;
 	}
@@ -172,14 +272,17 @@ instance_create(u_int16_t group_num, int
 	inst->copy_range 	= NFULNL_COPY_RANGE_MAX;
 
 	hlist_add_head_rcu(&inst->hlist,
-		       &instance_table[instance_hashfn(group_num)]);
+			   &instances->table[instance_hashfn(group_num)]);
 
-	spin_unlock_bh(&instances_lock);
+#ifdef NET_NS
+	inst->instances = instances;
+#endif
+	spin_unlock_bh(&instances->lock);
 
 	return inst;
 
 out_unlock:
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 	return ERR_PTR(err);
 }
 
@@ -208,16 +311,17 @@ __instance_destroy(struct nfulnl_instanc
 }
 
 static inline void
-instance_destroy(struct nfulnl_instance *inst)
+instance_destroy(struct nfulnl_instances *instances,
+		 struct nfulnl_instance *inst)
 {
-	spin_lock_bh(&instances_lock);
+	spin_lock_bh(&instances->lock);
 	__instance_destroy(inst);
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 }
 
 static int
 nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
-		  unsigned int range)
+		unsigned int range)
 {
 	int status = 0;
 
@@ -308,7 +412,7 @@ nfulnl_alloc_skb(unsigned int inst_size,
 	skb = alloc_skb(n, GFP_ATOMIC);
 	if (!skb) {
 		pr_notice("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
-			inst_size);
+			  inst_size);
 
 		if (n > pkt_size) {
 			/* try to allocate only as much as we need for current
@@ -334,7 +438,7 @@ __nfulnl_send(struct nfulnl_instance *in
 			  NLMSG_DONE,
 			  sizeof(struct nfgenmsg));
 
-	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+	status = nfnetlink_unicast(inst->skb, inst_net(inst), inst->peer_pid,
 				   MSG_DONTWAIT);
 
 	inst->qlen = 0;
@@ -368,6 +472,11 @@ nfulnl_timer(unsigned long data)
 
 /* This is an inline function, we don't really care about a long
  * list of arguments */
+static inline atomic_t *global_seq_for(nfulnl_instances *inst)
+{
+	return &instances_via_inst(inst)->global_seq;
+}
+
 static inline int
 __build_packet_message(struct nfulnl_instance *inst,
 			const struct sk_buff *skb,
@@ -505,7 +614,7 @@ __build_packet_message(struct nfulnl_ins
 	/* global sequence number */
 	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
 		NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
-			     htonl(atomic_inc_return(&global_seq)));
+			     htonl(atomic_inc_return(global_seq_for(inst)));
 
 	if (data_len) {
 		struct nlattr *nla;
@@ -567,7 +676,8 @@ nfulnl_log_packet(u_int8_t pf,
 	else
 		li = &default_loginfo;
 
-	inst = instance_lookup_get(li->u.ulog.group);
+	inst = instance_lookup_get(instances_via_skb(skb),
+				   li->u.ulog.group);
 	if (!inst)
 		return;
 
@@ -675,27 +785,29 @@ EXPORT_SYMBOL_GPL(nfulnl_log_packet);
 
 static int
 nfulnl_rcv_nl_event(struct notifier_block *this,
-		   unsigned long event, void *ptr)
+		    unsigned long event, void *ptr)
 {
 	struct netlink_notify *n = ptr;
+	struct nfulnl_instances *instances;
 
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
 		int i;
 
+		instances = instances_via_netlink_notify(n);
+
 		/* destroy all instances for this pid */
-		spin_lock_bh(&instances_lock);
+		spin_lock_bh(&instances->lock);
 		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
 			struct nfulnl_instance *inst;
-			struct hlist_head *head = &instance_table[i];
+			struct hlist_head *head = &instances->table[i];
 
 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
-				if ((net_eq(n->net, &init_net)) &&
-				    (n->pid == inst->peer_pid))
+				if (n->pid == inst->peer_pid)
 					__instance_destroy(inst);
 			}
 		}
-		spin_unlock_bh(&instances_lock);
+		spin_unlock_bh(&instances->lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -734,6 +846,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
+	struct nfulnl_instances *instances;
 	struct nfulnl_instance *inst;
 	struct nfulnl_msg_config_cmd *cmd = NULL;
 	int ret = 0;
@@ -752,7 +865,11 @@ nfulnl_recv_config(struct sock *ctnl, st
 		}
 	}
 
-	inst = instance_lookup_get(group_num);
+	instances = instances_via_skb(skb);
+	if (!instances)
+		return -ENODEV;
+
+	inst = instance_lookup_get(instances, group_num);
 	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
 		ret = -EPERM;
 		goto out_put;
@@ -766,7 +883,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 				goto out_put;
 			}
 
-			inst = instance_create(group_num,
+			inst = instance_create(instances, group_num,
 					       NETLINK_CB(skb).pid);
 			if (IS_ERR(inst)) {
 				ret = PTR_ERR(inst);
@@ -779,7 +896,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 				goto out;
 			}
 
-			instance_destroy(inst);
+			instance_destroy(instances, inst);
 			goto out_put;
 		default:
 			ret = -ENOTSUPP;
@@ -862,17 +979,30 @@ static const struct nfnetlink_subsystem
 
 #ifdef CONFIG_PROC_FS
 struct iter_state {
+	struct nfulnl_instances *instances;
 	unsigned int bucket;
 };
 
+static inline struct nfulnl_instances *instances_for_seq(void)
+{
+#ifdef NET_NS
+	return instances_via_net(&init_net);
+#else
+	return &instances;
+#endif
+}
+
 static struct hlist_node *get_first(struct iter_state *st)
 {
 	if (!st)
 		return NULL;
 
+	st->instances = instances_for_seq();
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
-		if (!hlist_empty(&instance_table[st->bucket]))
-			return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		if (!hlist_empty(&st->instances->table[st->bucket]))
+			return rcu_dereference_bh(
+				hlist_first_rcu(
+					&st->instances->table[st->bucket]));
 	}
 	return NULL;
 }
@@ -884,7 +1014,8 @@ static struct hlist_node *get_next(struc
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		h = rcu_dereference_bh(
+			hlist_first_rcu(&st->instances->table[st->bucket]));
 	}
 	return h;
 }
@@ -953,17 +1084,69 @@ static const struct file_operations nful
 
 #endif /* PROC_FS */
 
+#ifdef NET_NS
+static int nfulnl_net_init(struct net *net)
+{
+	struct nfulnl_instances *instances;
+	int i;
+
+	instances = instances_via_net(net);
+	instances->net = net;
+	spin_lock_init(&instances->lock);
+
+	i = 0;
+	while (i < INSTANCE_BUCKETS) {
+		INIT_HLIST_HEAD(instances->table + i);
+		++i;
+	}
+
+	return 0;
+}
+
+static void nfulnl_net_exit(struct net *net)
+{
+	struct nfulnl_instances *instances;
+	int i;
+
+	instances = instances_via_net(net);
+
+	i = 0;
+	while (i < INSTANCE_BUCKETS) {
+		if (!hlist_empty(instances->table + i))
+			printk(KERN_WARNING "%s: slot %d not empty\n",
+			       __func__, i);
+		++i;
+	}
+}
+
+static struct pernet_operations nfulnl_net_ops = {
+	.init =		nfulnl_net_init,
+	.exit =		nfulnl_net_exit,
+	.id =		&nfulnl_net_id,
+	.size =		sizeof(struct nfulnl_instances)
+};
+#endif /* NET_NS */
+
 static int __init nfnetlink_log_init(void)
 {
-	int i, status = -ENOMEM;
+	int status = -ENOMEM;
+
+#ifndef NET_NS
+	int i;
 
+	spin_lock_init(&instances.lock);
 	for (i = 0; i < INSTANCE_BUCKETS; i++)
-		INIT_HLIST_HEAD(&instance_table[i]);
+		INIT_HLIST_HEAD(&instances.table[i]);
 
 	/* it's not really all that important to have a random value, so
 	 * we can do this from the init function, even if there hasn't
 	 * been that much entropy yet */
-	get_random_bytes(&hash_init, sizeof(hash_init));
+	get_random_bytes(&instances.hash_init, sizeof(instances.hash_init));
+#else
+	status = register_pernet_subsys(&nfulnl_net_ops);
+	if (status)
+		return status;
+#endif
 
 	netlink_register_notifier(&nfulnl_rtnl_notifier);
 	status = nfnetlink_subsys_register(&nfulnl_subsys);
@@ -998,6 +1181,10 @@ cleanup_netlink_notifier:
 
 static void __exit nfnetlink_log_fini(void)
 {
+#ifdef NET_NS
+	unregister_pernet_subsys(&nfulnl_net_ops);
+#endif
+
 	nf_log_unregister(&nfulnl_logger);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("nfnetlink_log", proc_net_netfilter);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-01 14:44 [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c Rainer Weikusat
@ 2011-07-18 16:21 ` Patrick McHardy
  2011-07-18 17:56   ` Rainer Weikusat
  0 siblings, 1 reply; 21+ messages in thread
From: Patrick McHardy @ 2011-07-18 16:21 UTC (permalink / raw)
  To: Rainer Weikusat; +Cc: netfilter-devel, linux-kernel

On 01.07.2011 16:44, Rainer Weikusat wrote:
> From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
> 
> Presently, the nfnetlink_log.c file contains only very nominal support
> for network namespaces: While it is possible to create sockets which
> should theoretically receive NFLOG originated messages in arbitrary
> network namespaces, there is only one table of nfulnl_instance
> structures in the kernel and all log messages sent via __nfulnl_send
> are forced into the init_net namespace so that only sockets created
> in this namespace will ever actually receive log data. Likewise, the
> nfulnl_rcv_nl_event notification callback won't destroy logging
> instances created by processes in other network namespace upon process
> death. The patch included below changes the code to use a logging
> instance table per network namespace, to send messages generated from
> within a specific namespace to sockets also belonging to this
> namespace and to destroy logging instances created from other network
> namespaces than init_net when cleaning up after a logging process
> terminated. It doesn't touch the code dealing with nfnetlink_log /proc
> files which thus remain restricted to the init_net namespace because
> this isn't really needed in order to get per-namespace logging and
> would require changes to other files, in particular, nf_log.c
> 
> Signed-Off-By: Rainer Weikusat <rweikusat@mobileactivedefense.com>
> ---
> This is a feature needed for the main product of my present employer
> and the patch is published here in the hope that it is more generally
> useful as well. A more thorough change of the logging infrastructure
> is unforunately way beyond the amount of time I'm allowed to spend on
> this.
> 
> diff -prNu nf-2.6/net/netfilter/nfnetlink_log.c nf-2.6.patched//net/netfilter/nfnetlink_log.c
> --- nf-2.6/net/netfilter/nfnetlink_log.c	2011-07-01 14:08:21.833369919 +0100
> +++ nf-2.6.patched//net/netfilter/nfnetlink_log.c	2011-07-01 14:57:01.277536330 +0100
> @@ -39,6 +39,12 @@
>  #include "../bridge/br_private.h"
>  #endif
>  
> +#ifdef CONFIG_NET_NS
> +#define NET_NS 1
> +#include <net/net_namespace.h>
> +#include <net/netns/generic.h>
> +#endif
> +
>  #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
>  #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
>  #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
> @@ -47,6 +53,18 @@
>  #define PRINTR(x, args...)	do { if (net_ratelimit()) \
>  				     printk(x, ## args); } while (0);
>  
> +#define INSTANCE_BUCKETS	16
> +
> +struct nfulnl_instances {
> +	spinlock_t lock;
> +	atomic_t global_seq;
> +	struct hlist_head table[INSTANCE_BUCKETS];
> +	unsigned hash_init;
> +#ifdef NET_NS
> +	struct net *net;
> +#endif
> +};
> +
>  struct nfulnl_instance {
>  	struct hlist_node hlist;	/* global list of instances */
>  	spinlock_t lock;
> @@ -67,14 +85,92 @@ struct nfulnl_instance {
>  	u_int16_t flags;
>  	u_int8_t copy_mode;
>  	struct rcu_head rcu;
> +#ifdef NET_NS
> +	struct nfulnl_instances *instances;
> +#endif

This seems odd, the usual way is to add the global data to the
net-ns structure.

> +#ifndef NET_NS
> +static struct nfulnl_instances instances;
>  
> -#define INSTANCE_BUCKETS	16
> -static struct hlist_head instance_table[INSTANCE_BUCKETS];
> -static unsigned int hash_init;
> +static inline struct nfulnl_instances *
> +instances_via_inst(struct nfulnl_instance *inst)
> +{
> +	(void)inst;
> +	return &instances;
> +}

... then you don't need all this because it will automatically
use the structures from init_net when CONFIG_NET_NS=n. Basically
everything depending on CONFIG_NET_NS is wrong, this is handled
automatically if you're using the API the proper way. A simple
example would be nfnetlink.c.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 16:21 ` Patrick McHardy
@ 2011-07-18 17:56   ` Rainer Weikusat
  2011-07-18 19:11     ` Rainer Weikusat
  2011-07-18 19:19     ` Alexey Dobriyan
  0 siblings, 2 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-18 17:56 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Rainer Weikusat, netfilter-devel, linux-kernel

Patrick McHardy <kaber@trash.net> writes:
> On 01.07.2011 16:44, Rainer Weikusat wrote:
>> From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
>> 
>> Presently, the nfnetlink_log.c file contains only very nominal support
>> for network namespaces: While it is possible to create sockets which
>> should theoretically receive NFLOG originated messages in arbitrary
>> network namespaces, there is only one table of nfulnl_instance
>> structures in the kernel and all log messages sent via __nfulnl_send
>> are forced into the init_net namespace so that only sockets created
>> in this namespace will ever actually receive log data.

[...]

>> +#define INSTANCE_BUCKETS	16
>> +
>> +struct nfulnl_instances {
>> +	spinlock_t lock;
>> +	atomic_t global_seq;
>> +	struct hlist_head table[INSTANCE_BUCKETS];
>> +	unsigned hash_init;
>> +#ifdef NET_NS
>> +	struct net *net;
>> +#endif
>> +};
>> +
>>  struct nfulnl_instance {
>>  	struct hlist_node hlist;	/* global list of instances */
>>  	spinlock_t lock;
>> @@ -67,14 +85,92 @@ struct nfulnl_instance {
>>  	u_int16_t flags;
>>  	u_int8_t copy_mode;
>>  	struct rcu_head rcu;
>> +#ifdef NET_NS
>> +	struct nfulnl_instances *instances;
>> +#endif
>
> This seems odd, the usual way is to add the global data to the
> net-ns structure.

Since a facility for having 'per subsystem' network namespace specific
data exists, there seems to be little reason to not use it. The
interface is defined in include/net/netns/generic.h and documented as

 * Generic net pointers are to be used by modules to put some private
 * stuff on the struct net without explicit struct net modification

>> +#ifndef NET_NS
>> +static struct nfulnl_instances instances;
>>  
>> -#define INSTANCE_BUCKETS	16
>> -static struct hlist_head instance_table[INSTANCE_BUCKETS];
>> -static unsigned int hash_init;
>> +static inline struct nfulnl_instances *
>> +instances_via_inst(struct nfulnl_instance *inst)
>> +{
>> +	(void)inst;
>> +	return &instances;
>> +}
>
> ... then you don't need all this because it will automatically
> use the structures from init_net when CONFIG_NET_NS=n

But then, the more complicated access method via struct net will
always be used while anything resembling network namespace support
will be removed at compile-time if the kernel is configured without
network namespace support for this code. Which happened to be the
point of this exercise.

> Basically everything depending on CONFIG_NET_NS is wrong,
> this is handled automatically if you're using the API the proper
> way.

There is no such thing as 'an API which can be used in the proper way'
here for the simple reason that not even functional documentation on
this exists (AFAIK), let alone documentation about usage policies
someone would like to mandate.





^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 17:56   ` Rainer Weikusat
@ 2011-07-18 19:11     ` Rainer Weikusat
  2011-07-18 19:19     ` Alexey Dobriyan
  1 sibling, 0 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-18 19:11 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Rainer Weikusat, netfilter-devel, linux-kernel

Rainer Weikusat <rw@sapphire.mobileactivedefense.com> writes:
> Patrick McHardy <kaber@trash.net> writes:

[...]

>>> +#define INSTANCE_BUCKETS	16
>>> +
>>> +struct nfulnl_instances {
>>> +	spinlock_t lock;
>>> +	atomic_t global_seq;
>>> +	struct hlist_head table[INSTANCE_BUCKETS];
>>> +	unsigned hash_init;
>>> +#ifdef NET_NS
>>> +	struct net *net;
>>> +#endif
>>> +};
>>> +
>>>  struct nfulnl_instance {
>>>  	struct hlist_node hlist;	/* global list of instances */
>>>  	spinlock_t lock;
>>> @@ -67,14 +85,92 @@ struct nfulnl_instance {
>>>  	u_int16_t flags;
>>>  	u_int8_t copy_mode;
>>>  	struct rcu_head rcu;
>>> +#ifdef NET_NS
>>> +	struct nfulnl_instances *instances;
>>> +#endif
>>
>> This seems odd, the usual way is to add the global data to the
>> net-ns structure.
>
> Since a facility for having 'per subsystem' network namespace specific
> data exists, there seems to be little reason to not use it.

An additional remark: There is actually a reason for using it, namely,
'adding global data to the net-ns structure' implies that this
structure has to contain per-module data of modules which aren't
loaded, while using 'generic net pointers' enables this data to be
allocated/ deallocated on module load/ unload.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 17:56   ` Rainer Weikusat
  2011-07-18 19:11     ` Rainer Weikusat
@ 2011-07-18 19:19     ` Alexey Dobriyan
  2011-07-18 19:43       ` Rainer Weikusat
  1 sibling, 1 reply; 21+ messages in thread
From: Alexey Dobriyan @ 2011-07-18 19:19 UTC (permalink / raw)
  To: Rainer Weikusat; +Cc: Patrick McHardy, netfilter-devel, linux-kernel

On Mon, Jul 18, 2011 at 06:56:11PM +0100, Rainer Weikusat wrote:
> Patrick McHardy <kaber@trash.net> writes:
> > On 01.07.2011 16:44, Rainer Weikusat wrote:
> >> From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
> >> 
> >> Presently, the nfnetlink_log.c file contains only very nominal support
> >> for network namespaces: While it is possible to create sockets which
> >> should theoretically receive NFLOG originated messages in arbitrary
> >> network namespaces, there is only one table of nfulnl_instance
> >> structures in the kernel and all log messages sent via __nfulnl_send
> >> are forced into the init_net namespace so that only sockets created
> >> in this namespace will ever actually receive log data.
> 
> [...]
> 
> >> +#define INSTANCE_BUCKETS	16
> >> +
> >> +struct nfulnl_instances {
> >> +	spinlock_t lock;
> >> +	atomic_t global_seq;
> >> +	struct hlist_head table[INSTANCE_BUCKETS];
> >> +	unsigned hash_init;
> >> +#ifdef NET_NS
> >> +	struct net *net;
> >> +#endif
> >> +};
> >> +
> >>  struct nfulnl_instance {
> >>  	struct hlist_node hlist;	/* global list of instances */
> >>  	spinlock_t lock;
> >> @@ -67,14 +85,92 @@ struct nfulnl_instance {
> >>  	u_int16_t flags;
> >>  	u_int8_t copy_mode;
> >>  	struct rcu_head rcu;
> >> +#ifdef NET_NS
> >> +	struct nfulnl_instances *instances;
> >> +#endif
> >
> > This seems odd, the usual way is to add the global data to the
> > net-ns structure.
> 
> Since a facility for having 'per subsystem' network namespace specific
> data exists, there seems to be little reason to not use it. The
> interface is defined in include/net/netns/generic.h and documented as
> 
>  * Generic net pointers are to be used by modules to put some private
>  * stuff on the struct net without explicit struct net modification
> 
> >> +#ifndef NET_NS
> >> +static struct nfulnl_instances instances;
> >>  
> >> -#define INSTANCE_BUCKETS	16
> >> -static struct hlist_head instance_table[INSTANCE_BUCKETS];
> >> -static unsigned int hash_init;
> >> +static inline struct nfulnl_instances *
> >> +instances_via_inst(struct nfulnl_instance *inst)
> >> +{
> >> +	(void)inst;
> >> +	return &instances;
> >> +}
> >
> > ... then you don't need all this because it will automatically
> > use the structures from init_net when CONFIG_NET_NS=n
> 
> But then, the more complicated access method via struct net will
> always be used while anything resembling network namespace support
> will be removed at compile-time if the kernel is configured without
> network namespace support for this code. Which happened to be the
> point of this exercise.
> 
> > Basically everything depending on CONFIG_NET_NS is wrong,
> > this is handled automatically if you're using the API the proper
> > way.
> 
> There is no such thing as 'an API which can be used in the proper way'
> here for the simple reason that not even functional documentation on
> this exists (AFAIK), let alone documentation about usage policies
> someone would like to mandate.

We did whole networking without sprinkling ifdefs and left them only at
few strategic places, now how did we manage to do it?
This functional doc argument, well, we aren't corporate IT shop
where it counts.

Briefly looking, there is no need for instances_via_skb(),
it's badly named and netns can be found at netlink control socket.

_instances struct is unnecessary.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 19:19     ` Alexey Dobriyan
@ 2011-07-18 19:43       ` Rainer Weikusat
  2011-07-18 19:46         ` David Miller
  0 siblings, 1 reply; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-18 19:43 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: Rainer Weikusat, Patrick McHardy, netfilter-devel, linux-kernel

Alexey Dobriyan <adobriyan@gmail.com> writes:
> On Mon, Jul 18, 2011 at 06:56:11PM +0100, Rainer Weikusat wrote:
>> Patrick McHardy <kaber@trash.net> writes:
>> > On 01.07.2011 16:44, Rainer Weikusat wrote:
>> >> From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
>> >> 
>> >> Presently, the nfnetlink_log.c file contains only very nominal support
>> >> for network namespaces: While it is possible to create sockets which
>> >> should theoretically receive NFLOG originated messages in arbitrary
>> >> network namespaces, there is only one table of nfulnl_instance
>> >> structures in the kernel and all log messages sent via __nfulnl_send
>> >> are forced into the init_net namespace so that only sockets created
>> >> in this namespace will ever actually receive log data.
>> 
>> [...]
>> 
>> >> +#define INSTANCE_BUCKETS	16
>> >> +
>> >> +struct nfulnl_instances {
>> >> +	spinlock_t lock;
>> >> +	atomic_t global_seq;
>> >> +	struct hlist_head table[INSTANCE_BUCKETS];
>> >> +	unsigned hash_init;
>> >> +#ifdef NET_NS
>> >> +	struct net *net;
>> >> +#endif
>> >> +};
>> >> +
>> >>  struct nfulnl_instance {
>> >>  	struct hlist_node hlist;	/* global list of instances */
>> >>  	spinlock_t lock;
>> >> @@ -67,14 +85,92 @@ struct nfulnl_instance {
>> >>  	u_int16_t flags;
>> >>  	u_int8_t copy_mode;
>> >>  	struct rcu_head rcu;
>> >> +#ifdef NET_NS
>> >> +	struct nfulnl_instances *instances;
>> >> +#endif
>> >
>> > This seems odd, the usual way is to add the global data to the
>> > net-ns structure.
>> 
>> Since a facility for having 'per subsystem' network namespace specific
>> data exists, there seems to be little reason to not use it. The
>> interface is defined in include/net/netns/generic.h and documented as
>> 
>>  * Generic net pointers are to be used by modules to put some private
>>  * stuff on the struct net without explicit struct net modification
>> 
>> >> +#ifndef NET_NS
>> >> +static struct nfulnl_instances instances;
>> >>  
>> >> -#define INSTANCE_BUCKETS	16
>> >> -static struct hlist_head instance_table[INSTANCE_BUCKETS];
>> >> -static unsigned int hash_init;
>> >> +static inline struct nfulnl_instances *
>> >> +instances_via_inst(struct nfulnl_instance *inst)
>> >> +{
>> >> +	(void)inst;
>> >> +	return &instances;
>> >> +}
>> >
>> > ... then you don't need all this because it will automatically
>> > use the structures from init_net when CONFIG_NET_NS=n
>> 
>> But then, the more complicated access method via struct net will
>> always be used while anything resembling network namespace support
>> will be removed at compile-time if the kernel is configured without
>> network namespace support for this code. Which happened to be the
>> point of this exercise.
>> 
>> > Basically everything depending on CONFIG_NET_NS is wrong,
>> > this is handled automatically if you're using the API the proper
>> > way.
>> 
>> There is no such thing as 'an API which can be used in the proper way'
>> here for the simple reason that not even functional documentation on
>> this exists (AFAIK), let alone documentation about usage policies
>> someone would like to mandate.
>
> We did whole networking without sprinkling ifdefs and left them only at
> few strategic places,

[rw@sapphire]~/work/linux-2-6/net/netfilter $find -name '*.c' | xargs grep '^#ifdef' | wc -l
239
[rw@sapphire]~/work/linux-2-6/net/netfilter $..
[rw@sapphire]~/work/linux-2-6/net $find -name '*.c' | xargs grep '^#ifdef' | wc -l
1672

> now how did we manage to do it?

As it seems: Not at all.

> This functional doc argument, well, we aren't corporate IT shop
> where it counts.

Yes. And because of this, you cannot expect independent parties to use
the code as if they were employees of your corporate IT shop who are
supposed to obey to the documented procedures because there are no
such documented procedures. Instead, it boils down to a matter of
indivdual judgement (where different individuals will invariably end
up doing things in different ways).

> Briefly looking, there is no need for instances_via_skb(),
> it's badly named

It's named according to what it does (locates an instances table by
going through a struct skbuff). Also, this table needs to be located
...

> and netns can be found at netlink control socket.

... and 'but there is another way to do it' (except that there
isn't really since the 'netlink control socket' is not available in
nfulnl_log_packet, at least not w/o changing the signature of this
function) isn't really an argument against a specific way of
accomplishing the same.

> _instances struct is unnecessary.

By extension, it follows that the statically defined instances table
in the 'stock' module should also be unnecessary. But then, the
routine used to locate logging instances with the help of this table
(__instance_lookup) must also be unnecessary ...



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 19:43       ` Rainer Weikusat
@ 2011-07-18 19:46         ` David Miller
  2011-07-18 20:17           ` Rainer Weikusat
  0 siblings, 1 reply; 21+ messages in thread
From: David Miller @ 2011-07-18 19:46 UTC (permalink / raw)
  To: rweikusat; +Cc: adobriyan, kaber, netfilter-devel, linux-kernel

From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Date: Mon, 18 Jul 2011 20:43:30 +0100

> [rw@sapphire]~/work/linux-2-6/net/netfilter $find -name '*.c' | xargs grep '^#ifdef' | wc -l
> 239
> [rw@sapphire]~/work/linux-2-6/net/netfilter $..
> [rw@sapphire]~/work/linux-2-6/net $find -name '*.c' | xargs grep '^#ifdef' | wc -l
> 1672

You've shown nothing.  Showing exceptions does not prove that the
general effort has been to keep ifdef crap out of *.c files.

And every developer or maintainer who says no to ifdefs in *.c files
for new changes is %100 right.

We're also specifically talking about namespace stuff, so you should have
at least refined your match criteria just a little bit.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 19:46         ` David Miller
@ 2011-07-18 20:17           ` Rainer Weikusat
  2011-07-18 20:19             ` David Miller
                               ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-18 20:17 UTC (permalink / raw)
  To: David Miller; +Cc: rweikusat, adobriyan, kaber, netfilter-devel, linux-kernel

David Miller <davem@davemloft.net> writes:
> From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
> Date: Mon, 18 Jul 2011 20:43:30 +0100
>
>> [rw@sapphire]~/work/linux-2-6/net/netfilter $find -name '*.c' | xargs grep '^#ifdef' | wc -l
>> 239
>> [rw@sapphire]~/work/linux-2-6/net/netfilter $..
>> [rw@sapphire]~/work/linux-2-6/net $find -name '*.c' | xargs grep '^#ifdef' | wc -l
>> 1672
>
> You've shown nothing.  Showing exceptions does not prove that the
> general effort has been to keep ifdef crap out of *.c files.

I've 'shown' that the networking code contains a fair amount of
#ifdefs in .c files. Consequently, 'we did it without' is wrong. At
best, 'we would like to do without in future' seems justified.

> And every developer or maintainer who says no to ifdefs in *.c files
> for new changes is %100 right.

Adding new files filled with ifdefs in order to avoid ifdefs in old
files in favor of lines-looking-like-code-which-arent seems
debatable to me. The same goes for adding unused structure members,
uneeded function calls, indirections through fifteen different other
files that turn out to do nothing etc. I spend much more time trying
to read Linux code than to write Linux code and while I decidedly know
worse things, Linux isn't exactly a prime example of easily accessible
code precisely because so much of it is something completely different
than what it appears to be.

But this is actually a digression that is besides the point: Put
something like 'ifdefs must not be used in new code, no matter what'
plainly into the CodingStyle text, then you can expect other people to
stick to this convention in the same way as to all the others (or at
least try to stick to it).

> We're also specifically talking about namespace stuff, so you should have
> at least refined your match criteria just a little bit.

The person I was replying to wrote 'We did whole networking without
sprinkling ifdefs'.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 20:17           ` Rainer Weikusat
@ 2011-07-18 20:19             ` David Miller
  2011-07-18 20:32               ` Alexey Dobriyan
  2011-07-18 20:27               ` Eric Dumazet
  2011-07-18 20:28             ` Jan Engelhardt
  2 siblings, 1 reply; 21+ messages in thread
From: David Miller @ 2011-07-18 20:19 UTC (permalink / raw)
  To: rweikusat; +Cc: adobriyan, kaber, netfilter-devel, linux-kernel

From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Date: Mon, 18 Jul 2011 21:17:00 +0100

> David Miller <davem@davemloft.net> writes:
>> We're also specifically talking about namespace stuff, so you should have
>> at least refined your match criteria just a little bit.
> 
> The person I was replying to wrote 'We did whole networking without
> sprinkling ifdefs'.

He was talking specifically about namespace stuff.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 20:17           ` Rainer Weikusat
@ 2011-07-18 20:27               ` Eric Dumazet
  2011-07-18 20:27               ` Eric Dumazet
  2011-07-18 20:28             ` Jan Engelhardt
  2 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2011-07-18 20:27 UTC (permalink / raw)
  To: Rainer Weikusat
  Cc: David Miller, adobriyan, kaber, netfilter-devel, linux-kernel

Le lundi 18 juillet 2011 à 21:17 +0100, Rainer Weikusat a écrit :

> Adding new files filled with ifdefs in order to avoid ifdefs in old
> files in favor of lines-looking-like-code-which-arent seems
> debatable to me. The same goes for adding unused structure members,
> uneeded function calls, indirections through fifteen different other
> files that turn out to do nothing etc. I spend much more time trying
> to read Linux code than to write Linux code and while I decidedly know
> worse things, Linux isn't exactly a prime example of easily accessible
> code precisely because so much of it is something completely different
> than what it appears to be.

Its true of any large piece of software. A new comer have to read
thousand of lines before even adding a single line.

Note that namespace code was added recently and we tried to use ifdefs
only in include files. You can find only few exceptions to this rule.




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
@ 2011-07-18 20:27               ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2011-07-18 20:27 UTC (permalink / raw)
  To: Rainer Weikusat
  Cc: David Miller, adobriyan, kaber, netfilter-devel, linux-kernel

Le lundi 18 juillet 2011 à 21:17 +0100, Rainer Weikusat a écrit :

> Adding new files filled with ifdefs in order to avoid ifdefs in old
> files in favor of lines-looking-like-code-which-arent seems
> debatable to me. The same goes for adding unused structure members,
> uneeded function calls, indirections through fifteen different other
> files that turn out to do nothing etc. I spend much more time trying
> to read Linux code than to write Linux code and while I decidedly know
> worse things, Linux isn't exactly a prime example of easily accessible
> code precisely because so much of it is something completely different
> than what it appears to be.

Its true of any large piece of software. A new comer have to read
thousand of lines before even adding a single line.

Note that namespace code was added recently and we tried to use ifdefs
only in include files. You can find only few exceptions to this rule.



--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 20:17           ` Rainer Weikusat
  2011-07-18 20:19             ` David Miller
  2011-07-18 20:27               ` Eric Dumazet
@ 2011-07-18 20:28             ` Jan Engelhardt
  2011-07-19 21:38               ` Rainer Weikusat
  2 siblings, 1 reply; 21+ messages in thread
From: Jan Engelhardt @ 2011-07-18 20:28 UTC (permalink / raw)
  To: Rainer Weikusat
  Cc: David Miller, adobriyan, kaber, netfilter-devel, linux-kernel


On Monday 2011-07-18 22:17, Rainer Weikusat wrote:
>David Miller <davem@davemloft.net> writes:
>>
>>> [rw@sapphire]~/work/linux-2-6/net/netfilter $find -name '*.c' | xargs grep '^#ifdef' | wc -l
>>> 239
>>
>> You've shown nothing.  Showing exceptions does not prove that the
>> general effort has been to keep ifdef crap out of *.c files.
>
>I've 'shown' that the networking code contains a fair amount of
>#ifdefs in .c files. Consequently, 'we did it without' is wrong. At
>best, 'we would like to do without in future' seems justified.

Your count of ifdefs is just telling that we use ifdef, not how we
use it.

There is a difference between #ifdefs sprinkled inside a function,
and #ifdefs around functions or larger groups where possible,
feasible and optically preferable (cf. security.h, xt_TEE.c).

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 20:19             ` David Miller
@ 2011-07-18 20:32               ` Alexey Dobriyan
  2011-07-19  9:42                 ` Patrick McHardy
  0 siblings, 1 reply; 21+ messages in thread
From: Alexey Dobriyan @ 2011-07-18 20:32 UTC (permalink / raw)
  To: David Miller; +Cc: rweikusat, kaber, netfilter-devel, linux-kernel

On Mon, Jul 18, 2011 at 01:19:43PM -0700, David Miller wrote:
> From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
> Date: Mon, 18 Jul 2011 21:17:00 +0100
> 
> > David Miller <davem@davemloft.net> writes:
> >> We're also specifically talking about namespace stuff, so you should have
> >> at least refined your match criteria just a little bit.
> > 
> > The person I was replying to wrote 'We did whole networking without
> > sprinkling ifdefs'.
> 
> He was talking specifically about namespace stuff.

Indeed.

Rainer, while your desire to keep CONFIG_NET_NS=n case equivalent
to current code is understandable and kernel people share it at large,
what you're fighting for is maybe one dereference on speed-uncritical
code paths and one pointer in small amount of data structures.
As such having common executable code matters more.

netns will be stubbed to &init_net at several places.
See how e. g. xt_hashlimit is done.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 20:32               ` Alexey Dobriyan
@ 2011-07-19  9:42                 ` Patrick McHardy
  0 siblings, 0 replies; 21+ messages in thread
From: Patrick McHardy @ 2011-07-19  9:42 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: David Miller, rweikusat, netfilter-devel, linux-kernel

On 18.07.2011 22:32, Alexey Dobriyan wrote:
> On Mon, Jul 18, 2011 at 01:19:43PM -0700, David Miller wrote:
>> From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
>> Date: Mon, 18 Jul 2011 21:17:00 +0100
>>
>>> David Miller <davem@davemloft.net> writes:
>>>> We're also specifically talking about namespace stuff, so you should have
>>>> at least refined your match criteria just a little bit.
>>>
>>> The person I was replying to wrote 'We did whole networking without
>>> sprinkling ifdefs'.
>>
>> He was talking specifically about namespace stuff.
> 
> Indeed.
> 
> Rainer, while your desire to keep CONFIG_NET_NS=n case equivalent
> to current code is understandable and kernel people share it at large,
> what you're fighting for is maybe one dereference on speed-uncritical
> code paths and one pointer in small amount of data structures.
> As such having common executable code matters more.
> 
> netns will be stubbed to &init_net at several places.
> See how e. g. xt_hashlimit is done.

Indeed, that's the alternative to putting it directly into struct
netns, not using tons of ifdefs.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c
  2011-07-18 20:28             ` Jan Engelhardt
@ 2011-07-19 21:38               ` Rainer Weikusat
  2011-07-20 15:04                 ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated) Rainer Weikusat
  0 siblings, 1 reply; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-19 21:38 UTC (permalink / raw)
  To: Jan Engelhardt
  Cc: Rainer Weikusat, David Miller, adobriyan, kaber, netfilter-devel,
	linux-kernel

Jan Engelhardt <jengelh@medozas.de> writes:
> On Monday 2011-07-18 22:17, Rainer Weikusat wrote:
>>David Miller <davem@davemloft.net> writes:
>>>
>>>> [rw@sapphire]~/work/linux-2-6/net/netfilter $find -name '*.c' | xargs grep '^#ifdef' | wc -l
>>>> 239
>>>
>>> You've shown nothing.  Showing exceptions does not prove that the
>>> general effort has been to keep ifdef crap out of *.c files.
>>
>>I've 'shown' that the networking code contains a fair amount of
>>#ifdefs in .c files. Consequently, 'we did it without' is wrong. At
>>best, 'we would like to do without in future' seems justified.
>
> Your count of ifdefs is just telling that we use ifdef, not how we
> use it.
>
> There is a difference between #ifdefs sprinkled inside a function,
> and #ifdefs around functions or larger groups where possible,
> feasible and optically preferable (cf. security.h, xt_TEE.c).

Counting this one-by-one results in

	1.An #ifdef block in order to include the namespace headers
          conditionally. This could essentially just be dumped without
          any effects except a minor increase in compilation time.

	2.Two of them in order to conditionally include
          a single, namespace-specific pointer in the two structures.
          This is not 'sprinkled across functions' but inside a
          declaration which happens to be in a .c file. Could be
          omitted at the cost of including two otherwise useless
          pointers in the two structures when compiling w/o netns
          support.

	3.A 'large functional group' which conditionally compiles
          either the 'dummy' access functions that all reduce to
          using the static logging instances table the same way it is
          used in the existing code or the more complicated ones that
          actually go via a struct net in order to locate the
          per-namespace logging instance table. In this case, the
          possible cost for someoe who doesn't want to use the network
          namespace is a call to an not entirely trivial function per
          logged packet plus two additional pointer dereferences per
          batch of logged packets forwarded to a userspace
          application.

	4.A conditional pointer assignment in instance_create. That's
          not going to help much in either case and could just be
          dumped alongside the conditionally included pointers.

	5.A conditionally compiled function body for
          instances_for_seq. Could be moved into the 'larger group'
          mentioned in 3.

	6.Another 'larger group' which contains the namespace-specific
	  initialization/ finalization code and the corresponding
	  structure definition. Could be dumped completely by
	  exploiting the 'non-namespace' register_pernet_ interface.

	7.Two blocks of conditional additions to the module
	  initialization/ finalization routines. Both routines already
	  contain conditionally compiled code blocks (for proc
	  support). Apart from that, same as 6.

My opinion on that would be that 1), 6) and 7) should be removed and
that 2) and 4) are essentially cosmetic and thus, should be removed,
too. 5) should just be unified with 3) (meaning, treated identically
to it. 3), however, is different since this would impose an
essentially unbound additional cost on someone who doesn't want to use
network namespaces (roughly proportional to the number packets
logged). And this doesn't seem particularly fair to me (if someone
doesn't want to use network namespace, he shouldn't be paying for the
people who do want to use them).

It is, however, possible to change the instances_via_skb function to
something which gcc (at least 4.4.5) can correctly identify as no-op
for the 'no network namespace' case and to change the remaining code
such that only the 'lookup instances by going through a struct net'
inline routine is still conditionally compiled (either doing a
net_generic call or returning the value of a file scope pointer
variable). This means that the compiled code is roughly identical to
the one produced by the patch I originally posted while the set of
source code changes has become (relatively) significantly smaller and less
'variable'. I did this over the course of the day, mainly because I
was curious if it could be done and the result works with the
2.6.36.4-derived kernel used on 'our' appliances. A patch for nf-2.6
current/ 3.0-rc? also exists but I haven't gotten around to actually
testing that today. Provided that also works (which I expect), I will
post an updated change tomorrow.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated)
  2011-07-19 21:38               ` Rainer Weikusat
@ 2011-07-20 15:04                 ` Rainer Weikusat
  2011-07-26 11:22                   ` Rainer Weikusat
  2011-07-26 11:37                   ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again) Rainer Weikusat
  0 siblings, 2 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-20 15:04 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel, linux-kernel

From: Rainer Weikusat <rweikusat@mobileactivedefense.com>

Presently, the nfnetlink_log.c file contains only very nominal support
for network namespaces: While it is possible to create sockets which
should theoretically receive NFLOG originated messages in arbitrary
network namespaces, there is only one table of nfulnl_instance
structures in the kernel and all log messages sent via __nfulnl_send
are forced into the init_net namespace so that only sockets created
in this namespace will ever actually receive log data. Likewise, the
nfulnl_rcv_nl_event notification callback won't destroy logging
instances created by processes in other network namespace upon process
death. The patch included below changes the code to use a logging
instance table per network namespace, to send messages generated from
within a specific namespace to sockets also belonging to this
namespace and to destroy logging instances created from other network
namespaces than init_net when cleaning up after a logging process
terminated. It doesn't touch the code dealing with nfnetlink_log /proc
files which thus remain restricted to the init_net namespace because
this isn't really needed in order to get per-namespace logging and
would require changes to other files, in particular, nf_log.c

The patch also removes the hash_init file scope variable because
that's not used for anything except that the existing init routine
initializes it.

Signed-Off-By: Rainer Weikusat <rweikusat@mobileactivedefense.com>
---
--- nf-2.6/net/netfilter/nfnetlink_log.c	2011-07-20 14:08:42.523589195 +0100
+++ nf-2.6.patched//net/netfilter/nfnetlink_log.c	2011-07-20 15:53:51.233582160 +0100
@@ -39,6 +39,9 @@
 #include "../bridge/br_private.h"
 #endif
 
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
 #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
@@ -47,6 +50,15 @@
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
 
+#define INSTANCE_BUCKETS	16
+
+struct nfulnl_instances {
+	spinlock_t lock;
+	atomic_t global_seq;
+	struct hlist_head table[INSTANCE_BUCKETS];
+	struct net *net;
+};
+
 struct nfulnl_instance {
 	struct hlist_node hlist;	/* global list of instances */
 	spinlock_t lock;
@@ -67,14 +79,44 @@ struct nfulnl_instance {
 	u_int16_t flags;
 	u_int8_t copy_mode;
 	struct rcu_head rcu;
+	struct nfulnl_instances *instances;
 };
 
-static DEFINE_SPINLOCK(instances_lock);
-static atomic_t global_seq;
+static struct nfulnl_instances *instances;
+static int nfulnl_net_id;
 
-#define INSTANCE_BUCKETS	16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
-static unsigned int hash_init;
+#ifdef CONFIG_NET_NS
+static inline struct nfulnl_instances *instances_for_net(struct net *net)
+{
+	return net_generic(net, nfulnl_net_id);
+}
+#else
+static inline struct nfulnl_instances *instances_for_net(struct net *net)
+{
+	return instances;
+}
+#endif
+
+static struct nfulnl_instances *instances_via_skb(struct sk_buff const *skb)
+{
+	static struct net *dummy_pnet __maybe_unused;
+	struct net *net;
+
+	/*
+	 * The multiway conditional needs to be inside the
+	 * read_pnet argument list because that is a no-op for
+	 * the 'no netns' case and gcc will then [hopefully]
+	 * remove it as 'dead code' (at least 4.4.5 does).
+	 */
+	net = read_pnet(skb->sk ? &skb->sk->sk_net :
+			(skb->dev ? &skb->dev->nd_net : &dummy_pnet));
+	return net ? instances_for_net(net) : NULL;
+}
+
+static inline struct net *inst_net(struct nfulnl_instance *inst)
+{
+	return read_pnet(&inst->instances->net);
+}
 
 static inline u_int8_t instance_hashfn(u_int16_t group_num)
 {
@@ -82,13 +124,13 @@ static inline u_int8_t instance_hashfn(u
 }
 
 static struct nfulnl_instance *
-__instance_lookup(u_int16_t group_num)
+__instance_lookup(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct hlist_head *head;
 	struct hlist_node *pos;
 	struct nfulnl_instance *inst;
 
-	head = &instance_table[instance_hashfn(group_num)];
+	head = &instances->table[instance_hashfn(group_num)];
 	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
@@ -103,12 +145,15 @@ instance_get(struct nfulnl_instance *ins
 }
 
 static struct nfulnl_instance *
-instance_lookup_get(u_int16_t group_num)
+instance_lookup_get(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct nfulnl_instance *inst;
 
+	if (!instances)
+		return NULL;
+
 	rcu_read_lock_bh();
-	inst = __instance_lookup(group_num);
+	inst = __instance_lookup(instances, group_num);
 	if (inst && !atomic_inc_not_zero(&inst->use))
 		inst = NULL;
 	rcu_read_unlock_bh();
@@ -132,13 +177,14 @@ instance_put(struct nfulnl_instance *ins
 static void nfulnl_timer(unsigned long data);
 
 static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int pid)
+instance_create(struct nfulnl_instances *instances,
+		u_int16_t group_num, int pid)
 {
 	struct nfulnl_instance *inst;
 	int err;
 
-	spin_lock_bh(&instances_lock);
-	if (__instance_lookup(group_num)) {
+	spin_lock_bh(&instances->lock);
+	if (__instance_lookup(instances, group_num)) {
 		err = -EEXIST;
 		goto out_unlock;
 	}
@@ -171,15 +217,16 @@ instance_create(u_int16_t group_num, int
 	inst->copy_mode 	= NFULNL_COPY_PACKET;
 	inst->copy_range 	= NFULNL_COPY_RANGE_MAX;
 
-	hlist_add_head_rcu(&inst->hlist,
-		       &instance_table[instance_hashfn(group_num)]);
+	inst->instances = instances;
 
-	spin_unlock_bh(&instances_lock);
+	hlist_add_head_rcu(&inst->hlist,
+			&instances->table[instance_hashfn(group_num)]);
 
+	spin_unlock_bh(&instances->lock);
 	return inst;
 
 out_unlock:
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 	return ERR_PTR(err);
 }
 
@@ -208,11 +255,12 @@ __instance_destroy(struct nfulnl_instanc
 }
 
 static inline void
-instance_destroy(struct nfulnl_instance *inst)
+instance_destroy(struct nfulnl_instances *instances,
+		struct nfulnl_instance *inst)
 {
-	spin_lock_bh(&instances_lock);
+	spin_lock_bh(&instances->lock);
 	__instance_destroy(inst);
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 }
 
 static int
@@ -334,7 +382,7 @@ __nfulnl_send(struct nfulnl_instance *in
 			  NLMSG_DONE,
 			  sizeof(struct nfgenmsg));
 
-	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+	status = nfnetlink_unicast(inst->skb, inst_net(inst), inst->peer_pid,
 				   MSG_DONTWAIT);
 
 	inst->qlen = 0;
@@ -505,7 +553,8 @@ __build_packet_message(struct nfulnl_ins
 	/* global sequence number */
 	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
 		NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
-			     htonl(atomic_inc_return(&global_seq)));
+			     htonl(atomic_inc_return(
+						&inst->instances->global_seq)));
 
 	if (data_len) {
 		struct nlattr *nla;
@@ -567,7 +616,8 @@ nfulnl_log_packet(u_int8_t pf,
 	else
 		li = &default_loginfo;
 
-	inst = instance_lookup_get(li->u.ulog.group);
+	inst = instance_lookup_get(instances_via_skb(skb),
+				li->u.ulog.group);
 	if (!inst)
 		return;
 
@@ -678,24 +728,26 @@ nfulnl_rcv_nl_event(struct notifier_bloc
 		   unsigned long event, void *ptr)
 {
 	struct netlink_notify *n = ptr;
+	struct nfulnl_instances *instances;
 
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
 		int i;
 
+		instances = instances_for_net(n->net);
+
 		/* destroy all instances for this pid */
-		spin_lock_bh(&instances_lock);
+		spin_lock_bh(&instances->lock);
 		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
 			struct nfulnl_instance *inst;
-			struct hlist_head *head = &instance_table[i];
+			struct hlist_head *head = &instances->table[i];
 
 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
-				if ((net_eq(n->net, &init_net)) &&
-				    (n->pid == inst->peer_pid))
+				if (n->pid == inst->peer_pid)
 					__instance_destroy(inst);
 			}
 		}
-		spin_unlock_bh(&instances_lock);
+		spin_unlock_bh(&instances->lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -734,6 +786,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
+	struct nfulnl_instances *instances;
 	struct nfulnl_instance *inst;
 	struct nfulnl_msg_config_cmd *cmd = NULL;
 	int ret = 0;
@@ -752,7 +805,11 @@ nfulnl_recv_config(struct sock *ctnl, st
 		}
 	}
 
-	inst = instance_lookup_get(group_num);
+	instances = instances_via_skb(skb);
+	if (!instances)
+		return -ENODEV;
+
+	inst = instance_lookup_get(instances, group_num);
 	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
 		ret = -EPERM;
 		goto out_put;
@@ -766,7 +823,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 				goto out_put;
 			}
 
-			inst = instance_create(group_num,
+			inst = instance_create(instances, group_num,
 					       NETLINK_CB(skb).pid);
 			if (IS_ERR(inst)) {
 				ret = PTR_ERR(inst);
@@ -779,7 +836,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 				goto out;
 			}
 
-			instance_destroy(inst);
+			instance_destroy(instances, inst);
 			goto out_put;
 		default:
 			ret = -ENOTSUPP;
@@ -862,17 +919,27 @@ static const struct nfnetlink_subsystem
 
 #ifdef CONFIG_PROC_FS
 struct iter_state {
+	struct nfulnl_instances *instances;
 	unsigned int bucket;
 };
 
+static inline struct nfulnl_instances *instances_for_seq(void)
+{
+	return instances_for_net(&init_net);
+}
+
 static struct hlist_node *get_first(struct iter_state *st)
 {
 	if (!st)
 		return NULL;
 
+	st->instances = instances_for_seq();
+
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
-		if (!hlist_empty(&instance_table[st->bucket]))
-			return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		if (!hlist_empty(&st->instances->table[st->bucket]))
+			return rcu_dereference_bh(
+				hlist_first_rcu(
+					&st->instances->table[st->bucket]));
 	}
 	return NULL;
 }
@@ -884,7 +951,8 @@ static struct hlist_node *get_next(struc
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		h = rcu_dereference_bh(
+			hlist_first_rcu(&st->instances->table[st->bucket]));
 	}
 	return h;
 }
@@ -953,17 +1021,36 @@ static const struct file_operations nful
 
 #endif /* PROC_FS */
 
-static int __init nfnetlink_log_init(void)
+static int nfulnl_net_init(struct net *net)
 {
-	int i, status = -ENOMEM;
+	struct nfulnl_instances *insts;
+	int i;
+
+	insts = net_generic(net, nfulnl_net_id);
+	insts->net = net;
+	spin_lock_init(&insts->lock);
+
+	i = INSTANCE_BUCKETS;
+	do INIT_HLIST_HEAD(insts->table + --i); while (i);
+
+	/* avoid 'runtime' net_generic for 'no netns' */
+	instances = insts;
+	return 0;
+}
+
+static struct pernet_operations nfulnl_net_ops = {
+	.init =		nfulnl_net_init,
+	.id =		&nfulnl_net_id,
+	.size =		sizeof(struct nfulnl_instances)
+};
 
-	for (i = 0; i < INSTANCE_BUCKETS; i++)
-		INIT_HLIST_HEAD(&instance_table[i]);
+static int __init nfnetlink_log_init(void)
+{
+	int status = -ENOMEM;
 
-	/* it's not really all that important to have a random value, so
-	 * we can do this from the init function, even if there hasn't
-	 * been that much entropy yet */
-	get_random_bytes(&hash_init, sizeof(hash_init));
+	status = register_pernet_subsys(&nfulnl_net_ops);
+	if (status)
+		return status;
 
 	netlink_register_notifier(&nfulnl_rtnl_notifier);
 	status = nfnetlink_subsys_register(&nfulnl_subsys);
@@ -998,6 +1085,7 @@ cleanup_netlink_notifier:
 
 static void __exit nfnetlink_log_fini(void)
 {
+	unregister_pernet_subsys(&nfulnl_net_ops);
 	nf_log_unregister(&nfulnl_logger);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("nfnetlink_log", proc_net_netfilter);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated)
  2011-07-20 15:04                 ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated) Rainer Weikusat
@ 2011-07-26 11:22                   ` Rainer Weikusat
  2011-07-26 11:37                   ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again) Rainer Weikusat
  1 sibling, 0 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-26 11:22 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel, linux-kernel

Rainer Weikusat <rw@sapphire.mobileactivedefense.com> writes:
> Presently, the nfnetlink_log.c file contains only very nominal support
> for network namespaces:

I'm going to do a second 'updated' version of that which rectifies a
couple of more-or-less minor issues with the first one (purpose of
this mail is to document the changes). Specifically,

	- the instances_via_skb function should now also compile when
          network namespace support is not supposed to be included in
          a kernel and the compiler doesn't remove the 'dead code'
          before compiling it (Some kernel data structures include a
          struct net * unconditionally, some include it only
          conditionally. In particular, struct sock and struct
          net_device belong to the latter group)

	- the nfulnl_recv_config routine now gets the net namespace to
          use by examining the ctnl argument since it is kind of
          stupid to use the more complicated route via struct skbuff
          if a simpler alternative is available

	- the unregister_pernet_subsys call has been moved to the end
          of nfnetlink_log_fini so that the init and fini codepaths
          are again axis-symmetric to each other

NB: Except in the fairly unlikely case that something like an actual
error turns up in this code, I won't send this patch again (or over
and over again :-) because I cannot possibly justify making more
essentially cosmetic changes to something which is working code I need
to solve a specifc problem I've encountered as part of my present job.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again)
  2011-07-20 15:04                 ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated) Rainer Weikusat
  2011-07-26 11:22                   ` Rainer Weikusat
@ 2011-07-26 11:37                   ` Rainer Weikusat
  2011-07-28  7:00                     ` Patrick McHardy
  2011-07-28 19:57                     ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated yet again) Rainer Weikusat
  1 sibling, 2 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-26 11:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel, linux-kernel

From: Rainer Weikusat <rweikusat@mobileactivedefense.com>

Presently, the nfnetlink_log.c file contains only very nominal support
for network namespaces: While it is possible to create sockets which
should theoretically receive NFLOG originated messages in arbitrary
network namespaces, there is only one table of nfulnl_instance
structures in the kernel and all log messages sent via __nfulnl_send
are forced into the init_net namespace so that only sockets created
in this namespace will ever actually receive log data. Likewise, the
nfulnl_rcv_nl_event notification callback won't destroy logging
instances created by processes in other network namespace upon process
death. The patch included below changes the code to use a logging
instance table per network namespace, to send messages generated from
within a specific namespace to sockets also belonging to this
namespace and to destroy logging instances created from other network
namespaces than init_net when cleaning up after a logging process
terminated. It doesn't touch the code dealing with nfnetlink_log /proc
files which thus remain restricted to the init_net namespace because
this isn't really needed in order to get per-namespace logging and
would require changes to other files, in particular, nf_log.c

The patch also removes the hash_init file scope variable because
that's not used for anything except that the existing init routine
initializes it.

Signed-Off-By: Rainer Weikusat <rweikusat@mobileactivedefense.com>
---
--- nf-2.6/net/netfilter/nfnetlink_log.c	2011-07-25 16:53:41.693829768 +0100
+++ nf-2.6.patched//net/netfilter/nfnetlink_log.c	2011-07-26 12:06:46.543418699 +0100
@@ -39,6 +39,9 @@
 #include "../bridge/br_private.h"
 #endif
 
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
 #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
@@ -47,6 +50,15 @@
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
 
+#define INSTANCE_BUCKETS	16
+
+struct nfulnl_instances {
+	spinlock_t lock;
+	atomic_t global_seq;
+	struct hlist_head table[INSTANCE_BUCKETS];
+	struct net *net;
+};
+
 struct nfulnl_instance {
 	struct hlist_node hlist;	/* global list of instances */
 	spinlock_t lock;
@@ -67,14 +79,39 @@ struct nfulnl_instance {
 	u_int16_t flags;
 	u_int8_t copy_mode;
 	struct rcu_head rcu;
+	struct nfulnl_instances *instances;
 };
 
-static DEFINE_SPINLOCK(instances_lock);
-static atomic_t global_seq;
+static struct nfulnl_instances *instances;
+static int nfulnl_net_id;
 
-#define INSTANCE_BUCKETS	16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
-static unsigned int hash_init;
+#ifdef CONFIG_NET_NS
+static inline struct nfulnl_instances *instances_for_net(struct net *net)
+{
+	return net_generic(net, nfulnl_net_id);
+}
+#else
+static inline struct nfulnl_instances *instances_for_net(struct net *net)
+{
+	return instances;
+}
+#endif
+
+static struct nfulnl_instances *instances_via_skb(struct sk_buff const *skb)
+{
+	struct net *net;
+
+	net = skb->sk ? sock_net(skb->sk) :
+		(skb->dev ? dev_net(skb->dev) : &init_net);
+
+	return instances_for_net(net);
+}
+
+static inline struct net *inst_net(struct nfulnl_instance *inst)
+{
+	return read_pnet(&inst->instances->net);
+
+}
 
 static inline u_int8_t instance_hashfn(u_int16_t group_num)
 {
@@ -82,13 +119,13 @@ static inline u_int8_t instance_hashfn(u
 }
 
 static struct nfulnl_instance *
-__instance_lookup(u_int16_t group_num)
+__instance_lookup(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct hlist_head *head;
 	struct hlist_node *pos;
 	struct nfulnl_instance *inst;
 
-	head = &instance_table[instance_hashfn(group_num)];
+	head = &instances->table[instance_hashfn(group_num)];
 	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
@@ -103,12 +140,15 @@ instance_get(struct nfulnl_instance *ins
 }
 
 static struct nfulnl_instance *
-instance_lookup_get(u_int16_t group_num)
+instance_lookup_get(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct nfulnl_instance *inst;
 
+	if (!instances)
+		return NULL;
+
 	rcu_read_lock_bh();
-	inst = __instance_lookup(group_num);
+	inst = __instance_lookup(instances, group_num);
 	if (inst && !atomic_inc_not_zero(&inst->use))
 		inst = NULL;
 	rcu_read_unlock_bh();
@@ -132,13 +172,14 @@ instance_put(struct nfulnl_instance *ins
 static void nfulnl_timer(unsigned long data);
 
 static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int pid)
+instance_create(struct nfulnl_instances *instances,
+		u_int16_t group_num, int pid)
 {
 	struct nfulnl_instance *inst;
 	int err;
 
-	spin_lock_bh(&instances_lock);
-	if (__instance_lookup(group_num)) {
+	spin_lock_bh(&instances->lock);
+	if (__instance_lookup(instances, group_num)) {
 		err = -EEXIST;
 		goto out_unlock;
 	}
@@ -171,15 +212,16 @@ instance_create(u_int16_t group_num, int
 	inst->copy_mode 	= NFULNL_COPY_PACKET;
 	inst->copy_range 	= NFULNL_COPY_RANGE_MAX;
 
-	hlist_add_head_rcu(&inst->hlist,
-		       &instance_table[instance_hashfn(group_num)]);
+	inst->instances = instances;
 
-	spin_unlock_bh(&instances_lock);
+	hlist_add_head_rcu(&inst->hlist,
+			&instances->table[instance_hashfn(group_num)]);
 
+	spin_unlock_bh(&instances->lock);
 	return inst;
 
 out_unlock:
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 	return ERR_PTR(err);
 }
 
@@ -208,11 +250,12 @@ __instance_destroy(struct nfulnl_instanc
 }
 
 static inline void
-instance_destroy(struct nfulnl_instance *inst)
+instance_destroy(struct nfulnl_instances *instances,
+		struct nfulnl_instance *inst)
 {
-	spin_lock_bh(&instances_lock);
+	spin_lock_bh(&instances->lock);
 	__instance_destroy(inst);
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 }
 
 static int
@@ -334,7 +377,7 @@ __nfulnl_send(struct nfulnl_instance *in
 			  NLMSG_DONE,
 			  sizeof(struct nfgenmsg));
 
-	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+	status = nfnetlink_unicast(inst->skb, inst_net(inst), inst->peer_pid,
 				   MSG_DONTWAIT);
 
 	inst->qlen = 0;
@@ -505,7 +548,8 @@ __build_packet_message(struct nfulnl_ins
 	/* global sequence number */
 	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
 		NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
-			     htonl(atomic_inc_return(&global_seq)));
+			     htonl(atomic_inc_return(
+					     &inst->instances->global_seq)));
 
 	if (data_len) {
 		struct nlattr *nla;
@@ -567,7 +611,8 @@ nfulnl_log_packet(u_int8_t pf,
 	else
 		li = &default_loginfo;
 
-	inst = instance_lookup_get(li->u.ulog.group);
+	inst = instance_lookup_get(instances_via_skb(skb),
+				li->u.ulog.group);
 	if (!inst)
 		return;
 
@@ -678,24 +723,26 @@ nfulnl_rcv_nl_event(struct notifier_bloc
 		   unsigned long event, void *ptr)
 {
 	struct netlink_notify *n = ptr;
+	struct nfulnl_instances *instances;
 
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
 		int i;
 
+		instances = instances_for_net(n->net);
+
 		/* destroy all instances for this pid */
-		spin_lock_bh(&instances_lock);
+		spin_lock_bh(&instances->lock);
 		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
 			struct nfulnl_instance *inst;
-			struct hlist_head *head = &instance_table[i];
+			struct hlist_head *head = &instances->table[i];
 
 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
-				if ((net_eq(n->net, &init_net)) &&
-				    (n->pid == inst->peer_pid))
+				if (n->pid == inst->peer_pid)
 					__instance_destroy(inst);
 			}
 		}
-		spin_unlock_bh(&instances_lock);
+		spin_unlock_bh(&instances->lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -734,6 +781,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
+	struct nfulnl_instances *instances;
 	struct nfulnl_instance *inst;
 	struct nfulnl_msg_config_cmd *cmd = NULL;
 	int ret = 0;
@@ -752,7 +800,11 @@ nfulnl_recv_config(struct sock *ctnl, st
 		}
 	}
 
-	inst = instance_lookup_get(group_num);
+	instances = instances_for_net(sock_net(ctnl));
+	if (!instances)
+		return -ENODEV;
+
+	inst = instance_lookup_get(instances, group_num);
 	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
 		ret = -EPERM;
 		goto out_put;
@@ -766,7 +818,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 				goto out_put;
 			}
 
-			inst = instance_create(group_num,
+			inst = instance_create(instances, group_num,
 					       NETLINK_CB(skb).pid);
 			if (IS_ERR(inst)) {
 				ret = PTR_ERR(inst);
@@ -779,7 +831,7 @@ nfulnl_recv_config(struct sock *ctnl, st
 				goto out;
 			}
 
-			instance_destroy(inst);
+			instance_destroy(instances, inst);
 			goto out_put;
 		default:
 			ret = -ENOTSUPP;
@@ -862,17 +914,27 @@ static const struct nfnetlink_subsystem
 
 #ifdef CONFIG_PROC_FS
 struct iter_state {
+	struct nfulnl_instances *instances;
 	unsigned int bucket;
 };
 
+static inline struct nfulnl_instances *instances_for_seq(void)
+{
+	return instances_for_net(&init_net);
+}
+
 static struct hlist_node *get_first(struct iter_state *st)
 {
 	if (!st)
 		return NULL;
 
+	st->instances = instances_for_seq();
+
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
-		if (!hlist_empty(&instance_table[st->bucket]))
-			return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		if (!hlist_empty(&st->instances->table[st->bucket]))
+			return rcu_dereference_bh(
+				hlist_first_rcu(
+					&st->instances->table[st->bucket]));
 	}
 	return NULL;
 }
@@ -884,7 +946,8 @@ static struct hlist_node *get_next(struc
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		h = rcu_dereference_bh(
+			hlist_first_rcu(&st->instances->table[st->bucket]));
 	}
 	return h;
 }
@@ -953,17 +1016,36 @@ static const struct file_operations nful
 
 #endif /* PROC_FS */
 
-static int __init nfnetlink_log_init(void)
+static int nfulnl_net_init(struct net *net)
 {
-	int i, status = -ENOMEM;
+	struct nfulnl_instances *insts;
+	int i;
 
-	for (i = 0; i < INSTANCE_BUCKETS; i++)
-		INIT_HLIST_HEAD(&instance_table[i]);
+	insts = net_generic(net, nfulnl_net_id);
+	insts->net = net;
+	spin_lock_init(&insts->lock);
+
+	i = INSTANCE_BUCKETS;
+	do INIT_HLIST_HEAD(insts->table + --i); while (i);
+
+	/* avoid 'runtime' net_generic for 'no netns' */
+	instances = insts;
+	return 0;
+}
+
+static struct pernet_operations nfulnl_net_ops = {
+	.init =		nfulnl_net_init,
+	.id =		&nfulnl_net_id,
+	.size =		sizeof(struct nfulnl_instances)
+};
+
+static int __init nfnetlink_log_init(void)
+{
+	int status;
 
-	/* it's not really all that important to have a random value, so
-	 * we can do this from the init function, even if there hasn't
-	 * been that much entropy yet */
-	get_random_bytes(&hash_init, sizeof(hash_init));
+	status = register_pernet_subsys(&nfulnl_net_ops);
+	if (status)
+		return status;
 
 	netlink_register_notifier(&nfulnl_rtnl_notifier);
 	status = nfnetlink_subsys_register(&nfulnl_subsys);
@@ -1004,6 +1086,7 @@ static void __exit nfnetlink_log_fini(vo
 #endif
 	nfnetlink_subsys_unregister(&nfulnl_subsys);
 	netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+	unregister_pernet_subsys(&nfulnl_net_ops);
 }
 
 MODULE_DESCRIPTION("netfilter userspace logging");

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again)
  2011-07-26 11:37                   ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again) Rainer Weikusat
@ 2011-07-28  7:00                     ` Patrick McHardy
  2011-07-28 19:56                       ` Rainer Weikusat
  2011-07-28 19:57                     ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated yet again) Rainer Weikusat
  1 sibling, 1 reply; 21+ messages in thread
From: Patrick McHardy @ 2011-07-28  7:00 UTC (permalink / raw)
  To: Rainer Weikusat; +Cc: netfilter-devel, linux-kernel

On 26.07.2011 13:37, Rainer Weikusat wrote:
> --- nf-2.6/net/netfilter/nfnetlink_log.c	2011-07-25 16:53:41.693829768 +0100
> +++ nf-2.6.patched//net/netfilter/nfnetlink_log.c	2011-07-26 12:06:46.543418699 +0100
> @@ -39,6 +39,9 @@
>  #include "../bridge/br_private.h"
>  #endif
>  
> +#include <net/net_namespace.h>
> +#include <net/netns/generic.h>
> +
>  #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
>  #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
>  #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
> @@ -47,6 +50,15 @@
>  #define PRINTR(x, args...)	do { if (net_ratelimit()) \
>  				     printk(x, ## args); } while (0);
>  
> +#define INSTANCE_BUCKETS	16
> +
> +struct nfulnl_instances {
> +	spinlock_t lock;
> +	atomic_t global_seq;
> +	struct hlist_head table[INSTANCE_BUCKETS];
> +	struct net *net;
> +};
> +
>  struct nfulnl_instance {
>  	struct hlist_node hlist;	/* global list of instances */
>  	spinlock_t lock;
> @@ -67,14 +79,39 @@ struct nfulnl_instance {
>  	u_int16_t flags;
>  	u_int8_t copy_mode;
>  	struct rcu_head rcu;
> +	struct nfulnl_instances *instances;
>  };
>  
> -static DEFINE_SPINLOCK(instances_lock);
> -static atomic_t global_seq;
> +static struct nfulnl_instances *instances;
> +static int nfulnl_net_id;
>  
> -#define INSTANCE_BUCKETS	16
> -static struct hlist_head instance_table[INSTANCE_BUCKETS];
> -static unsigned int hash_init;
> +#ifdef CONFIG_NET_NS
> +static inline struct nfulnl_instances *instances_for_net(struct net *net)
> +{
> +	return net_generic(net, nfulnl_net_id);
> +}
> +#else
> +static inline struct nfulnl_instances *instances_for_net(struct net *net)
> +{
> +	return instances;
> +}
> +#endif

We use net_generic unconditionally everywhere else, there's no reason
for nfnetlink_log not to.

> +static struct nfulnl_instances *instances_via_skb(struct sk_buff const *skb)
> +{
> +	struct net *net;
> +
> +	net = skb->sk ? sock_net(skb->sk) :
> +		(skb->dev ? dev_net(skb->dev) : &init_net);

You don't need to manually handle init_net, the *_net functions will
do the right thing. Also the common case is that skb->dev is non-NULL,
I'd suggest to use dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev).

This function is also only used once and doesn't really make things
easier to read, please get rid of it and open code.

> +
> +	return instances_for_net(net);
> +}
> +
> +static inline struct net *inst_net(struct nfulnl_instance *inst)
> +{
> +	return read_pnet(&inst->instances->net);
> +
> +}

Only used once, please get rid of it.


>  static struct nfulnl_instance *
> -instance_lookup_get(u_int16_t group_num)
> +instance_lookup_get(struct nfulnl_instances *instances, u_int16_t group_num)

I'd suggest to pass the net * pointer to this function instead of doing
the lookup in the calling functions. Should save a bit of code and also
is a more common pattern used with namespaces.

>  {
>  	struct nfulnl_instance *inst;
>  
> +	if (!instances)
> +		return NULL;
> +
>  	rcu_read_lock_bh();
> -	inst = __instance_lookup(group_num);
> +	inst = __instance_lookup(instances, group_num);
>  	if (inst && !atomic_inc_not_zero(&inst->use))
>  		inst = NULL;
>  	rcu_read_unlock_bh();
> @@ -132,13 +172,14 @@ instance_put(struct nfulnl_instance *ins
>  static void nfulnl_timer(unsigned long data);
>  
>  static struct nfulnl_instance *
> -instance_create(u_int16_t group_num, int pid)
> +instance_create(struct nfulnl_instances *instances,
> +		u_int16_t group_num, int pid)

Same here.

>  {
>  	struct nfulnl_instance *inst;
>  	int err;
>  
> -	spin_lock_bh(&instances_lock);
> -	if (__instance_lookup(group_num)) {
> +	spin_lock_bh(&instances->lock);
> +	if (__instance_lookup(instances, group_num)) {
>  		err = -EEXIST;
>  		goto out_unlock;
>  	}
>  
> @@ -208,11 +250,12 @@ __instance_destroy(struct nfulnl_instanc
>  }
>  
>  static inline void
> -instance_destroy(struct nfulnl_instance *inst)
> +instance_destroy(struct nfulnl_instances *instances,
> +		struct nfulnl_instance *inst)

And here.

>  {
> -	spin_lock_bh(&instances_lock);
> +	spin_lock_bh(&instances->lock);
>  	__instance_destroy(inst);
> -	spin_unlock_bh(&instances_lock);
> +	spin_unlock_bh(&instances->lock);
>  }
>  
>  static int

> @@ -862,17 +914,27 @@ static const struct nfnetlink_subsystem
>  
>  #ifdef CONFIG_PROC_FS
>  struct iter_state {
> +	struct nfulnl_instances *instances;
>  	unsigned int bucket;
>  };
>  
> +static inline struct nfulnl_instances *instances_for_seq(void)
> +{
> +	return instances_for_net(&init_net);
> +}

Also used only once and hides the fact that we're only handling
init_net. Please remove and open code.

> -static int __init nfnetlink_log_init(void)
> +static int nfulnl_net_init(struct net *net)
>  {
> -	int i, status = -ENOMEM;
> +	struct nfulnl_instances *insts;
> +	int i;
>  
> -	for (i = 0; i < INSTANCE_BUCKETS; i++)
> -		INIT_HLIST_HEAD(&instance_table[i]);
> +	insts = net_generic(net, nfulnl_net_id);
> +	insts->net = net;
> +	spin_lock_init(&insts->lock);
> +
> +	i = INSTANCE_BUCKETS;
> +	do INIT_HLIST_HEAD(insts->table + --i); while (i);

Don't put this on one line and please choose a slightly
more readable construct than + --i while (i).

> +
> +	/* avoid 'runtime' net_generic for 'no netns' */
> +	instances = insts;
> +	return 0;
> +}


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again)
  2011-07-28  7:00                     ` Patrick McHardy
@ 2011-07-28 19:56                       ` Rainer Weikusat
  0 siblings, 0 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-28 19:56 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Rainer Weikusat, netfilter-devel, linux-kernel

Patrick McHardy <kaber@trash.net> writes:
> On 26.07.2011 13:37, Rainer Weikusat wrote:

[...]

>> +static inline struct nfulnl_instances *instances_for_net(struct net *net)
>> +{
>> +	return net_generic(net, nfulnl_net_id);
>> +}
>> +#else
>> +static inline struct nfulnl_instances *instances_for_net(struct net *net)
>> +{
>> +	return instances;
>> +}
>> +#endif
>
> We use net_generic unconditionally everywhere else, there's no reason
> for nfnetlink_log not to.

I don't really care for the non-netns per-packet overhead, at least
not at the moment, so that's fine with me.

>>> +static struct nfulnl_instances *instances_via_skb(struct sk_buff const *skb)
>> +{
>> +	struct net *net;
>> +
>> +	net = skb->sk ? sock_net(skb->sk) :
>> +		(skb->dev ? dev_net(skb->dev) : &init_net);
>
> You don't need to manually handle init_net, the *_net functions will
> do the right thing.

The code of dev_net is

	static inline
	struct net *dev_net(const struct net_device *dev)
	{
	        return read_pnet(&dev->nd_net);
	}

and read_pnet is

	static inline struct net *read_pnet(struct net * const *pnet)
	{
	        return *pnet;
	}

consequently, this will happily derefences an invalid pointer (namely
0 + offsetof(struct net_device, nd_net).         

> I'd suggest to use dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev).

I've checked that this should actually work and changed the code
accordingly.

> This function is also only used once and doesn't really make things
> easier to read, please get rid of it and open code.

Open-coded, this looks like this:

	inst = instance_lookup_get(instances_for_net(
					dev_net(skb->dev ?
						skb->dev : skb_dst(skb)->dev)),
				li->u.ulog.group);

and I disagree with the assessment that 'helper functions with
descriptive names' should be avoided, especially since they are free
(and I'm just quoting CodingStyle.txt because I happen to agree with
this opinion).

>>  static struct nfulnl_instance *
>> -instance_lookup_get(u_int16_t group_num)
>> +instance_lookup_get(struct nfulnl_instances *instances, u_int16_t group_num)
>
> I'd suggest to pass the net * pointer to this function instead of doing
> the lookup in the calling functions. Should save a bit of code and also
> is a more common pattern used with namespaces.

It actually requires more code because the instances_for_net/
net_generic code then needs to appear in __instance_lookup,
instance_create and instances_destroy while it is presently only once
in nfulnl_recv_config. The really ugly one would be _create which
would need to lookup the per-netns instances table and pass the struct
net * on to a function called by it so that this function can do the
same lookup again.

[...]

>> -static int __init nfnetlink_log_init(void)
>> +static int nfulnl_net_init(struct net *net)
>>  {
>> -	int i, status = -ENOMEM;
>> +	struct nfulnl_instances *insts;
>> +	int i;
>>  
>> -	for (i = 0; i < INSTANCE_BUCKETS; i++)
>> -		INIT_HLIST_HEAD(&instance_table[i]);
>> +	insts = net_generic(net, nfulnl_net_id);
>> +	insts->net = net;
>> +	spin_lock_init(&insts->lock);
>> +
>> +	i = INSTANCE_BUCKETS;
>> +	do INIT_HLIST_HEAD(insts->table + --i); while (i);
>
> Don't put this on one line and please choose a slightly
> more readable construct than + --i while (i).

I also disagree with the assessment that the clumsy

	for (i = 0; i < INSTANCE_BUCKETS; ++i)
		INIT_HLIST_HEAD(&insts->table[i]);

is inherently more 'readable' than a variant which uses features that
are available in C (but not in Pascal. Oh dear ...) in order to
express the algorithm which is actually needed in a more direct way.
But if in Rome ...

I've changed the code as you suggested, except distributing
net_generic/ instances_for_net calls all over the file instead of
doing this once in an upper layer function. Please feel to drop this
on the floor until someone with more 'Linux kernel street credibility'
reimplements it. I need this to work. I don't need my name in the
kernel changelog. I've made a copy of it available in order to be
'nice to others', since this seemed the right thing to do but I cannot
(and won't) spend an eternity on doing practical penance for the fact
that the way I use C (and structure code) is different from the way
you use C (and structure code).

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated yet again)
  2011-07-26 11:37                   ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again) Rainer Weikusat
  2011-07-28  7:00                     ` Patrick McHardy
@ 2011-07-28 19:57                     ` Rainer Weikusat
  1 sibling, 0 replies; 21+ messages in thread
From: Rainer Weikusat @ 2011-07-28 19:57 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel, linux-kernel

From: Rainer Weikusat <rweikusat@mobileactivedefense.com>

Presently, the nfnetlink_log.c file contains only very nominal support
for network namespaces: While it is possible to create sockets which
should theoretically receive NFLOG originated messages in arbitrary
network namespaces, there is only one table of nfulnl_instance
structures in the kernel and all log messages sent via __nfulnl_send
are forced into the init_net namespace so that only sockets created
in this namespace will ever actually receive log data. Likewise, the
nfulnl_rcv_nl_event notification callback won't destroy logging
instances created by processes in other network namespace upon process
death. The patch included below changes the code to use a logging
instance table per network namespace, to send messages generated from
within a specific namespace to sockets also belonging to this
namespace and to destroy logging instances created from other network
namespaces than init_net when cleaning up after a logging process
terminated. It doesn't touch the code dealing with nfnetlink_log /proc
files which thus remain restricted to the init_net namespace because
this isn't really needed in order to get per-namespace logging and
would require changes to other files, in particular, nf_log.c

The patch also removes the hash_init file scope variable because
that's not used for anything except that the existing init routine
initializes it.

Signed-Off-By: Rainer Weikusat <rweikusat@mobileactivedefense.com>
---
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 2e7ccbb..2982315 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -39,6 +39,9 @@
 #include "../bridge/br_private.h"
 #endif
 
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
 #define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
@@ -47,6 +50,15 @@
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
 
+#define INSTANCE_BUCKETS	16
+
+struct nfulnl_instances {
+	spinlock_t lock;
+	atomic_t global_seq;
+	struct hlist_head table[INSTANCE_BUCKETS];
+	struct net *net;
+};
+
 struct nfulnl_instance {
 	struct hlist_node hlist;	/* global list of instances */
 	spinlock_t lock;
@@ -67,14 +79,15 @@ struct nfulnl_instance {
 	u_int16_t flags;
 	u_int8_t copy_mode;
 	struct rcu_head rcu;
+	struct nfulnl_instances *instances;
 };
 
-static DEFINE_SPINLOCK(instances_lock);
-static atomic_t global_seq;
+static int nfulnl_net_id;
 
-#define INSTANCE_BUCKETS	16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
-static unsigned int hash_init;
+static inline struct nfulnl_instances *instances_for_net(struct net *net)
+{
+	return net_generic(net, nfulnl_net_id);
+}
 
 static inline u_int8_t instance_hashfn(u_int16_t group_num)
 {
@@ -82,13 +95,13 @@ static inline u_int8_t instance_hashfn(u_int16_t group_num)
 }
 
 static struct nfulnl_instance *
-__instance_lookup(u_int16_t group_num)
+__instance_lookup(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct hlist_head *head;
 	struct hlist_node *pos;
 	struct nfulnl_instance *inst;
 
-	head = &instance_table[instance_hashfn(group_num)];
+	head = &instances->table[instance_hashfn(group_num)];
 	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
@@ -103,12 +116,12 @@ instance_get(struct nfulnl_instance *inst)
 }
 
 static struct nfulnl_instance *
-instance_lookup_get(u_int16_t group_num)
+instance_lookup_get(struct nfulnl_instances *instances, u_int16_t group_num)
 {
 	struct nfulnl_instance *inst;
 
 	rcu_read_lock_bh();
-	inst = __instance_lookup(group_num);
+	inst = __instance_lookup(instances, group_num);
 	if (inst && !atomic_inc_not_zero(&inst->use))
 		inst = NULL;
 	rcu_read_unlock_bh();
@@ -132,13 +145,14 @@ instance_put(struct nfulnl_instance *inst)
 static void nfulnl_timer(unsigned long data);
 
 static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int pid)
+instance_create(struct nfulnl_instances *instances,
+		u_int16_t group_num, int pid)
 {
 	struct nfulnl_instance *inst;
 	int err;
 
-	spin_lock_bh(&instances_lock);
-	if (__instance_lookup(group_num)) {
+	spin_lock_bh(&instances->lock);
+	if (__instance_lookup(instances, group_num)) {
 		err = -EEXIST;
 		goto out_unlock;
 	}
@@ -171,15 +185,16 @@ instance_create(u_int16_t group_num, int pid)
 	inst->copy_mode 	= NFULNL_COPY_PACKET;
 	inst->copy_range 	= NFULNL_COPY_RANGE_MAX;
 
-	hlist_add_head_rcu(&inst->hlist,
-		       &instance_table[instance_hashfn(group_num)]);
+	inst->instances = instances;
 
-	spin_unlock_bh(&instances_lock);
+	hlist_add_head_rcu(&inst->hlist,
+			&instances->table[instance_hashfn(group_num)]);
 
+	spin_unlock_bh(&instances->lock);
 	return inst;
 
 out_unlock:
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 	return ERR_PTR(err);
 }
 
@@ -208,11 +223,12 @@ __instance_destroy(struct nfulnl_instance *inst)
 }
 
 static inline void
-instance_destroy(struct nfulnl_instance *inst)
+instance_destroy(struct nfulnl_instances *instances,
+		struct nfulnl_instance *inst)
 {
-	spin_lock_bh(&instances_lock);
+	spin_lock_bh(&instances->lock);
 	__instance_destroy(inst);
-	spin_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances->lock);
 }
 
 static int
@@ -334,8 +350,9 @@ __nfulnl_send(struct nfulnl_instance *inst)
 			  NLMSG_DONE,
 			  sizeof(struct nfgenmsg));
 
-	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
-				   MSG_DONTWAIT);
+	status = nfnetlink_unicast(inst->skb, read_pnet(&inst->instances->net),
+				inst->peer_pid,
+				MSG_DONTWAIT);
 
 	inst->qlen = 0;
 	inst->skb = NULL;
@@ -505,7 +522,8 @@ __build_packet_message(struct nfulnl_instance *inst,
 	/* global sequence number */
 	if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
 		NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
-			     htonl(atomic_inc_return(&global_seq)));
+			     htonl(atomic_inc_return(
+				       &inst->instances->global_seq)));
 
 	if (data_len) {
 		struct nlattr *nla;
@@ -567,7 +585,10 @@ nfulnl_log_packet(u_int8_t pf,
 	else
 		li = &default_loginfo;
 
-	inst = instance_lookup_get(li->u.ulog.group);
+	inst = instance_lookup_get(instances_for_net(
+					dev_net(skb->dev ?
+						skb->dev : skb_dst(skb)->dev)),
+				li->u.ulog.group);
 	if (!inst)
 		return;
 
@@ -678,24 +699,26 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 		   unsigned long event, void *ptr)
 {
 	struct netlink_notify *n = ptr;
+	struct nfulnl_instances *instances;
 
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
 		int i;
 
+		instances = instances_for_net(n->net);
+
 		/* destroy all instances for this pid */
-		spin_lock_bh(&instances_lock);
+		spin_lock_bh(&instances->lock);
 		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
 			struct nfulnl_instance *inst;
-			struct hlist_head *head = &instance_table[i];
+			struct hlist_head *head = &instances->table[i];
 
 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
-				if ((net_eq(n->net, &init_net)) &&
-				    (n->pid == inst->peer_pid))
+				if (n->pid == inst->peer_pid)
 					__instance_destroy(inst);
 			}
 		}
-		spin_unlock_bh(&instances_lock);
+		spin_unlock_bh(&instances->lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -734,6 +757,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
+	struct nfulnl_instances *instances;
 	struct nfulnl_instance *inst;
 	struct nfulnl_msg_config_cmd *cmd = NULL;
 	int ret = 0;
@@ -752,7 +776,11 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 		}
 	}
 
-	inst = instance_lookup_get(group_num);
+	instances = instances_for_net(sock_net(ctnl));
+	if (!instances)
+		return -ENODEV;
+
+	inst = instance_lookup_get(instances, group_num);
 	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
 		ret = -EPERM;
 		goto out_put;
@@ -766,7 +794,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 				goto out_put;
 			}
 
-			inst = instance_create(group_num,
+			inst = instance_create(instances, group_num,
 					       NETLINK_CB(skb).pid);
 			if (IS_ERR(inst)) {
 				ret = PTR_ERR(inst);
@@ -779,7 +807,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 				goto out;
 			}
 
-			instance_destroy(inst);
+			instance_destroy(instances, inst);
 			goto out_put;
 		default:
 			ret = -ENOTSUPP;
@@ -862,6 +890,7 @@ static const struct nfnetlink_subsystem nfulnl_subsys = {
 
 #ifdef CONFIG_PROC_FS
 struct iter_state {
+	struct nfulnl_instances *instances;
 	unsigned int bucket;
 };
 
@@ -870,9 +899,13 @@ static struct hlist_node *get_first(struct iter_state *st)
 	if (!st)
 		return NULL;
 
+	st->instances = instances_for_net(&init_net);
+
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
-		if (!hlist_empty(&instance_table[st->bucket]))
-			return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		if (!hlist_empty(&st->instances->table[st->bucket]))
+			return rcu_dereference_bh(
+				hlist_first_rcu(
+					&st->instances->table[st->bucket]));
 	}
 	return NULL;
 }
@@ -884,7 +917,8 @@ static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+		h = rcu_dereference_bh(hlist_first_rcu(
+					&st->instances->table[st->bucket]));
 	}
 	return h;
 }
@@ -953,17 +987,34 @@ static const struct file_operations nful_file_ops = {
 
 #endif /* PROC_FS */
 
-static int __init nfnetlink_log_init(void)
+static int nfulnl_net_init(struct net *net)
 {
-	int i, status = -ENOMEM;
+	struct nfulnl_instances *insts;
+	int i;
+
+	insts = net_generic(net, nfulnl_net_id);
+	insts->net = net;
+	spin_lock_init(&insts->lock);
+
+	for (i = 0; i < INSTANCE_BUCKETS; ++i)
+		INIT_HLIST_HEAD(&insts->table[i]);
+
+	return 0;
+}
+
+static struct pernet_operations nfulnl_net_ops = {
+	.init =		nfulnl_net_init,
+	.id =		&nfulnl_net_id,
+	.size =		sizeof(struct nfulnl_instances)
+};
 
-	for (i = 0; i < INSTANCE_BUCKETS; i++)
-		INIT_HLIST_HEAD(&instance_table[i]);
+static int __init nfnetlink_log_init(void)
+{
+	int status = -ENOMEM;
 
-	/* it's not really all that important to have a random value, so
-	 * we can do this from the init function, even if there hasn't
-	 * been that much entropy yet */
-	get_random_bytes(&hash_init, sizeof(hash_init));
+	status = register_pernet_subsys(&nfulnl_net_ops);
+	if (status)
+		return status;
 
 	netlink_register_notifier(&nfulnl_rtnl_notifier);
 	status = nfnetlink_subsys_register(&nfulnl_subsys);
@@ -1004,6 +1055,7 @@ static void __exit nfnetlink_log_fini(void)
 #endif
 	nfnetlink_subsys_unregister(&nfulnl_subsys);
 	netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+	unregister_pernet_subsys(&nfulnl_net_ops);
 }
 
 MODULE_DESCRIPTION("netfilter userspace logging");

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2011-07-28 19:57 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-01 14:44 [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c Rainer Weikusat
2011-07-18 16:21 ` Patrick McHardy
2011-07-18 17:56   ` Rainer Weikusat
2011-07-18 19:11     ` Rainer Weikusat
2011-07-18 19:19     ` Alexey Dobriyan
2011-07-18 19:43       ` Rainer Weikusat
2011-07-18 19:46         ` David Miller
2011-07-18 20:17           ` Rainer Weikusat
2011-07-18 20:19             ` David Miller
2011-07-18 20:32               ` Alexey Dobriyan
2011-07-19  9:42                 ` Patrick McHardy
2011-07-18 20:27             ` Eric Dumazet
2011-07-18 20:27               ` Eric Dumazet
2011-07-18 20:28             ` Jan Engelhardt
2011-07-19 21:38               ` Rainer Weikusat
2011-07-20 15:04                 ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated) Rainer Weikusat
2011-07-26 11:22                   ` Rainer Weikusat
2011-07-26 11:37                   ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated again) Rainer Weikusat
2011-07-28  7:00                     ` Patrick McHardy
2011-07-28 19:56                       ` Rainer Weikusat
2011-07-28 19:57                     ` [PATCH] netfilter: add per-namespace logging to nfnetlink_log.c (updated yet again) Rainer Weikusat

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.