* [PATCH nf-next 1/3] netfilter: bridge: add and use br_nf_hook_thresh
2016-06-30 21:19 [PATCH nf-next 0/3] Compact netfilter hooks list Aaron Conole
@ 2016-06-30 21:19 ` Aaron Conole
2016-06-30 21:19 ` [PATCH nf-next 2/3] netfilter: call nf_hook_state_init with rcu_read_lock held Aaron Conole
2016-06-30 21:19 ` [PATCH nf-next 3/3] netfilter: replace list_head with single linked list Aaron Conole
2 siblings, 0 replies; 7+ messages in thread
From: Aaron Conole @ 2016-06-30 21:19 UTC (permalink / raw)
To: netdev, netfilter-devel
From: Florian Westphal <fw@strlen.de>
This replaces the last uses of NF_HOOK_THRESH().
Followup patch will remove it and rename nf_hook_thresh.
The reason is that inet (non-bridge) netfilter no longer invokes the
hooks from hooks, so we do no longer need the thresh value to skip hooks
with a lower priority.
The bridge netfilter however may need to do this. br_nf_hook_thresh is a
wrapper that is supposed to do this, i.e. only call hooks with a
priority that exceeds NF_BR_PRI_BRNF.
It's used only in the recursion cases of br_netfilter.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Aaron Conole <aconole@bytheb.org>
---
include/net/netfilter/br_netfilter.h | 6 ++++
net/bridge/br_netfilter_hooks.c | 57 ++++++++++++++++++++++++++++++------
net/bridge/br_netfilter_ipv6.c | 12 ++++----
3 files changed, 59 insertions(+), 16 deletions(-)
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index e8d1448..0b0c35c 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -15,6 +15,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
void nf_bridge_update_protocol(struct sk_buff *skb);
+int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk,
+ struct sk_buff *skb, struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct net *, struct sock *,
+ struct sk_buff *));
+
static inline struct nf_bridge_info *
nf_bridge_info_get(const struct sk_buff *skb)
{
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2d25979..19f230c 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -30,6 +30,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
+#include <linux/rculist.h>
#include <linux/inetdevice.h>
#include <net/ip.h>
@@ -395,11 +396,10 @@ bridged_dnat:
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE,
- NF_BR_PRE_ROUTING,
- net, sk, skb, skb->dev, NULL,
- br_nf_pre_routing_finish_bridge,
- 1);
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+ net, sk, skb, skb->dev,
+ NULL,
+ br_nf_pre_routing_finish);
return 0;
}
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -417,10 +417,8 @@ bridged_dnat:
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
- skb->dev, NULL,
- br_handle_frame_finish, 1);
-
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
+ br_handle_frame_finish);
return 0;
}
@@ -992,6 +990,47 @@ static struct notifier_block brnf_notifier __read_mostly = {
.notifier_call = brnf_device_event,
};
+/* recursively invokes nf_hook_slow (again), skipping already-called
+ * hooks (< NF_BR_PRI_BRNF).
+ *
+ * Called with rcu read lock held.
+ */
+int br_nf_hook_thresh(unsigned int hook, struct net *net,
+ struct sock *sk, struct sk_buff *skb,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct net *, struct sock *,
+ struct sk_buf *))
+{
+ struct nf_hook_ops *elem;
+ struct nf_hook_state state;
+ struct list_head *head;
+ int ret;
+
+ head = &net->nf.hooks[NFPROTO_BRIDGE][hook];
+
+ list_for_each_entry_rcu(elem, head, list) {
+ struct nf_hook_ops *next;
+
+ next = list_entry_rcu(list_next_rcu(&elem->list),
+ struct nf_hook_ops, list);
+ if (next->priority <= NF_BR_PRI_BRNF)
+ continue;
+ }
+
+ if (&elem->list == head)
+ return okfn(net, sk, skb);
+
+ nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1,
+ NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+
+ ret = nf_hook_slow(skb, &state);
+ if (ret == 1)
+ ret = okfn(net, sk, skb);
+
+ return ret;
+}
+
#ifdef CONFIG_SYSCTL
static
int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 5e59a84..5989661 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -187,10 +187,9 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
- net, sk, skb, skb->dev, NULL,
- br_nf_pre_routing_finish_bridge,
- 1);
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+ net, sk, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish_bridge);
return 0;
}
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -207,9 +206,8 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
- skb->dev, NULL,
- br_handle_frame_finish, 1);
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb,
+ skb->dev, NULL, br_handle_frame_finish);
return 0;
}
--
2.5.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH nf-next 2/3] netfilter: call nf_hook_state_init with rcu_read_lock held
2016-06-30 21:19 [PATCH nf-next 0/3] Compact netfilter hooks list Aaron Conole
2016-06-30 21:19 ` [PATCH nf-next 1/3] netfilter: bridge: add and use br_nf_hook_thresh Aaron Conole
@ 2016-06-30 21:19 ` Aaron Conole
2016-06-30 21:19 ` [PATCH nf-next 3/3] netfilter: replace list_head with single linked list Aaron Conole
2 siblings, 0 replies; 7+ messages in thread
From: Aaron Conole @ 2016-06-30 21:19 UTC (permalink / raw)
To: netdev, netfilter-devel
From: Florian Westphal <fw@strlen.de>
This makes things simpler because we can store the head of the list
in the nf_state structure without worrying about concurrent add/delete
of hook elements from the list.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Aaron Conole <aconole@bytheb.org>
---
include/linux/netfilter.h | 8 +++++++-
include/linux/netfilter_ingress.h | 1 +
net/bridge/netfilter/ebt_redirect.c | 2 +-
net/bridge/netfilter/ebtables.c | 2 +-
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +-
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +-
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
net/netfilter/core.c | 5 +----
net/netfilter/nf_conntrack_core.c | 2 +-
net/netfilter/nf_conntrack_h323_main.c | 2 +-
net/netfilter/nf_conntrack_helper.c | 2 +-
net/netfilter/nfnetlink_cthelper.c | 2 +-
net/netfilter/nfnetlink_log.c | 8 ++++++--
net/netfilter/nfnetlink_queue.c | 2 +-
net/netfilter/xt_helper.c | 2 +-
16 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 9230f9a..ad444f0 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -174,10 +174,16 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
if (!list_empty(hook_list)) {
struct nf_hook_state state;
+ int ret;
+ /* We may already have this, but read-locks nest anyway */
+ rcu_read_lock();
nf_hook_state_init(&state, hook_list, hook, thresh,
pf, indev, outdev, sk, net, okfn);
- return nf_hook_slow(skb, &state);
+
+ ret = nf_hook_slow(skb, &state);
+ rcu_read_unlock();
+ return ret;
}
return 1;
}
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 5fcd375..6965ba0 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -14,6 +14,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
return !list_empty(&skb->dev->nf_hooks_ingress);
}
+/* caller must hold rcu_read_lock */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
struct nf_hook_state state;
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 20396499..2e7c4f9 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -24,7 +24,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
return EBT_DROP;
if (par->hooknum != NF_BR_BROUTING)
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
ether_addr_copy(eth_hdr(skb)->h_dest,
br_port_get_rcu(par->in)->br->dev->dev_addr);
else
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5a61f35..6faa2c3 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -148,7 +148,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
return 1;
if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
return 1;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
if (in && (p = br_port_get_rcu(in)) != NULL &&
FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN))
return 1;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ae1a71a..eab0239 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -110,7 +110,7 @@ static unsigned int ipv4_helper(void *priv,
if (!help)
return NF_ACCEPT;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index c567e1b..2c08d6a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -149,7 +149,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
return -NF_ACCEPT;
}
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 1aa5848..963ee38 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -115,7 +115,7 @@ static unsigned int ipv6_helper(void *priv,
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 660bc10..f5a61bc 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -165,7 +165,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return -NF_ACCEPT;
}
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index f39276d..6561d5c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -291,6 +291,7 @@ repeat:
/* Returns 1 if okfn() needs to be executed by the caller,
+ * Must be called with rcu_read_lock held.
* -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
@@ -298,9 +299,6 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
unsigned int verdict;
int ret = 0;
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
-
elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
next_hook:
verdict = nf_iterate(state->hook_list, skb, state, &elem);
@@ -321,7 +319,6 @@ next_hook:
kfree_skb(skb);
}
}
- rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL(nf_hook_slow);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index db2312e..b85521a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1172,7 +1172,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
skb->nfct = NULL;
}
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
l3proto = __nf_ct_l3proto_find(pf);
ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 9511af0..4c426c6 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -736,7 +736,7 @@ static int callforward_do_filter(struct net *net,
const struct nf_afinfo *afinfo;
int ret = 0;
- /* rcu_read_lock()ed by nf_hook_slow() */
+ /* rcu_read_lock()ed by nf_hook_thresh() */
afinfo = nf_get_afinfo(family);
if (!afinfo)
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 196cb39..5f127e7 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -349,7 +349,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
/* Called from the helper function, this call never fails */
help = nfct_help(ct);
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index e924e95..3b79f34 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -43,7 +43,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
if (help == NULL)
return NF_DROP;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (helper == NULL)
return NF_DROP;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11f81c8..b219741 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -442,7 +442,9 @@ __build_packet_message(struct nfnl_log_net *log,
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
htonl(indev->ifindex)) ||
/* this is the bridge group "brX" */
- /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+ /* rcu_read_lock()ed by nf_hook_thresh or
+ * nf_log_packet
+ */
nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
@@ -477,7 +479,9 @@ __build_packet_message(struct nfnl_log_net *log,
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
htonl(outdev->ifindex)) ||
/* this is the bridge group "brX" */
- /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+ /* rcu_read_lock()ed by nf_hook_thresh or
+ * nf_log_packet
+ */
nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5d36a09..216bf69 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -740,7 +740,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
struct net *net = entry->state.net;
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
- /* rcu_read_lock()ed by nf_hook_slow() */
+ /* rcu_read_lock()ed by nf_hook_thresh() */
queue = instance_lookup(q, queuenum);
if (!queue)
return -ESRCH;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 9f4ab00..a883edb 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -41,7 +41,7 @@ helper_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (!master_help)
return ret;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(master_help->helper);
if (!helper)
return ret;
--
2.5.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH nf-next 3/3] netfilter: replace list_head with single linked list
2016-06-30 21:19 [PATCH nf-next 0/3] Compact netfilter hooks list Aaron Conole
2016-06-30 21:19 ` [PATCH nf-next 1/3] netfilter: bridge: add and use br_nf_hook_thresh Aaron Conole
2016-06-30 21:19 ` [PATCH nf-next 2/3] netfilter: call nf_hook_state_init with rcu_read_lock held Aaron Conole
@ 2016-06-30 21:19 ` Aaron Conole
2016-07-08 23:30 ` Florian Westphal
2 siblings, 1 reply; 7+ messages in thread
From: Aaron Conole @ 2016-06-30 21:19 UTC (permalink / raw)
To: netdev, netfilter-devel
The netfilter hook list never uses the prev pointer, and so can be
trimmed to be a smaller singly-linked list.
In addition to having a more light weight structure for hook traversal,
struct net becomes 5568 bytes (down from 6400) and struct net_device
becomes 2176 bytes (down from 2240).
Signed-off-by: Aaron Conole <aconole@bytheb.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
include/linux/netdevice.h | 2 +-
include/linux/netfilter.h | 18 +++---
include/linux/netfilter_ingress.h | 14 +++--
include/net/netfilter/nf_queue.h | 9 ++-
include/net/netns/netfilter.h | 2 +-
net/bridge/br_netfilter_hooks.c | 21 +++----
net/netfilter/core.c | 126 ++++++++++++++++++++++++--------------
net/netfilter/nf_internals.h | 10 +--
net/netfilter/nf_queue.c | 15 +++--
net/netfilter/nfnetlink_queue.c | 5 +-
10 files changed, 129 insertions(+), 93 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e84d9d2..8235f67 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1747,7 +1747,7 @@ struct net_device {
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
- struct list_head nf_hooks_ingress;
+ struct nf_hook_entry __rcu *nf_hooks_ingress;
#endif
unsigned char broadcast[MAX_ADDR_LEN];
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index ad444f0..3390a84 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -55,12 +55,12 @@ struct nf_hook_state {
struct net_device *out;
struct sock *sk;
struct net *net;
- struct list_head *hook_list;
+ struct nf_hook_entry *hook_list;
int (*okfn)(struct net *, struct sock *, struct sk_buff *);
};
static inline void nf_hook_state_init(struct nf_hook_state *p,
- struct list_head *hook_list,
+ struct nf_hook_entry *hook_list,
unsigned int hook,
int thresh, u_int8_t pf,
struct net_device *indev,
@@ -97,6 +97,12 @@ struct nf_hook_ops {
int priority;
};
+struct nf_hook_entry {
+ struct nf_hook_entry __rcu *next;
+ struct nf_hook_ops ops;
+ const struct nf_hook_ops *orig_ops;
+};
+
struct nf_sockopt_ops {
struct list_head list;
@@ -161,8 +167,6 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
int thresh)
{
- struct list_head *hook_list;
-
#ifdef HAVE_JUMP_LABEL
if (__builtin_constant_p(pf) &&
__builtin_constant_p(hook) &&
@@ -170,14 +174,14 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
return 1;
#endif
- hook_list = &net->nf.hooks[pf][hook];
-
- if (!list_empty(hook_list)) {
+ if (rcu_access_pointer(net->nf.hooks[pf][hook])) {
+ struct nf_hook_entry *hook_list;
struct nf_hook_state state;
int ret;
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
+ hook_list = rcu_dereference(net->nf.hooks[pf][hook]);
nf_hook_state_init(&state, hook_list, hook, thresh,
pf, indev, outdev, sk, net, okfn);
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 6965ba0..e3e3f6d 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -11,23 +11,27 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
return false;
#endif
- return !list_empty(&skb->dev->nf_hooks_ingress);
+ return rcu_access_pointer(skb->dev->nf_hooks_ingress) != NULL;
}
/* caller must hold rcu_read_lock */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
+ struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
struct nf_hook_state state;
- nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
- NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
- skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
+ if (unlikely(!e))
+ return 0;
+
+ nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN,
+ NFPROTO_NETDEV, skb->dev, NULL, NULL,
+ dev_net(skb->dev), NULL);
return nf_hook_slow(skb, &state);
}
static inline void nf_hook_ingress_init(struct net_device *dev)
{
- INIT_LIST_HEAD(&dev->nf_hooks_ingress);
+ RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
}
#else /* CONFIG_NETFILTER_INGRESS */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 0dbce55..1da5bc0 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -11,7 +11,6 @@ struct nf_queue_entry {
struct sk_buff *skb;
unsigned int id;
- struct nf_hook_ops *elem;
struct nf_hook_state state;
u16 size; /* sizeof(entry) + saved route keys */
@@ -22,10 +21,10 @@ struct nf_queue_entry {
/* Packet queuing */
struct nf_queue_handler {
- int (*outfn)(struct nf_queue_entry *entry,
- unsigned int queuenum);
- void (*nf_hook_drop)(struct net *net,
- struct nf_hook_ops *ops);
+ int (*outfn)(struct nf_queue_entry *entry,
+ unsigned int queuenum);
+ void (*nf_hook_drop)(struct net *net,
+ const struct nf_hook_entry *ops);
};
void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 36d7235..58487b1 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -16,6 +16,6 @@ struct netns_nf {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
- struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+ struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
};
#endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 19f230c..88bb8ed 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1000,28 +1000,21 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct net *, struct sock *,
- struct sk_buf *))
+ struct sk_buff *))
{
- struct nf_hook_ops *elem;
+ struct nf_hook_entry *elem;
struct nf_hook_state state;
- struct list_head *head;
int ret;
- head = &net->nf.hooks[NFPROTO_BRIDGE][hook];
+ elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
- list_for_each_entry_rcu(elem, head, list) {
- struct nf_hook_ops *next;
+ while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF))
+ elem = rcu_dereference(elem->next);
- next = list_entry_rcu(list_next_rcu(&elem->list),
- struct nf_hook_ops, list);
- if (next->priority <= NF_BR_PRI_BRNF)
- continue;
- }
-
- if (&elem->list == head)
+ if (!elem)
return okfn(net, sk, skb);
- nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1,
+ nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
ret = nf_hook_slow(skb, &state);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 6561d5c..94c0921 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -22,6 +22,7 @@
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -61,33 +62,52 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+#define nf_entry_dereference(e) \
+ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct list_head *nf_find_hook_list(struct net *net,
- const struct nf_hook_ops *reg)
+static struct nf_hook_entry *nf_find_hook_list(struct net *net,
+ const struct nf_hook_ops *reg)
{
- struct list_head *hook_list = NULL;
+ struct nf_hook_entry *hook_list = NULL;
if (reg->pf != NFPROTO_NETDEV)
- hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
+ hook_list = rcu_dereference(net->nf.hooks[reg->pf]
+ [reg->hooknum]);
else if (reg->hooknum == NF_NETDEV_INGRESS) {
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->dev && dev_net(reg->dev) == net)
- hook_list = ®->dev->nf_hooks_ingress;
+ hook_list =
+ rcu_dereference(reg->dev->nf_hooks_ingress);
#endif
}
return hook_list;
}
-struct nf_hook_entry {
- const struct nf_hook_ops *orig_ops;
- struct nf_hook_ops ops;
-};
+/* must hold nf_hook_mutex */
+static void nf_set_hook_list(struct net *net, const struct nf_hook_ops *reg,
+ struct nf_hook_entry *e)
+{
+ if (reg->pf != NFPROTO_NETDEV) {
+ rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], e);
+#ifdef CONFIG_NETFILTER_INGRESS
+ } else if (reg->hooknum == NF_NETDEV_INGRESS) {
+ rcu_assign_pointer(reg->dev->nf_hooks_ingress, e);
+#endif
+ } else {
+ net_warn_ratelimited("pf %d, hooknum %d: not set\n",
+ reg->pf, reg->hooknum);
+ }
+}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct list_head *hook_list;
+ struct nf_hook_entry *hook_list;
struct nf_hook_entry *entry;
- struct nf_hook_ops *elem;
+
+ if (reg->pf == NFPROTO_NETDEV &&
+ (reg->hooknum != NF_NETDEV_INGRESS ||
+ !reg->dev || dev_net(reg->dev) != net))
+ return -EINVAL;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
@@ -96,18 +116,20 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
entry->orig_ops = reg;
entry->ops = *reg;
+ mutex_lock(&nf_hook_mutex);
hook_list = nf_find_hook_list(net, reg);
- if (!hook_list) {
- kfree(entry);
- return -ENOENT;
+ entry->next = hook_list;
+ while (hook_list && reg->priority >= hook_list->orig_ops->priority &&
+ hook_list->next) {
+ hook_list = nf_entry_dereference(hook_list->next);
}
- mutex_lock(&nf_hook_mutex);
- list_for_each_entry(elem, hook_list, list) {
- if (reg->priority < elem->priority)
- break;
+ if (hook_list) {
+ entry->next = hook_list->next;
+ rcu_assign_pointer(hook_list->next, entry);
+ } else {
+ nf_set_hook_list(net, reg, entry);
}
- list_add_rcu(&entry->ops.list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
@@ -122,24 +144,34 @@ EXPORT_SYMBOL(nf_register_net_hook);
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct list_head *hook_list;
struct nf_hook_entry *entry;
- struct nf_hook_ops *elem;
-
- hook_list = nf_find_hook_list(net, reg);
- if (!hook_list)
- return;
mutex_lock(&nf_hook_mutex);
- list_for_each_entry(elem, hook_list, list) {
- entry = container_of(elem, struct nf_hook_entry, ops);
- if (entry->orig_ops == reg) {
- list_del_rcu(&entry->ops.list);
- break;
+ entry = nf_find_hook_list(net, reg);
+ if (!entry)
+ goto unlocked;
+
+ if (entry->orig_ops == reg) {
+ nf_set_hook_list(net, reg,
+ nf_entry_dereference(entry->next));
+ } else {
+ while (entry && entry->next) {
+ struct nf_hook_entry *next =
+ nf_entry_dereference(entry->next);
+ if (next->orig_ops == reg) {
+ struct nf_hook_entry *nn =
+ nf_entry_dereference(next->next);
+ rcu_assign_pointer(entry->next, nn);
+ entry = next;
+ break;
+ }
}
}
+
+unlocked:
mutex_unlock(&nf_hook_mutex);
- if (&elem->list == hook_list) {
+
+ if (!entry) {
WARN(1, "nf_unregister_net_hook: hook not found!\n");
return;
}
@@ -151,7 +183,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
- nf_queue_nf_hook_drop(net, &entry->ops);
+ nf_queue_nf_hook_drop(net, entry);
/* other cpu might still process nfqueue verdict that used reg */
synchronize_net();
kfree(entry);
@@ -193,6 +225,8 @@ int nf_register_hook(struct nf_hook_ops *reg)
struct net *net, *last;
int ret;
+ WARN_ON(reg->priv);
+
rtnl_lock();
for_each_net(net) {
ret = nf_register_net_hook(net, reg);
@@ -253,10 +287,9 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
}
EXPORT_SYMBOL(nf_unregister_hooks);
-unsigned int nf_iterate(struct list_head *head,
- struct sk_buff *skb,
+unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
- struct nf_hook_ops **elemp)
+ struct nf_hook_entry **elemp)
{
unsigned int verdict;
@@ -264,20 +297,20 @@ unsigned int nf_iterate(struct list_head *head,
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
*/
- list_for_each_entry_continue_rcu((*elemp), head, list) {
- if (state->thresh > (*elemp)->priority)
+ while (*elemp) {
+ if (state->thresh > (*elemp)->ops.priority)
continue;
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook((*elemp)->priv, skb, state);
+ verdict = (*elemp)->ops.hook((*elemp)->ops.priv, skb, state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
> NF_MAX_VERDICT)) {
NFDEBUG("Evil return from %p(%u).\n",
- (*elemp)->hook, state->hook);
+ (*elemp)->ops.hook, state->hook);
continue;
}
#endif
@@ -285,6 +318,7 @@ repeat:
return verdict;
goto repeat;
}
+ *elemp = (*elemp)->next;
}
return NF_ACCEPT;
}
@@ -295,13 +329,13 @@ repeat:
* -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
- struct nf_hook_ops *elem;
+ struct nf_hook_entry *elem;
unsigned int verdict;
int ret = 0;
- elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
+ elem = state->hook_list;
next_hook:
- verdict = nf_iterate(state->hook_list, skb, state, &elem);
+ verdict = nf_iterate(skb, state, &elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -310,8 +344,10 @@ next_hook:
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
- int err = nf_queue(skb, elem, state,
- verdict >> NF_VERDICT_QBITS);
+ int err;
+
+ state->hook_list = elem;
+ err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
@@ -438,7 +474,7 @@ static int __net_init netfilter_net_init(struct net *net)
for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
for (h = 0; h < NF_MAX_HOOKS; h++)
- INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+ RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
}
#ifdef CONFIG_PROC_FS
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 0655225..6c0846d 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -13,13 +13,13 @@
/* core.c */
-unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
- struct nf_hook_state *state, struct nf_hook_ops **elemp);
+unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
+ struct nf_hook_entry **elemp);
/* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
- struct nf_hook_state *state, unsigned int queuenum);
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+ unsigned int queuenum);
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *e);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index b19ad20..0cb131b 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -96,14 +96,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *e)
{
const struct nf_queue_handler *qh;
rcu_read_lock();
qh = rcu_dereference(net->nf.queue_handler);
if (qh)
- qh->nf_hook_drop(net, ops);
+ qh->nf_hook_drop(net, e);
rcu_read_unlock();
}
@@ -112,7 +112,6 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
* through nf_reinject().
*/
int nf_queue(struct sk_buff *skb,
- struct nf_hook_ops *elem,
struct nf_hook_state *state,
unsigned int queuenum)
{
@@ -141,7 +140,6 @@ int nf_queue(struct sk_buff *skb,
*entry = (struct nf_queue_entry) {
.skb = skb,
- .elem = elem,
.state = *state,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -166,7 +164,8 @@ err:
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
struct sk_buff *skb = entry->skb;
- struct nf_hook_ops *elem = entry->elem;
+ struct nf_hook_entry *e = entry->state.hook_list;
+ struct nf_hook_ops *elem = &e->ops;
const struct nf_afinfo *afinfo;
int err;
@@ -186,8 +185,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (verdict == NF_ACCEPT) {
next_hook:
- verdict = nf_iterate(entry->state.hook_list,
- skb, &entry->state, &elem);
+ verdict = nf_iterate(skb, &entry->state, &e);
}
switch (verdict & NF_VERDICT_MASK) {
@@ -198,7 +196,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, elem, &entry->state,
+ entry->state.hook_list = e;
+ err = nf_queue(skb, &entry->state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ESRCH &&
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 216bf69..60d3e51 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -919,10 +919,11 @@ static struct notifier_block nfqnl_dev_notifier = {
static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
{
- return entry->elem == (struct nf_hook_ops *)ops_ptr;
+ return entry->state.hook_list == (struct nf_hook_entry *)ops_ptr;
}
-static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
+static void nfqnl_nf_hook_drop(struct net *net,
+ const struct nf_hook_entry *hook)
{
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int i;
--
2.5.5
^ permalink raw reply related [flat|nested] 7+ messages in thread