All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf-next 3/4,v2] netfilter: conntrack: introduce clash resolution on insertion race
@ 2016-05-03 11:48 Pablo Neira Ayuso
  2016-05-03 12:14 ` Florian Westphal
  0 siblings, 1 reply; 4+ messages in thread
From: Pablo Neira Ayuso @ 2016-05-03 11:48 UTC (permalink / raw)
  To: netfilter-devel

This patch introduces nf_ct_resolve_clash() to resolve race condition on
conntrack insertions.

This is particularly a problem for connection-less protocols such as
UDP, with no initial handshake. Two or more packets may race to insert
the entry resulting in packet drops.

Another problematic scenario are packets enqueued to userspace via
NFQUEUE after the raw table, that make it easier to trigger this
race.

To resolve this, the idea is to reset the conntrack entry to the one
that won race. Packet and bytes counters are also merged.

The 'insert_failed' stats still accounts for this situation, after
this patch, the drop counter is bumped whenever we drop packets, so we
can watch for unresolved clashes.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
v2: drop refcount of the old conntrack entry, otherwise we leak this.
    Call nf_ct_add_to_dying_list() before clash resolution.

 include/net/netfilter/nf_conntrack_l4proto.h |  3 ++
 net/netfilter/nf_conntrack_core.c            | 46 +++++++++++++++++++++++++++-
 net/netfilter/nf_conntrack_proto_udp.c       |  2 ++
 net/netfilter/nf_conntrack_proto_udplite.c   |  2 ++
 4 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 956d8a6..1a5fb36 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -23,6 +23,9 @@ struct nf_conntrack_l4proto {
 	/* L4 Protocol number. */
 	u_int8_t l4proto;
 
+	/* Resolve clashes on insertion races. */
+	bool allow_clash;
+
 	/* Try to fill in the third arg: dataoff is offset past network protocol
            hdr.  Return true if possible. */
 	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8462b54..bb6d837 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -616,6 +616,48 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
 	}
 }
 
+static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			     const struct nf_conn *old_ct)
+{
+	struct nf_conn_acct *acct;
+
+	acct = nf_conn_acct_find(old_ct);
+	if (acct) {
+		struct nf_conn_counter *counter = acct->counter;
+		enum ip_conntrack_info ctinfo;
+		unsigned int bytes;
+
+		/* u32 should be fine since we must have seen one packet. */
+		bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+		nf_ct_acct_update(ct, ctinfo, bytes);
+	}
+}
+
+/* Resolve race on insertion if this protocol allows this. */
+static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
+			       struct nf_conn *old_ct,
+			       enum ip_conntrack_info ctinfo,
+			       struct nf_conntrack_tuple_hash *h)
+{
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+	struct nf_conntrack_l4proto *l4proto;
+
+	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	if (l4proto->allow_clash &&
+	    !nf_ct_is_dying(ct) &&
+	    atomic_inc_not_zero(&ct->ct_general.use)) {
+		/* Don't modify skb->nfctinfo, we're at POSTROUTING so this
+		 * packet is already leaving our framework, it is too late.
+		 */
+		skb->nfct = &ct->ct_general;
+		nf_ct_acct_merge(ct, ctinfo, old_ct);
+		nf_ct_put(old_ct);
+		return NF_ACCEPT;
+	}
+	NF_CT_STAT_INC(net, drop);
+	return NF_DROP;
+}
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
@@ -630,6 +672,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
 	unsigned int sequence;
+	int ret;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	net = nf_ct_net(ct);
@@ -727,10 +770,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 
 out:
 	nf_ct_add_to_dying_list(ct);
+	ret = nf_ct_resolve_clash(net, skb, ct, ctinfo, h);
 	nf_conntrack_double_unlock(hash, reply_hash);
 	NF_CT_STAT_INC(net, insert_failed);
 	local_bh_enable();
-	return NF_DROP;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
 
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 478f92f..4fd0405 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -309,6 +309,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDP,
 	.name			= "udp",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
 	.print_tuple		= udp_print_tuple,
@@ -341,6 +342,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDP,
 	.name			= "udp",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
 	.print_tuple		= udp_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 1ac8ee1..9d692f5 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -274,6 +274,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDPLITE,
 	.name			= "udplite",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udplite_pkt_to_tuple,
 	.invert_tuple		= udplite_invert_tuple,
 	.print_tuple		= udplite_print_tuple,
@@ -306,6 +307,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDPLITE,
 	.name			= "udplite",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udplite_pkt_to_tuple,
 	.invert_tuple		= udplite_invert_tuple,
 	.print_tuple		= udplite_print_tuple,
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH nf-next 3/4,v2] netfilter: conntrack: introduce clash resolution on insertion race
  2016-05-03 11:48 [PATCH nf-next 3/4,v2] netfilter: conntrack: introduce clash resolution on insertion race Pablo Neira Ayuso
@ 2016-05-03 12:14 ` Florian Westphal
  2016-05-03 12:32   ` Pablo Neira Ayuso
  0 siblings, 1 reply; 4+ messages in thread
From: Florian Westphal @ 2016-05-03 12:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> This patch introduces nf_ct_resolve_clash() to resolve race condition on
> conntrack insertions.
> 
> This is particularly a problem for connection-less protocols such as
> UDP, with no initial handshake. Two or more packets may race to insert
> the entry resulting in packet drops.
> 
> Another problematic scenario are packets enqueued to userspace via
> NFQUEUE after the raw table, that make it easier to trigger this
> race.
> 
> To resolve this, the idea is to reset the conntrack entry to the one
> that won race. Packet and bytes counters are also merged.
> 
> The 'insert_failed' stats still accounts for this situation, after
> this patch, the drop counter is bumped whenever we drop packets, so we
> can watch for unresolved clashes.
> 
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> ---
> v2: drop refcount of the old conntrack entry, otherwise we leak this.
>     Call nf_ct_add_to_dying_list() before clash resolution.
> +/* Resolve race on insertion if this protocol allows this. */
> +static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
> +			       struct nf_conn *old_ct,
> +			       enum ip_conntrack_info ctinfo,
> +			       struct nf_conntrack_tuple_hash *h)
> +{
> +	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
> +	struct nf_conntrack_l4proto *l4proto;
> +
> +	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
> +	if (l4proto->allow_clash &&
> +	    !nf_ct_is_dying(ct) &&
> +	    atomic_inc_not_zero(&ct->ct_general.use)) {

I found this confusing, perhaps add small one-liner comment that
*ct is in fact ct already in the table, not the one that was attached
to skb->nfct (perhaps I just need more coffee, sorry).

> +		/* Don't modify skb->nfctinfo, we're at POSTROUTING so this
> +		 * packet is already leaving our framework, it is too late.
> +		 */

Note that this might be loopback in which case this skb will
reappear on PREROUTING.

> +		skb->nfct = &ct->ct_general;
> +		nf_ct_acct_merge(ct, ctinfo, old_ct);
> +		nf_ct_put(old_ct);

Perhaps it would be better to not have old_ct and instead
nf_conntrack_put(skb->nfct);
skb->nfct = &ct->ct_general;

?

> +	int ret;
>  
>  	ct = nf_ct_get(skb, &ctinfo);
>  	net = nf_ct_net(ct);
> @@ -727,10 +770,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
>  
>  out:
>  	nf_ct_add_to_dying_list(ct);
> +	ret = nf_ct_resolve_clash(net, skb, ct, ctinfo, h);

Is this safe?
Seems we jump to out label in other cases as well, not
just for clashes.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH nf-next 3/4,v2] netfilter: conntrack: introduce clash resolution on insertion race
  2016-05-03 12:14 ` Florian Westphal
@ 2016-05-03 12:32   ` Pablo Neira Ayuso
  2016-05-03 14:29     ` Florian Westphal
  0 siblings, 1 reply; 4+ messages in thread
From: Pablo Neira Ayuso @ 2016-05-03 12:32 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Tue, May 03, 2016 at 02:14:26PM +0200, Florian Westphal wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > This patch introduces nf_ct_resolve_clash() to resolve race condition on
> > conntrack insertions.
> > 
> > This is particularly a problem for connection-less protocols such as
> > UDP, with no initial handshake. Two or more packets may race to insert
> > the entry resulting in packet drops.
> > 
> > Another problematic scenario are packets enqueued to userspace via
> > NFQUEUE after the raw table, that make it easier to trigger this
> > race.
> > 
> > To resolve this, the idea is to reset the conntrack entry to the one
> > that won race. Packet and bytes counters are also merged.
> > 
> > The 'insert_failed' stats still accounts for this situation, after
> > this patch, the drop counter is bumped whenever we drop packets, so we
> > can watch for unresolved clashes.
> > 
> > Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> > ---
> > v2: drop refcount of the old conntrack entry, otherwise we leak this.
> >     Call nf_ct_add_to_dying_list() before clash resolution.
> > +/* Resolve race on insertion if this protocol allows this. */
> > +static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
> > +			       struct nf_conn *old_ct,
> > +			       enum ip_conntrack_info ctinfo,
> > +			       struct nf_conntrack_tuple_hash *h)
> > +{
> > +	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
> > +	struct nf_conntrack_l4proto *l4proto;
> > +
> > +	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
> > +	if (l4proto->allow_clash &&
> > +	    !nf_ct_is_dying(ct) &&
> > +	    atomic_inc_not_zero(&ct->ct_general.use)) {
> 
> I found this confusing, perhaps add small one-liner comment that
> *ct is in fact ct already in the table, not the one that was attached
> to skb->nfct (perhaps I just need more coffee, sorry).

OK, will add this.

> > +		/* Don't modify skb->nfctinfo, we're at POSTROUTING so this
> > +		 * packet is already leaving our framework, it is too late.
> > +		 */
> 
> Note that this might be loopback in which case this skb will
> reappear on PREROUTING.

The comment intention is that we probably already applied a filtering
decision, so changing the ctinfo here seems awkward to me.

In NFQUEUE, this packet may have spent quite a bit of time so it may
even get a different ctinfo if we reevaluate, but as I said, having
packets changing its original ctinfo is...

> > +		skb->nfct = &ct->ct_general;
> > +		nf_ct_acct_merge(ct, ctinfo, old_ct);
> > +		nf_ct_put(old_ct);
> 
> Perhaps it would be better to not have old_ct and instead
> nf_conntrack_put(skb->nfct);
> skb->nfct = &ct->ct_general;
> 
> ?

I can use this if you find it more readable, no problem.

> > +	int ret;
> >  
> >  	ct = nf_ct_get(skb, &ctinfo);
> >  	net = nf_ct_net(ct);
> > @@ -727,10 +770,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
> >  
> >  out:
> >  	nf_ct_add_to_dying_list(ct);
> > +	ret = nf_ct_resolve_clash(net, skb, ct, ctinfo, h);
> 
> Is this safe?
> Seems we jump to out label in other cases as well, not
> just for clashes.

We're jumping out for dying conntracks too, and clash is handling this
already so I considered not adding more code. I could just run
nf_ct_resolve_clash() iff !nf_ct_dying() but I don't see much of a
benefit on this.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH nf-next 3/4,v2] netfilter: conntrack: introduce clash resolution on insertion race
  2016-05-03 12:32   ` Pablo Neira Ayuso
@ 2016-05-03 14:29     ` Florian Westphal
  0 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2016-05-03 14:29 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Florian Westphal, netfilter-devel

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > +		/* Don't modify skb->nfctinfo, we're at POSTROUTING so this
> > > +		 * packet is already leaving our framework, it is too late.
> > > +		 */
> > 
> > Note that this might be loopback in which case this skb will
> > reappear on PREROUTING.
> 
> The comment intention is that we probably already applied a filtering
> decision, so changing the ctinfo here seems awkward to me.

Hmm, but we did not drop the packet, else we would not have ended up in
_confirm().

> In NFQUEUE, this packet may have spent quite a bit of time so it may
> even get a different ctinfo if we reevaluate, but as I said, having
> packets changing its original ctinfo is...

OK, fair enough -- I just wanted to point out that for loopback clashes
we can end up with ->nfct neing set to the "old" one (already in hash
table) while ctinfo is the "new" one (from the ->nfct we tossed since
would could not add it to the table).

But I see that there is no good argument to chose one over the other.

So I'm fine with current version, sorry for the confusion.

> > > +		skb->nfct = &ct->ct_general;
> > > +		nf_ct_acct_merge(ct, ctinfo, old_ct);
> > > +		nf_ct_put(old_ct);
> > 
> > Perhaps it would be better to not have old_ct and instead
> > nf_conntrack_put(skb->nfct);
> > skb->nfct = &ct->ct_general;
> > 
> > ?
> 
> I can use this if you find it more readable, no problem.

Thanks!

> > > +	int ret;
> > >  
> > >  	ct = nf_ct_get(skb, &ctinfo);
> > >  	net = nf_ct_net(ct);
> > > @@ -727,10 +770,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
> > >  
> > >  out:
> > >  	nf_ct_add_to_dying_list(ct);
> > > +	ret = nf_ct_resolve_clash(net, skb, ct, ctinfo, h);
> > 
> > Is this safe?
> > Seems we jump to out label in other cases as well, not
> > just for clashes.
> 
> We're jumping out for dying conntracks too, and clash is handling this
> already so I considered not adding more code. I could just run
> nf_ct_resolve_clash() iff !nf_ct_dying() but I don't see much of a
> benefit on this.

I missed the nf_ct_dying test, that should indeed avoid operating on
*h garbage.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-05-03 14:29 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-03 11:48 [PATCH nf-next 3/4,v2] netfilter: conntrack: introduce clash resolution on insertion race Pablo Neira Ayuso
2016-05-03 12:14 ` Florian Westphal
2016-05-03 12:32   ` Pablo Neira Ayuso
2016-05-03 14:29     ` Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.