All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] inetpeer: Support ipv6 addresses.
@ 2010-03-28  3:31 David Miller
  2010-03-28  4:06 ` David Miller
  2010-03-28  8:22 ` Herbert Xu
  0 siblings, 2 replies; 14+ messages in thread
From: David Miller @ 2010-03-28  3:31 UTC (permalink / raw)
  To: netdev; +Cc: herbert


inetpeer: Support ipv6 addresses.

Signed-off-by: David S. Miller <davem@davemloft.net>

---

This is a first step based upon the talks we were having the other week
about potentially moving the dst metrics out into the inetpeer cache.

The next step would be to move the peer pointer into dst_entry (or
some common encapsulator datastructure that ipv4 and ipv6 could
share, inet_dst_entry or something like that), and provide the
necessary rt6_bind_peer() function for ipv6.  A small set of changes
could then add timewait recycling support to ipv6 essentially for
free (commonize code, provide some wrapper for route type specific
code).

The limited dst metric usage in DecNET could then be moved into
the DecNET route entry struct.  This way we don't have to support
DecNET in the inetpeer cache just for the sake of its metrics. :-)

Finally, we can really move the metrics array into the inetpeer
entries.

How does this sound?

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 87b1df0..2924302 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -13,10 +13,18 @@
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
 
+typedef struct {
+	union {
+		__be32		a4;
+		__be32		a6[4];
+	};
+	__u16	family;
+} inet_peer_address_t;
+
 struct inet_peer {
 	/* group together avl_left,avl_right,v4daddr to speedup lookups */
 	struct inet_peer	*avl_left, *avl_right;
-	__be32			v4daddr;	/* peer's address */
+	inet_peer_address_t	daddr;
 	__u32			avl_height;
 	struct list_head	unused;
 	__u32			dtime;		/* the time of last use of not
@@ -31,7 +39,7 @@ struct inet_peer {
 void			inet_initpeers(void) __init;
 
 /* can be called with or without local BH being disabled */
-struct inet_peer	*inet_getpeer(__be32 daddr, int create);
+struct inet_peer	*inet_getpeer(inet_peer_address_t *daddr, int create);
 
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 6bcfe52..87066eb 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -19,6 +19,7 @@
 #include <linux/net.h>
 #include <net/ip.h>
 #include <net/inetpeer.h>
+#include <net/ipv6.h>
 
 /*
  *  Theory of operations.
@@ -136,6 +137,47 @@ static void unlink_from_unused(struct inet_peer *p)
 	spin_unlock_bh(&inet_peer_unused_lock);
 }
 
+static inline bool inet_peer_addr_equal(inet_peer_address_t *a, inet_peer_address_t *b)
+{
+	if (a->family == b->family) {
+		switch (a->family) {
+		case AF_INET:
+			if (a->a4 == b->a4)
+				return true;
+			break;
+		case AF_INET6:
+			if (!ipv6_addr_cmp((struct in6_addr *)a,
+					   (struct in6_addr *)b))
+				return true;
+			break;
+		default:
+			break;
+		}
+	}
+	return false;
+}
+
+static inline u32 inet_peer_key(inet_peer_address_t *a)
+{
+	u32 key;
+
+	switch (a->family) {
+	case AF_INET:
+		key = (__force __u32) a->a4;
+		break;
+	case AF_INET6:
+		key = ((__force __u32)a->a6[0] ^
+		       (__force __u32)a->a6[1] ^
+		       (__force __u32)a->a6[2] ^
+		       (__force __u32)a->a6[3]);
+		break;
+	default:
+		key = 0;
+		break;
+	}
+	return key;
+}
+
 /*
  * Called with local BH disabled and the pool lock held.
  * _stack is known to be NULL or not at compile time,
@@ -143,15 +185,16 @@ static void unlink_from_unused(struct inet_peer *p)
  */
 #define lookup(_daddr, _stack) 					\
 ({								\
+	u32 key = inet_peer_key(_daddr);			\
 	struct inet_peer *u, **v;				\
 	if (_stack != NULL) {					\
 		stackptr = _stack;				\
 		*stackptr++ = &peer_root;			\
 	}							\
 	for (u = peer_root; u != peer_avl_empty; ) {		\
-		if (_daddr == u->v4daddr)			\
+		if (inet_peer_addr_equal(_daddr, &u->daddr))	\
 			break;					\
-		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
+		if (key < inet_peer_key(&u->daddr))		\
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
@@ -280,7 +323,7 @@ static void unlink_from_pool(struct inet_peer *p)
 	if (atomic_read(&p->refcnt) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
-		if (lookup(p->v4daddr, stack) != p)
+		if (lookup(&p->daddr, stack) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
 		if (p->avl_left == peer_avl_empty) {
@@ -292,7 +335,7 @@ static void unlink_from_pool(struct inet_peer *p)
 			t = lookup_rightempty(p);
 			BUG_ON(*stackptr[-1] != t);
 			**--stackptr = t->avl_left;
-			/* t is removed, t->v4daddr > x->v4daddr for any
+			/* t is removed, t->daddr > x->daddr for any
 			 * x in p->avl_left subtree.
 			 * Put t in the old place of p. */
 			*delp[0] = t;
@@ -358,7 +401,7 @@ static int cleanup_once(unsigned long ttl)
 }
 
 /* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(__be32 daddr, int create)
+struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
 {
 	struct inet_peer *p, *n;
 	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
@@ -384,10 +427,11 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
 	if (n == NULL)
 		return NULL;
-	n->v4daddr = daddr;
+	n->daddr = *daddr;
 	atomic_set(&n->refcnt, 1);
 	atomic_set(&n->rid, 0);
-	atomic_set(&n->ip_id_count, secure_ip_id(daddr));
+	if (daddr->family == AF_INET)
+		atomic_set(&n->ip_id_count, secure_ip_id(daddr->a4));
 	n->tcp_ts_stamp = 0;
 
 	write_lock_bh(&peer_pool_lock);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b59430b..681c8b9 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -142,8 +142,14 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 	qp->saddr = arg->iph->saddr;
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
-	qp->peer = sysctl_ipfrag_max_dist ?
-		inet_getpeer(arg->iph->saddr, 1) : NULL;
+	if (sysctl_ipfrag_max_dist) {
+		inet_peer_address_t addr;
+
+		addr.a4 = arg->iph->saddr;
+		addr.family = AF_INET;
+		qp->peer = inet_getpeer(&addr, 1);
+	} else
+		qp->peer = NULL;
 }
 
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 32d3961..9334e29 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1287,9 +1287,12 @@ skip_hashing:
 void rt_bind_peer(struct rtable *rt, int create)
 {
 	static DEFINE_SPINLOCK(rt_peer_lock);
+	inet_peer_address_t addr;
 	struct inet_peer *peer;
 
-	peer = inet_getpeer(rt->rt_dst, create);
+	addr.a4 = rt->rt_dst;
+	addr.family = AF_INET;
+	peer = inet_getpeer(&addr, create);
 
 	spin_lock_bh(&rt_peer_lock);
 	if (rt->peer == NULL) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f4df5f9..9de6a12 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1350,7 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    tcp_death_row.sysctl_tw_recycle &&
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
-		    peer->v4daddr == saddr) {
+		    peer->daddr.a4 == saddr) {
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
@@ -1770,7 +1770,11 @@ int tcp_v4_remember_stamp(struct sock *sk)
 	int release_it = 0;
 
 	if (!rt || rt->rt_dst != inet->inet_daddr) {
-		peer = inet_getpeer(inet->inet_daddr, 1);
+		inet_peer_address_t addr;
+
+		addr.a4 = inet->inet_daddr;
+		addr.family = AF_INET;
+		peer = inet_getpeer(&addr, 1);
 		release_it = 1;
 	} else {
 		if (!rt->peer)
@@ -1795,8 +1799,12 @@ int tcp_v4_remember_stamp(struct sock *sk)
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 {
-	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
+	inet_peer_address_t addr;
+	struct inet_peer *peer;
 
+	addr.a4 = tw->tw_daddr;
+	addr.family = AF_INET;
+	peer = inet_getpeer(&addr, 1);
 	if (peer) {
 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28  3:31 [PATCH RFC] inetpeer: Support ipv6 addresses David Miller
@ 2010-03-28  4:06 ` David Miller
  2010-03-28  8:22 ` Herbert Xu
  1 sibling, 0 replies; 14+ messages in thread
From: David Miller @ 2010-03-28  4:06 UTC (permalink / raw)
  To: netdev; +Cc: herbert

From: David Miller <davem@davemloft.net>
Date: Sat, 27 Mar 2010 20:31:20 -0700 (PDT)

> The limited dst metric usage in DecNET could then be moved into
> the DecNET route entry struct.  This way we don't have to support
> DecNET in the inetpeer cache just for the sake of its metrics. :-)

And this is what that patch would look like:

decnet: Move route metrics into struct dn_route

This way we can move ipv4/ipv6 metrics into the inetpeer cache.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_route.h |   23 +++++++++++++++++++++++
 net/decnet/af_decnet.c |    4 ++--
 net/decnet/dn_route.c  |   35 ++++++++++++++++++-----------------
 3 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index 60c9f22..62b04db 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -80,8 +80,31 @@ struct dn_route {
 
 	unsigned rt_flags;
 	unsigned rt_type;
+
+	u32 metrics[RTAX_MAX];
 };
 
+static inline u32 dn_dst_metric(const struct dst_entry *dst, int metric)
+{
+	struct dn_route *rt = (struct dn_route *) dst;
+	return rt->metrics[metric - 1];
+}
+
+static inline int dn_dst_metric_locked(const struct dst_entry *dst, int metric)
+{
+	return dn_dst_metric(dst, RTAX_LOCK) & (1 << metric);
+}
+
+static inline u32 dn_dst_mtu(const struct dst_entry *dst)
+{
+	u32 mtu = dn_dst_metric(dst, RTAX_MTU);
+	/*
+	 * Alexey put it here, so ask him about it :)
+	 */
+	barrier();
+	return mtu;
+}
+
 extern void dn_route_init(void);
 extern void dn_route_cleanup(void);
 
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2b494fa..83228a2 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -829,7 +829,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
 		return -EINVAL;
 
 	scp->state = DN_CC;
-	scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS);
+	scp->segsize_loc = dn_dst_metric(__sk_dst_get(sk), RTAX_ADVMSS);
 	dn_send_conn_conf(sk, allocation);
 
 	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
@@ -958,7 +958,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
 	sk->sk_route_caps = sk->sk_dst_cache->dev->features;
 	sock->state = SS_CONNECTING;
 	scp->state = DN_CI;
-	scp->segsize_loc = dst_metric(sk->sk_dst_cache, RTAX_ADVMSS);
+	scp->segsize_loc = dn_dst_metric(sk->sk_dst_cache, RTAX_ADVMSS);
 
 	dn_nsp_send_conninit(sk, NSP_CI);
 	err = -EINPROGRESS;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a7bf03c..ff62012 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -234,15 +234,16 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 	else
 		min_mtu -= 21;
 
-	if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
-		if (!(dst_metric_locked(dst, RTAX_MTU))) {
-			dst->metrics[RTAX_MTU-1] = mtu;
+	if (dn_dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
+		struct dn_route *rt = (struct dn_route *) dst;
+		if (!(dn_dst_metric_locked(dst, RTAX_MTU))) {
+			rt->metrics[RTAX_MTU-1] = mtu;
 			dst_set_expires(dst, dn_rt_mtu_expires);
 		}
-		if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
+		if (!(dn_dst_metric_locked(dst, RTAX_ADVMSS))) {
 			u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
-			if (dst_metric(dst, RTAX_ADVMSS) > mss)
-				dst->metrics[RTAX_ADVMSS-1] = mss;
+			if (dn_dst_metric(dst, RTAX_ADVMSS) > mss)
+				rt->metrics[RTAX_ADVMSS-1] = mss;
 		}
 	}
 }
@@ -788,8 +789,8 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->metrics, fi->fib_metrics,
+		       sizeof(rt->metrics));
 	}
 	rt->rt_type = res->type;
 
@@ -800,13 +801,13 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 		rt->u.dst.neighbour = n;
 	}
 
-	if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu)
-		rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst));
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = mss;
+	if (dn_dst_metric(&rt->u.dst, RTAX_MTU) == 0 ||
+	    dn_dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu)
+		rt->metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
+	mss = dn_mss_from_pmtu(dev, dn_dst_mtu(&rt->u.dst));
+	if (dn_dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 ||
+	    dn_dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss)
+		rt->metrics[RTAX_ADVMSS-1] = mss;
 	return 0;
 }
 
@@ -1485,7 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
 	if (rt->rt_daddr != rt->rt_gateway)
 		RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->metrics) < 0)
 		goto rtattr_failure;
 	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires,
@@ -1712,7 +1713,7 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
 			dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
 			atomic_read(&rt->u.dst.__refcnt),
 			rt->u.dst.__use,
-			(int) dst_metric(&rt->u.dst, RTAX_RTT));
+			(int) dn_dst_metric(&rt->u.dst, RTAX_RTT));
 	return 0;
 }
 
-- 
1.7.0.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28  3:31 [PATCH RFC] inetpeer: Support ipv6 addresses David Miller
  2010-03-28  4:06 ` David Miller
@ 2010-03-28  8:22 ` Herbert Xu
  2010-03-28 12:53   ` David Miller
  1 sibling, 1 reply; 14+ messages in thread
From: Herbert Xu @ 2010-03-28  8:22 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Sat, Mar 27, 2010 at 08:31:20PM -0700, David Miller wrote:
>
> Finally, we can really move the metrics array into the inetpeer
> entries.

My main question is how do we deal with source-address policy
routing in a host cache?

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28  8:22 ` Herbert Xu
@ 2010-03-28 12:53   ` David Miller
  2010-03-28 13:11     ` Herbert Xu
  0 siblings, 1 reply; 14+ messages in thread
From: David Miller @ 2010-03-28 12:53 UTC (permalink / raw)
  To: herbert; +Cc: netdev

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 28 Mar 2010 16:22:50 +0800

> On Sat, Mar 27, 2010 at 08:31:20PM -0700, David Miller wrote:
>>
>> Finally, we can really move the metrics array into the inetpeer
>> entries.
> 
> My main question is how do we deal with source-address policy
> routing in a host cache?

We don't, the same as how we don't handle fully specified
IPSEC policies deciding upon the route.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 12:53   ` David Miller
@ 2010-03-28 13:11     ` Herbert Xu
  2010-03-28 13:40       ` David Miller
  0 siblings, 1 reply; 14+ messages in thread
From: Herbert Xu @ 2010-03-28 13:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Sun, Mar 28, 2010 at 05:53:12AM -0700, David Miller wrote:
> From: Herbert Xu <herbert@gondor.apana.org.au>
> Date: Sun, 28 Mar 2010 16:22:50 +0800
>
> > My main question is how do we deal with source-address policy
> > routing in a host cache?
> 
> We don't, the same as how we don't handle fully specified
> IPSEC policies deciding upon the route.

I thought we did handle source-address policy routing for PMTU
messages at least.  I just checked ip_rt_frag_needed and it does

		if (rth->fl.fl4_dst != daddr ||
		    rth->fl.fl4_src != skeys[i] ||
		    rth->rt_dst != daddr ||
		    rth->rt_src != iph->saddr ||
		    rth->fl.oif != ikeys[k] ||
		    rth->fl.iif != 0 ||
		    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
		    !net_eq(dev_net(rth->u.dst.dev), net) ||
		    rt_is_expired(rth))
			continue;

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 13:11     ` Herbert Xu
@ 2010-03-28 13:40       ` David Miller
  2010-03-28 13:59         ` Herbert Xu
  0 siblings, 1 reply; 14+ messages in thread
From: David Miller @ 2010-03-28 13:40 UTC (permalink / raw)
  To: herbert; +Cc: netdev

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 28 Mar 2010 21:11:12 +0800

> On Sun, Mar 28, 2010 at 05:53:12AM -0700, David Miller wrote:
>> From: Herbert Xu <herbert@gondor.apana.org.au>
>> Date: Sun, 28 Mar 2010 16:22:50 +0800
>>
>> > My main question is how do we deal with source-address policy
>> > routing in a host cache?
>> 
>> We don't, the same as how we don't handle fully specified
>> IPSEC policies deciding upon the route.
> 
> I thought we did handle source-address policy routing for PMTU
> messages at least.  I just checked ip_rt_frag_needed and it does

Same for all the other metrics at the TCP level.

I guess this is the decision we have to make, what does
"host level" metrics mean for us if we decide to move
things into the inetpeer cache.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 13:40       ` David Miller
@ 2010-03-28 13:59         ` Herbert Xu
  2010-03-28 14:32           ` Herbert Xu
                             ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Herbert Xu @ 2010-03-28 13:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Sun, Mar 28, 2010 at 06:40:12AM -0700, David Miller wrote:
>
> Same for all the other metrics at the TCP level.

I don't think they are quite the same.  The TCP time stamp is
an attribute of the destination host, it doesn't vary depending
on which route you take to reach the host.  The MTU on the other
hand is an attribute of the route that reaches the host.

BTW, it appears that the inetpeer cache doesn't take namespaces
into account.  This means that information could potentially leak
from one namespace into another.  I'm not sure whether that's a
big deal or not but it's something for the namespaces folks to
consider.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 13:59         ` Herbert Xu
@ 2010-03-28 14:32           ` Herbert Xu
  2010-03-29 15:08             ` Herbert Xu
  2010-03-29 22:15             ` David Miller
  2010-03-29 22:21           ` David Miller
                             ` (2 subsequent siblings)
  3 siblings, 2 replies; 14+ messages in thread
From: Herbert Xu @ 2010-03-28 14:32 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Sun, Mar 28, 2010 at 09:59:31PM +0800, Herbert Xu wrote:
> 
> I don't think they are quite the same.  The TCP time stamp is
> an attribute of the destination host, it doesn't vary depending
> on which route you take to reach the host.  The MTU on the other
> hand is an attribute of the route that reaches the host.

Here's a convoluted scheme that I just thought up as a compromise
between your wish to expel the metrics and maintaining the policy
routing semantics.

Instead of placing the metrics into the inetpeer, we create a new
global cache for them (let's call it the metrics cache for now).
However, we don't actually populate this cache every time we create
an rt object.  Instead, we only create an entry in this cache
when an event requires it, e.g., when we receive a PMTU message.

In order for this to then propagate to the rt objects, we increment
a genid in the inetpeer cache for the corresponding host.  This
genid is then checked by the rt object every time.  When it is
out of step, the rt object can perform a lookup in the metrics cache
to get the latest data.

Of course once an rt object has a pointer to an entry in the metrics
cache it doesn't need to check the genid anymore, until the metrics
data expires at which point this process can be repeated.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 14:32           ` Herbert Xu
@ 2010-03-29 15:08             ` Herbert Xu
  2010-03-29 22:15             ` David Miller
  1 sibling, 0 replies; 14+ messages in thread
From: Herbert Xu @ 2010-03-29 15:08 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Eric Dumazet

On Sun, Mar 28, 2010 at 10:32:35PM +0800, Herbert Xu wrote:
> 
> Here's a convoluted scheme that I just thought up as a compromise
> between your wish to expel the metrics and maintaining the policy
> routing semantics.

I think the bigger question is are we now ready for a per-cpu
route cache?

The route cache is one of the few remaining global structures
that is standing in the way of full multicore scalability.

Or is there another way of solving this?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 14:32           ` Herbert Xu
  2010-03-29 15:08             ` Herbert Xu
@ 2010-03-29 22:15             ` David Miller
  2010-03-30 12:34               ` Herbert Xu
  1 sibling, 1 reply; 14+ messages in thread
From: David Miller @ 2010-03-29 22:15 UTC (permalink / raw)
  To: herbert; +Cc: netdev

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 28 Mar 2010 22:32:35 +0800

> Instead of placing the metrics into the inetpeer, we create a new
> global cache for them (let's call it the metrics cache for now).
> However, we don't actually populate this cache every time we create
> an rt object.  Instead, we only create an entry in this cache
> when an event requires it, e.g., when we receive a PMTU message.
> 
> In order for this to then propagate to the rt objects, we increment
> a genid in the inetpeer cache for the corresponding host.  This
> genid is then checked by the rt object every time.  When it is
> out of step, the rt object can perform a lookup in the metrics cache
> to get the latest data.
> 
> Of course once an rt object has a pointer to an entry in the metrics
> cache it doesn't need to check the genid anymore, until the metrics
> data expires at which point this process can be repeated.

Interesting idea, but there is the issue of how to fill in
new metrics cache entries when these requests come in later.

We'd have to retain a pointer to the routing table fib entry.
This is because the fib entry states what the initial metric
values need to be for cached routes.

So we'd need a pointer to the fib_info in the routing cache entry, and
this pointer would need to grab a reference to the fib_info.  And this
subsequently leads to the question of what to do for route changes
(f.e. hold the fib_info around until all the route cache entries drop
their references and have a dead state in the fib_info struct that can
be checked, and if we find it dead what can we even do as the route
we're working with might be cached in a socket or similar)

The other option is to relookup the FIB, but we'd then have to
validate that the route cache entry we're working with matches
precisely still, and also this lookup alone is going to have non-trivial
cost :-)

It's really depressing how hard it is to untangle the way we have
things currently setup, isn't it. :-)


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 13:59         ` Herbert Xu
  2010-03-28 14:32           ` Herbert Xu
@ 2010-03-29 22:21           ` David Miller
  2010-03-31  5:43           ` Eric W. Biederman
  2010-03-31  5:44           ` Eric W. Biederman
  3 siblings, 0 replies; 14+ messages in thread
From: David Miller @ 2010-03-29 22:21 UTC (permalink / raw)
  To: herbert; +Cc: netdev

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 28 Mar 2010 21:59:31 +0800

> On Sun, Mar 28, 2010 at 06:40:12AM -0700, David Miller wrote:
>>
>> Same for all the other metrics at the TCP level.
> 
> I don't think they are quite the same.  The TCP time stamp is
> an attribute of the destination host, it doesn't vary depending
> on which route you take to reach the host.  The MTU on the other
> hand is an attribute of the route that reaches the host.

It does make a difference, I think.

When we use IPSEC rules on ports and crazy stuff like that,
we end up with cases such as:

1) We're going over a VPN so RTT, RTTVAR, SSTHRESH, CWND, and other
   TCP metrics which are based upon aspects of the path can end up
   being wildly different.

2) even the end host can be different in some convoluted
   setups

IPSEC encapsulation can effectively change the entire universe in fact
:-) Also, even considering only case #1 above, that's nearly half of
the metrics which we arguably can't move into something like the
inetpeer cache.

This is basically why I've been resistant in the past to these kinds
of ideas to simplify metric handling, as it has the potential to break
something.

The gains of being able to pull this off are still enticing which
is why this topic keeps getting revisited nonetheless :-)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-29 22:15             ` David Miller
@ 2010-03-30 12:34               ` Herbert Xu
  0 siblings, 0 replies; 14+ messages in thread
From: Herbert Xu @ 2010-03-30 12:34 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Mon, Mar 29, 2010 at 03:15:39PM -0700, David Miller wrote:
>
> Interesting idea, but there is the issue of how to fill in
> new metrics cache entries when these requests come in later.

You just perform an rt lookup as usual.  That'll give you the
metrics either from the route cache, or from scratch if we get
a cache miss.

> We'd have to retain a pointer to the routing table fib entry.
> This is because the fib entry states what the initial metric
> values need to be for cached routes.

We can keep the static metrics in the dst/rt entry.  For me
shrinking the dst entry isn't such a big deal, but avoiding touching
global state when constructing a new rt entry is the deal-breaker.

But if you're really into dieting then we can have that fib
pointer :)

> So we'd need a pointer to the fib_info in the routing cache entry, and
> this pointer would need to grab a reference to the fib_info.  And this
> subsequently leads to the question of what to do for route changes
> (f.e. hold the fib_info around until all the route cache entries drop
> their references and have a dead state in the fib_info struct that can
> be checked, and if we find it dead what can we even do as the route
> we're working with might be cached in a socket or similar)

AFAIK on route changes we always flush the route cache.

> The other option is to relookup the FIB, but we'd then have to
> validate that the route cache entry we're working with matches
> precisely still, and also this lookup alone is going to have non-trivial
> cost :-)
> 
> It's really depressing how hard it is to untangle the way we have
> things currently setup, isn't it. :-)

Yeah it's like the Gordian Knot :)
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 13:59         ` Herbert Xu
  2010-03-28 14:32           ` Herbert Xu
  2010-03-29 22:21           ` David Miller
@ 2010-03-31  5:43           ` Eric W. Biederman
  2010-03-31  5:44           ` Eric W. Biederman
  3 siblings, 0 replies; 14+ messages in thread
From: Eric W. Biederman @ 2010-03-31  5:43 UTC (permalink / raw)
  To: Herbert Xu; +Cc: David Miller, netdev

Herbert Xu <herbert@gondor.apana.org.au> writes:


> BTW, it appears that the inetpeer cache doesn't take namespaces
> into account.  This means that information could potentially leak
> from one namespace into another.  I'm not sure whether that's a
> big deal or not but it's something for the namespaces folks to
> consider.

Bother. I wrote a patch a while back, I even remember people
commenting on it, but it certainly doesn't look like anything
ever made it in.

I will see if I can make some time to dig that up and post
the patch.

Thanks for noticing,
Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] inetpeer: Support ipv6 addresses.
  2010-03-28 13:59         ` Herbert Xu
                             ` (2 preceding siblings ...)
  2010-03-31  5:43           ` Eric W. Biederman
@ 2010-03-31  5:44           ` Eric W. Biederman
  3 siblings, 0 replies; 14+ messages in thread
From: Eric W. Biederman @ 2010-03-31  5:44 UTC (permalink / raw)
  To: Herbert Xu; +Cc: David Miller, netdev

Herbert Xu <herbert@gondor.apana.org.au> writes:


> BTW, it appears that the inetpeer cache doesn't take namespaces
> into account.  This means that information could potentially leak
> from one namespace into another.  I'm not sure whether that's a
> big deal or not but it's something for the namespaces folks to
> consider.

Bother. I wrote a patch a while back, I even remember people
commenting on it, but it certainly doesn't look like anything
ever made it in.

I will see if I can make some time to dig that up and post
the patch.

Thanks for noticing,
Eric

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2010-03-31  5:44 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-28  3:31 [PATCH RFC] inetpeer: Support ipv6 addresses David Miller
2010-03-28  4:06 ` David Miller
2010-03-28  8:22 ` Herbert Xu
2010-03-28 12:53   ` David Miller
2010-03-28 13:11     ` Herbert Xu
2010-03-28 13:40       ` David Miller
2010-03-28 13:59         ` Herbert Xu
2010-03-28 14:32           ` Herbert Xu
2010-03-29 15:08             ` Herbert Xu
2010-03-29 22:15             ` David Miller
2010-03-30 12:34               ` Herbert Xu
2010-03-29 22:21           ` David Miller
2010-03-31  5:43           ` Eric W. Biederman
2010-03-31  5:44           ` Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.