All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] Fix routing metrics
@ 2012-02-02 10:11 Steffen Klassert
  2012-02-02 10:12 ` [PATCH 1/4] inetpeer: Allocate the peer metrics dynamically Steffen Klassert
                   ` (4 more replies)
  0 siblings, 5 replies; 22+ messages in thread
From: Steffen Klassert @ 2012-02-02 10:11 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

At the moment we initialize the routing metrics with the, on the inetpeer
cached values in rt_init_metrics(). So if we have the metrics cached on the
inetpeer, we ignore the user configured fib_metrics. This leads to the fact 
that we can't configure the mtu, hoplimit etc. if we have learned metrics 
cached. This patchset adds a possibility to invalidate and exchange the
cached inetpeer metrics. In detail it does:

1. Allocate the inetpeer metrics dynamically and add a reference
to the inetpeer.

2. Remove the direct reference of the inetpeer metrics from
dst->_metrics and access the metrics via the inetpeer reference
of the route. This makes it possible to exchange the inetpeer
metrics if they get invalidated.

3. Protect the inetpeer metrics with rcu.

4. Add a peer_genid to invalidate the metrics. When the peer_genid
is incremeted we allocate new metrics and exchange the metrics
pointer of the inetpeer.

For the case that such a approach is acceptable, I have another patch
to unify peer_genid and redirect_genid.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/4] inetpeer: Allocate the peer metrics dynamically
  2012-02-02 10:11 [PATCH 0/4] Fix routing metrics Steffen Klassert
@ 2012-02-02 10:12 ` Steffen Klassert
  2012-02-02 10:12 ` [PATCH 2/4] net: Unlink the inetpeer metrics from dst_entry Steffen Klassert
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 22+ messages in thread
From: Steffen Klassert @ 2012-02-02 10:12 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

We do a dynamic allocation of the peer metrics in order
to be able to reset the peer metrics by exchanging a rcu
pointer. This is done with separate patches.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/inetpeer.h |   13 +++++++++++--
 net/ipv4/inetpeer.c    |   14 ++++++++++++--
 net/ipv4/route.c       |    7 ++++---
 net/ipv6/route.c       |    2 +-
 4 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 06b795d..6bb8060 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -27,13 +27,17 @@ struct inetpeer_addr {
 	__u16				family;
 };
 
+struct inetpeer_metrics {
+	u32		m[RTAX_MAX];
+};
+
 struct inet_peer {
 	/* group together avl_left,avl_right,v4daddr to speedup lookups */
 	struct inet_peer __rcu	*avl_left, *avl_right;
 	struct inetpeer_addr	daddr;
 	__u32			avl_height;
 
-	u32			metrics[RTAX_MAX];
+	struct inetpeer_metrics *metrics;
 	u32			rate_tokens;	/* rate limiting for ICMP */
 	int			redirect_genid;
 	unsigned long		rate_last;
@@ -68,7 +72,12 @@ void			inet_initpeers(void) __init;
 
 static inline bool inet_metrics_new(const struct inet_peer *p)
 {
-	return p->metrics[RTAX_LOCK-1] == INETPEER_METRICS_NEW;
+	return p->metrics->m[RTAX_LOCK-1] == INETPEER_METRICS_NEW;
+}
+
+static inline u32 *inetpeer_metrics(const struct inet_peer *p)
+{
+	return p->metrics->m;
 }
 
 /* can be called with or without local BH being disabled */
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index bf4a9c4..92071a4 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -315,7 +315,9 @@ do {								\
 
 static void inetpeer_free_rcu(struct rcu_head *head)
 {
-	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
+	struct inet_peer *p = container_of(head, struct inet_peer, rcu);
+	kfree(p->metrics);
+	kmem_cache_free(peer_cachep, p);
 }
 
 static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
@@ -434,6 +436,13 @@ relookup:
 	}
 	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
 	if (p) {
+		p->metrics = kmalloc(sizeof(struct inetpeer_metrics),
+				     GFP_ATOMIC);
+		if (!p->metrics) {
+			kmem_cache_free(peer_cachep, p);
+			goto out;
+		}
+
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
@@ -442,7 +451,7 @@ relookup:
 					secure_ip_id(daddr->addr.a4) :
 					secure_ipv6_id(daddr->addr.a6));
 		p->tcp_ts_stamp = 0;
-		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+		p->metrics->m[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
 		p->rate_last = 0;
 		p->pmtu_expires = 0;
@@ -455,6 +464,7 @@ relookup:
 		link_to_pool(p, base);
 		base->total++;
 	}
+out:
 	write_sequnlock_bh(&base->lock);
 
 	return p;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bcacf54..5099d35 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -169,7 +169,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 		u32 *old_p = __DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
-		p = peer->metrics;
+		p = inetpeer_metrics(peer);
 		if (inet_metrics_new(peer))
 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 
@@ -1951,11 +1951,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 
 	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
 	if (peer) {
+		u32 *peer_metrics = inetpeer_metrics(peer);
 		rt->rt_peer_genid = rt_peer_genid();
 		if (inet_metrics_new(peer))
-			memcpy(peer->metrics, fi->fib_metrics,
+			memcpy(peer_metrics, fi->fib_metrics,
 			       sizeof(u32) * RTAX_MAX);
-		dst_init_metrics(&rt->dst, peer->metrics, false);
+		dst_init_metrics(&rt->dst, peer_metrics, false);
 
 		check_peer_pmtu(&rt->dst, peer);
 		if (peer->redirect_genid != redirect_genid)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8c2e3ab..ea13565 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -105,7 +105,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 		u32 *old_p = __DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
-		p = peer->metrics;
+		p = inetpeer_metrics(peer);
 		if (inet_metrics_new(peer))
 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/4] net: Unlink the inetpeer metrics from dst_entry
  2012-02-02 10:11 [PATCH 0/4] Fix routing metrics Steffen Klassert
  2012-02-02 10:12 ` [PATCH 1/4] inetpeer: Allocate the peer metrics dynamically Steffen Klassert
@ 2012-02-02 10:12 ` Steffen Klassert
  2012-02-02 10:13 ` [PATCH 3/4] inetpeer: protect the inetpeerpeer metrics with rcu Steffen Klassert
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 22+ messages in thread
From: Steffen Klassert @ 2012-02-02 10:12 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

In order to be able to exchange the inetpeer metrics,
we remove the direct reference from the dst_entry. We
access them via a new dst_ops method 'metrics' and the
inet_peer pointer of the routes.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h         |   20 +++++++++++++-------
 include/net/dst_ops.h     |    1 +
 net/bridge/br_netfilter.c |    6 ++++++
 net/decnet/dn_route.c     |    1 +
 net/ipv4/route.c          |   32 +++++++++++++++++---------------
 net/ipv4/xfrm4_policy.c   |    1 +
 net/ipv6/route.c          |   27 +++++++++++++++++----------
 net/ipv6/xfrm6_policy.c   |    1 +
 8 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 344c8dd..be1ffc8 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -124,6 +124,17 @@ static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
 		__dst_destroy_metrics_generic(dst, val);
 }
 
+static inline u32 *dst_metrics_ptr(const struct dst_entry *dst)
+{
+	return dst->ops->metrics(dst);
+}
+
+static inline u32 *dst_metrics_ptr_default(const struct dst_entry *dst)
+{
+	return DST_METRICS_PTR(dst);
+
+}
+
 static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst)
 {
 	unsigned long p = dst->_metrics;
@@ -151,21 +162,16 @@ static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_ent
 	u32 *dst_metrics = dst_metrics_write_ptr(dest);
 
 	if (dst_metrics) {
-		u32 *src_metrics = DST_METRICS_PTR(src);
+		u32 *src_metrics = dst_metrics_ptr(src);
 
 		memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
 	}
 }
 
-static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
-{
-	return DST_METRICS_PTR(dst);
-}
-
 static inline u32
 dst_metric_raw(const struct dst_entry *dst, const int metric)
 {
-	u32 *p = DST_METRICS_PTR(dst);
+	u32 *p = dst_metrics_ptr(dst);
 
 	return p[metric-1];
 }
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index e1c2ee0..2c7849b 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -19,6 +19,7 @@ struct dst_ops {
 	unsigned int		(*default_advmss)(const struct dst_entry *);
 	unsigned int		(*mtu)(const struct dst_entry *);
 	u32 *			(*cow_metrics)(struct dst_entry *, unsigned long);
+	u32 *			(*metrics)(const struct dst_entry *);
 	void			(*destroy)(struct dst_entry *);
 	void			(*ifdown)(struct dst_entry *,
 					  struct net_device *dev, int how);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8412247..74ba4bc 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -109,6 +109,11 @@ static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
 	return NULL;
 }
 
+static u32 *fake_metrics(const struct dst_entry *dst)
+{
+	return NULL;
+}
+
 static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 {
 	return NULL;
@@ -124,6 +129,7 @@ static struct dst_ops fake_dst_ops = {
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.update_pmtu =		fake_update_pmtu,
 	.cow_metrics =		fake_cow_metrics,
+	.metrics =		fake_metrics,
 	.neigh_lookup =		fake_neigh_lookup,
 	.mtu =			fake_mtu,
 };
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index f31ce72..d2e56b0 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -137,6 +137,7 @@ static struct dst_ops dn_dst_ops = {
 	.default_advmss =	dn_dst_default_advmss,
 	.mtu =			dn_dst_mtu,
 	.cow_metrics =		dst_cow_metrics_generic,
+	.metrics =		dst_metrics_ptr_default,
 	.destroy =		dn_dst_destroy,
 	.negative_advice =	dn_dst_negative_advice,
 	.link_failure =		dn_dst_link_failure,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5099d35..dc22d6f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -155,6 +155,16 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 }
 
+static u32 *ipv4_metrics(const struct dst_entry *dst)
+{
+	const struct rtable *rt = (const struct rtable *) dst;
+
+	if (rt->peer && !inet_metrics_new(rt->peer))
+		return inetpeer_metrics(rt->peer);
+
+	return DST_METRICS_PTR(dst);
+}
+
 static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
 	struct rtable *rt = (struct rtable *) dst;
@@ -167,25 +177,10 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 	peer = rt->peer;
 	if (peer) {
 		u32 *old_p = __DST_METRICS_PTR(old);
-		unsigned long prev, new;
 
 		p = inetpeer_metrics(peer);
 		if (inet_metrics_new(peer))
 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
-
-		new = (unsigned long) p;
-		prev = cmpxchg(&dst->_metrics, old, new);
-
-		if (prev != old) {
-			p = __DST_METRICS_PTR(prev);
-			if (prev & DST_METRICS_READ_ONLY)
-				p = NULL;
-		} else {
-			if (rt->fi) {
-				fib_info_put(rt->fi);
-				rt->fi = NULL;
-			}
-		}
 	}
 	return p;
 }
@@ -200,6 +195,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.default_advmss =	ipv4_default_advmss,
 	.mtu =			ipv4_mtu,
 	.cow_metrics =		ipv4_cow_metrics,
+	.metrics =		ipv4_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
@@ -2883,6 +2879,11 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
 	return NULL;
 }
 
+static u32 *ipv4_rt_blackhole_metrics(const struct dst_entry *dst)
+{
+	return NULL;
+}
+
 static struct dst_ops ipv4_dst_blackhole_ops = {
 	.family			=	AF_INET,
 	.protocol		=	cpu_to_be16(ETH_P_IP),
@@ -2892,6 +2893,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.default_advmss		=	ipv4_default_advmss,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
 	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
+	.metrics		=	ipv4_rt_blackhole_metrics,
 	.neigh_lookup		=	ipv4_neigh_lookup,
 };
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a0b4c5d..7a6ebfe 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -233,6 +233,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
 	.cow_metrics =		dst_cow_metrics_generic,
+	.metrics =		dst_metrics_ptr_default,
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea13565..d8b01c0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -88,6 +88,16 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *gwaddr, int ifindex);
 #endif
 
+static u32 *ipv6_metrics(const struct dst_entry *dst)
+{
+	const struct rt6_info *rt = (const struct rt6_info *) dst;
+
+	if (rt->rt6i_peer && !inet_metrics_new(rt->rt6i_peer))
+		return inetpeer_metrics(rt->rt6i_peer);
+
+	return DST_METRICS_PTR(dst);
+}
+
 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
 	struct rt6_info *rt = (struct rt6_info *) dst;
@@ -103,20 +113,10 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 	peer = rt->rt6i_peer;
 	if (peer) {
 		u32 *old_p = __DST_METRICS_PTR(old);
-		unsigned long prev, new;
 
 		p = inetpeer_metrics(peer);
 		if (inet_metrics_new(peer))
 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
-
-		new = (unsigned long) p;
-		prev = cmpxchg(&dst->_metrics, old, new);
-
-		if (prev != old) {
-			p = __DST_METRICS_PTR(prev);
-			if (prev & DST_METRICS_READ_ONLY)
-				p = NULL;
-		}
 	}
 	return p;
 }
@@ -151,6 +151,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.default_advmss		=	ip6_default_advmss,
 	.mtu			=	ip6_mtu,
 	.cow_metrics		=	ipv6_cow_metrics,
+	.metrics		=	ipv6_metrics,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
@@ -177,6 +178,11 @@ static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 	return NULL;
 }
 
+static u32 *ip6_rt_blackhole_metrics(const struct dst_entry *dst)
+{
+	return NULL;
+}
+
 static struct dst_ops ip6_dst_blackhole_ops = {
 	.family			=	AF_INET6,
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
@@ -186,6 +192,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.default_advmss		=	ip6_default_advmss,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
+	.metrics		=	ip6_rt_blackhole_metrics,
 	.neigh_lookup		=	ip6_neigh_lookup,
 };
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8ea65e0..af4854c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -261,6 +261,7 @@ static struct dst_ops xfrm6_dst_ops = {
 	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
 	.cow_metrics =		dst_cow_metrics_generic,
+	.metrics =		dst_metrics_ptr_default,
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/4] inetpeer: protect the inetpeerpeer metrics with rcu
  2012-02-02 10:11 [PATCH 0/4] Fix routing metrics Steffen Klassert
  2012-02-02 10:12 ` [PATCH 1/4] inetpeer: Allocate the peer metrics dynamically Steffen Klassert
  2012-02-02 10:12 ` [PATCH 2/4] net: Unlink the inetpeer metrics from dst_entry Steffen Klassert
@ 2012-02-02 10:13 ` Steffen Klassert
  2012-02-02 10:14 ` [PATCH 4/4] route: Invalidate the peer metrics along with the routing cache Steffen Klassert
  2012-02-06 20:29 ` [PATCH 0/4] Fix routing metrics David Miller
  4 siblings, 0 replies; 22+ messages in thread
From: Steffen Klassert @ 2012-02-02 10:13 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

In order to be able to exchange the inetpeer metrics,
we add rcu protection to the struct inetpeer_metrics.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h      |   21 +++++++++++++++++----
 include/net/inetpeer.h |    8 ++++++--
 net/ipv4/inetpeer.c    |   26 ++++++++++++++++++++++++++
 net/ipv4/route.c       |    6 +++++-
 net/ipv6/route.c       |    6 +++++-
 5 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index be1ffc8..f1141d8 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -159,21 +159,31 @@ static inline void dst_init_metrics(struct dst_entry *dst,
 
 static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
 {
-	u32 *dst_metrics = dst_metrics_write_ptr(dest);
+	u32 *dst_metrics;
+
+	rcu_read_lock();
+	dst_metrics = dst_metrics_write_ptr(dest);
 
 	if (dst_metrics) {
 		u32 *src_metrics = dst_metrics_ptr(src);
 
 		memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
 	}
+	rcu_read_unlock();
 }
 
 static inline u32
 dst_metric_raw(const struct dst_entry *dst, const int metric)
 {
-	u32 *p = dst_metrics_ptr(dst);
+	u32 *p;
+	u32 val;
+
+	rcu_read_lock();
+	p = dst_metrics_ptr(dst);
+	val = p[metric-1];
+	rcu_read_unlock();
 
-	return p[metric-1];
+	return val;
 }
 
 static inline u32
@@ -198,10 +208,13 @@ dst_metric_advmss(const struct dst_entry *dst)
 
 static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
 {
-	u32 *p = dst_metrics_write_ptr(dst);
+	u32 *p;
 
+	rcu_read_lock();
+	p = dst_metrics_write_ptr(dst);
 	if (p)
 		p[metric-1] = val;
+	rcu_read_unlock();
 }
 
 static inline u32
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 6bb8060..d97211a 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -28,6 +28,7 @@ struct inetpeer_addr {
 };
 
 struct inetpeer_metrics {
+	struct rcu_head	rcu;
 	u32		m[RTAX_MAX];
 };
 
@@ -37,7 +38,7 @@ struct inet_peer {
 	struct inetpeer_addr	daddr;
 	__u32			avl_height;
 
-	struct inetpeer_metrics *metrics;
+	struct inetpeer_metrics __rcu *metrics;
 	u32			rate_tokens;	/* rate limiting for ICMP */
 	int			redirect_genid;
 	unsigned long		rate_last;
@@ -75,9 +76,11 @@ static inline bool inet_metrics_new(const struct inet_peer *p)
 	return p->metrics->m[RTAX_LOCK-1] == INETPEER_METRICS_NEW;
 }
 
+/* called in rcu_read_lock() section */
 static inline u32 *inetpeer_metrics(const struct inet_peer *p)
 {
-	return p->metrics->m;
+	struct inetpeer_metrics *metrics = rcu_dereference(p->metrics);
+	return metrics->m;
 }
 
 /* can be called with or without local BH being disabled */
@@ -130,4 +133,5 @@ static inline int inet_getid(struct inet_peer *p, int more)
 	return new;
 }
 
+extern bool inetpeer_reset_metrics(struct inet_peer *p);
 #endif /* _NET_INETPEER_H */
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 92071a4..2bf23a9 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -519,3 +519,29 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 	return rc;
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
+
+bool inetpeer_reset_metrics(struct inet_peer *p)
+{
+	struct inetpeer_metrics *old, *prev, *new;
+
+	if (inet_metrics_new(p))
+		return true;
+
+	new = kmalloc(sizeof(struct inetpeer_metrics), GFP_ATOMIC);
+	if (!new)
+		return false;
+
+	new->m[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+
+	old = p->metrics;
+
+	prev = cmpxchg(&p->metrics, old, new);
+	if (prev != old) {
+		kfree(new);
+		return false;
+	}
+
+	kfree_rcu(old, rcu);
+	return true;
+}
+EXPORT_SYMBOL(inetpeer_reset_metrics);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index dc22d6f..1a88484 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2971,6 +2971,7 @@ static int rt_fill_info(struct net *net,
 	unsigned long expires = 0;
 	const struct inet_peer *peer = rt->peer;
 	u32 id = 0, ts = 0, tsage = 0, error;
+	int ret;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
 	if (nlh == NULL)
@@ -3010,7 +3011,10 @@ static int rt_fill_info(struct net *net,
 	if (rt->rt_dst != rt->rt_gateway)
 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
 
-	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+	rcu_read_lock();
+	ret = rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst));
+	rcu_read_unlock();
+	if (ret < 0)
 		goto nla_put_failure;
 
 	if (rt->rt_mark)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d8b01c0..5a1c256 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2378,6 +2378,7 @@ static int rt6_fill_node(struct net *net,
 	u32 table;
 	struct neighbour *n;
 	u32 ts, tsage;
+	int ret;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2463,7 +2464,10 @@ static int rt6_fill_node(struct net *net,
 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
 
-	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+	rcu_read_lock();
+	ret = rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst));
+	rcu_read_unlock();
+	if (ret < 0)
 		goto nla_put_failure;
 
 	rcu_read_lock();
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/4] route: Invalidate the peer metrics along with the routing cache
  2012-02-02 10:11 [PATCH 0/4] Fix routing metrics Steffen Klassert
                   ` (2 preceding siblings ...)
  2012-02-02 10:13 ` [PATCH 3/4] inetpeer: protect the inetpeerpeer metrics with rcu Steffen Klassert
@ 2012-02-02 10:14 ` Steffen Klassert
  2012-02-06 20:29 ` [PATCH 0/4] Fix routing metrics David Miller
  4 siblings, 0 replies; 22+ messages in thread
From: Steffen Klassert @ 2012-02-02 10:14 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

We initialize the routing metrics with the cached values in
rt_init_metrics(). So if we have the metrics cached on the
inetpeer, we ignore the user configured fib_metrics. Therefore we
add a global peer_genid and a genid on the inetpeer. The metrics
on the inetpeer are valid as long as the global peer_genid and the
genid on the inetpeer are equal. The global peer_genid is incremented
by rt_cache_invalidate(). We reset the peer metrics with
inetpeer_reset_metrics() if the global peer_genid and the genid
on the inetpeer differ.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/inetpeer.h |    1 +
 include/net/ip.h       |    1 +
 net/ipv4/inetpeer.c    |    2 ++
 net/ipv4/route.c       |   20 ++++++++++++++++++--
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index d97211a..44dd34a 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,6 +41,7 @@ struct inet_peer {
 	struct inetpeer_metrics __rcu *metrics;
 	u32			rate_tokens;	/* rate limiting for ICMP */
 	int			redirect_genid;
+	int			genid;
 	unsigned long		rate_last;
 	unsigned long		pmtu_expires;
 	u32			pmtu_orig;
diff --git a/include/net/ip.h b/include/net/ip.h
index 775009f..80c6642 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -229,6 +229,7 @@ extern struct ctl_path net_ipv4_ctl_path[];
 extern int inet_peer_threshold;
 extern int inet_peer_minttl;
 extern int inet_peer_maxttl;
+extern int peer_genid;
 
 /* From ip_output.c */
 extern int sysctl_ip_dynaddr;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2bf23a9..2218b3c 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -102,6 +102,7 @@ int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
 
+int peer_genid = 0;
 
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
@@ -457,6 +458,7 @@ relookup:
 		p->pmtu_expires = 0;
 		p->pmtu_orig = 0;
 		p->redirect_genid = 0;
+		p->genid = peer_genid;
 		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
 
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1a88484..4012e00 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -934,6 +934,7 @@ static void rt_cache_invalidate(struct net *net)
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 	redirect_genid++;
+	peer_genid++;
 }
 
 /*
@@ -1933,9 +1934,18 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	return mtu;
 }
 
+static void peer_reset_learned(struct inet_peer *peer)
+{
+	peer->pmtu_expires = 0;
+	if (inetpeer_reset_metrics(peer))
+		peer->genid = peer_genid;
+}
+
+/* called in rcu_read_lock() section */
 static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 			    struct fib_info *fi)
 {
+	u32 *peer_metrics;
 	struct inet_peer *peer;
 	int create = 0;
 
@@ -1947,11 +1957,17 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 
 	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
 	if (peer) {
-		u32 *peer_metrics = inetpeer_metrics(peer);
+		if (peer->genid != peer_genid)
+			peer_reset_learned(peer);
+
+		peer_metrics = inetpeer_metrics(peer);
 		rt->rt_peer_genid = rt_peer_genid();
-		if (inet_metrics_new(peer))
+		if (inet_metrics_new(peer)) {
 			memcpy(peer_metrics, fi->fib_metrics,
 			       sizeof(u32) * RTAX_MAX);
+
+			peer->pmtu_orig = fi->fib_mtu;
+		}
 		dst_init_metrics(&rt->dst, peer_metrics, false);
 
 		check_peer_pmtu(&rt->dst, peer);
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-02 10:11 [PATCH 0/4] Fix routing metrics Steffen Klassert
                   ` (3 preceding siblings ...)
  2012-02-02 10:14 ` [PATCH 4/4] route: Invalidate the peer metrics along with the routing cache Steffen Klassert
@ 2012-02-06 20:29 ` David Miller
  2012-02-08  7:30   ` Steffen Klassert
  4 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-06 20:29 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 2 Feb 2012 11:11:41 +0100

> At the moment we initialize the routing metrics with the, on the inetpeer
> cached values in rt_init_metrics(). So if we have the metrics cached on the
> inetpeer, we ignore the user configured fib_metrics. This leads to the fact 
> that we can't configure the mtu, hoplimit etc. if we have learned metrics 
> cached. This patchset adds a possibility to invalidate and exchange the
> cached inetpeer metrics. In detail it does:
> 
> 1. Allocate the inetpeer metrics dynamically and add a reference
> to the inetpeer.
> 
> 2. Remove the direct reference of the inetpeer metrics from
> dst->_metrics and access the metrics via the inetpeer reference
> of the route. This makes it possible to exchange the inetpeer
> metrics if they get invalidated.
> 
> 3. Protect the inetpeer metrics with rcu.
> 
> 4. Add a peer_genid to invalidate the metrics. When the peer_genid
> is incremeted we allocate new metrics and exchange the metrics
> pointer of the inetpeer.
> 
> For the case that such a approach is acceptable, I have another patch
> to unify peer_genid and redirect_genid.

Thinking about this, it seems overkill to check this on every metric
access.

You have an opportunity to validate metrics when the peer is bound to
the route.

This is because any change to the FIB metrics, is in turn a change
to the FIB, which therefore invalidates the entire routing cache.

So you can be sure that a new route cache entry will be created, and
at that creation time you can ensure that we'll respect the updated
FIB metrics if encessary.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-06 20:29 ` [PATCH 0/4] Fix routing metrics David Miller
@ 2012-02-08  7:30   ` Steffen Klassert
  2012-02-08 20:18     ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-08  7:30 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Mon, Feb 06, 2012 at 03:29:16PM -0500, David Miller wrote:
> 
> Thinking about this, it seems overkill to check this on every metric
> access.
> 
> You have an opportunity to validate metrics when the peer is bound to
> the route.
> 
> This is because any change to the FIB metrics, is in turn a change
> to the FIB, which therefore invalidates the entire routing cache.
> 
> So you can be sure that a new route cache entry will be created, and
> at that creation time you can ensure that we'll respect the updated
> FIB metrics if encessary.

Not sure if I get you right here, but that's what this patchset
does. It invalidates the metrics on the peer by incrementing
peer_genid in rt_cache_invalidate() which is invoked on every FIB
change. Then, on slowpath route lookup it checks in rt_init_metrics()
whether the peer_genid changed. If it changed, it exchanges the
invalidated merics with new ones and copies the informations from
the FIB into it.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-08  7:30   ` Steffen Klassert
@ 2012-02-08 20:18     ` David Miller
  2012-02-09 12:44       ` Steffen Klassert
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-08 20:18 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 8 Feb 2012 08:30:37 +0100

> On Mon, Feb 06, 2012 at 03:29:16PM -0500, David Miller wrote:
>> 
>> Thinking about this, it seems overkill to check this on every metric
>> access.
>> 
>> You have an opportunity to validate metrics when the peer is bound to
>> the route.
>> 
>> This is because any change to the FIB metrics, is in turn a change
>> to the FIB, which therefore invalidates the entire routing cache.
>> 
>> So you can be sure that a new route cache entry will be created, and
>> at that creation time you can ensure that we'll respect the updated
>> FIB metrics if encessary.
> 
> Not sure if I get you right here, but that's what this patchset
> does. It invalidates the metrics on the peer by incrementing
> peer_genid in rt_cache_invalidate() which is invoked on every FIB
> change. Then, on slowpath route lookup it checks in rt_init_metrics()
> whether the peer_genid changed. If it changed, it exchanges the
> invalidated merics with new ones and copies the informations from
> the FIB into it.

If the routing cache is invalided, you'll "see" this updated inetpeer
because every single routing cache entry will get rebuilt.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-08 20:18     ` David Miller
@ 2012-02-09 12:44       ` Steffen Klassert
  2012-02-09 18:40         ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-09 12:44 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Wed, Feb 08, 2012 at 03:18:37PM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> Date: Wed, 8 Feb 2012 08:30:37 +0100
> 
> > On Mon, Feb 06, 2012 at 03:29:16PM -0500, David Miller wrote:
> >> 
> >> Thinking about this, it seems overkill to check this on every metric
> >> access.
> >> 
> >> You have an opportunity to validate metrics when the peer is bound to
> >> the route.
> >> 
> >> This is because any change to the FIB metrics, is in turn a change
> >> to the FIB, which therefore invalidates the entire routing cache.
> >> 
> >> So you can be sure that a new route cache entry will be created, and
> >> at that creation time you can ensure that we'll respect the updated
> >> FIB metrics if encessary.
> > 
> > Not sure if I get you right here, but that's what this patchset
> > does. It invalidates the metrics on the peer by incrementing
> > peer_genid in rt_cache_invalidate() which is invoked on every FIB
> > change. Then, on slowpath route lookup it checks in rt_init_metrics()
> > whether the peer_genid changed. If it changed, it exchanges the
> > invalidated merics with new ones and copies the informations from
> > the FIB into it.
> 
> If the routing cache is invalided, you'll "see" this updated inetpeer
> because every single routing cache entry will get rebuilt.

Hm, I still don't get your point. Could you specify this please?

When a route cache entry is created and the peer_genid does not match
the genid on the inetpeer, fresh inetpeer metrics are allocated  and
then published. After that, the new metrics are in proper state and
ready to use.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-09 12:44       ` Steffen Klassert
@ 2012-02-09 18:40         ` David Miller
  2012-02-10  6:50           ` Steffen Klassert
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-09 18:40 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 9 Feb 2012 13:44:11 +0100

> On Wed, Feb 08, 2012 at 03:18:37PM -0500, David Miller wrote:
>> From: Steffen Klassert <steffen.klassert@secunet.com>
>> Date: Wed, 8 Feb 2012 08:30:37 +0100
>> 
>> > On Mon, Feb 06, 2012 at 03:29:16PM -0500, David Miller wrote:
>> >> 
>> >> Thinking about this, it seems overkill to check this on every metric
>> >> access.
>> >> 
>> >> You have an opportunity to validate metrics when the peer is bound to
>> >> the route.
>> >> 
>> >> This is because any change to the FIB metrics, is in turn a change
>> >> to the FIB, which therefore invalidates the entire routing cache.
>> >> 
>> >> So you can be sure that a new route cache entry will be created, and
>> >> at that creation time you can ensure that we'll respect the updated
>> >> FIB metrics if encessary.
>> > 
>> > Not sure if I get you right here, but that's what this patchset
>> > does. It invalidates the metrics on the peer by incrementing
>> > peer_genid in rt_cache_invalidate() which is invoked on every FIB
>> > change. Then, on slowpath route lookup it checks in rt_init_metrics()
>> > whether the peer_genid changed. If it changed, it exchanges the
>> > invalidated merics with new ones and copies the informations from
>> > the FIB into it.
>> 
>> If the routing cache is invalided, you'll "see" this updated inetpeer
>> because every single routing cache entry will get rebuilt.
> 
> Hm, I still don't get your point. Could you specify this please?
> 
> When a route cache entry is created and the peer_genid does not match
> the genid on the inetpeer, fresh inetpeer metrics are allocated  and
> then published. After that, the new metrics are in proper state and
> ready to use.

Right, which is exactly what you want to happen.

Checking on every metric access is therefore pointless and needless.

The peer_genid only increments when the routing cache is flushed,
therefore every subsequent access to the metrics will go through the
route cache entry creation path first, and therefore that will make
sure fresh inetpeer metrics will be allocated since the peer_genid
does not match.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-09 18:40         ` David Miller
@ 2012-02-10  6:50           ` Steffen Klassert
  2012-02-10  7:38             ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-10  6:50 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Thu, Feb 09, 2012 at 01:40:10PM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> > 
> > Hm, I still don't get your point. Could you specify this please?
> > 
> > When a route cache entry is created and the peer_genid does not match
> > the genid on the inetpeer, fresh inetpeer metrics are allocated  and
> > then published. After that, the new metrics are in proper state and
> > ready to use.
> 
> Right, which is exactly what you want to happen.
> 
> Checking on every metric access is therefore pointless and needless.
> 
> The peer_genid only increments when the routing cache is flushed,
> therefore every subsequent access to the metrics will go through the
> route cache entry creation path first, and therefore that will make
> sure fresh inetpeer metrics will be allocated since the peer_genid
> does not match.

I fully agree with you here. But we check for the genid just in
rt_init_metrics() which is invoked only on route cache entry
creation. There is no check when the metrics are accessed,
because the metrics on the inetpeer are valid on every access.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-10  6:50           ` Steffen Klassert
@ 2012-02-10  7:38             ` David Miller
  2012-02-10  7:51               ` Steffen Klassert
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-10  7:38 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 10 Feb 2012 07:50:31 +0100

> On Thu, Feb 09, 2012 at 01:40:10PM -0500, David Miller wrote:
>> From: Steffen Klassert <steffen.klassert@secunet.com>
>> > 
>> > Hm, I still don't get your point. Could you specify this please?
>> > 
>> > When a route cache entry is created and the peer_genid does not match
>> > the genid on the inetpeer, fresh inetpeer metrics are allocated  and
>> > then published. After that, the new metrics are in proper state and
>> > ready to use.
>> 
>> Right, which is exactly what you want to happen.
>> 
>> Checking on every metric access is therefore pointless and needless.
>> 
>> The peer_genid only increments when the routing cache is flushed,
>> therefore every subsequent access to the metrics will go through the
>> route cache entry creation path first, and therefore that will make
>> sure fresh inetpeer metrics will be allocated since the peer_genid
>> does not match.
> 
> I fully agree with you here. But we check for the genid just in
> rt_init_metrics() which is invoked only on route cache entry
> creation. There is no check when the metrics are accessed,
> because the metrics on the inetpeer are valid on every access.

Every routing cache entry we will use after the flush will be
a newly created one!  All the old ones will be stop being used.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-10  7:38             ` David Miller
@ 2012-02-10  7:51               ` Steffen Klassert
  2012-02-10  8:12                 ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-10  7:51 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Fri, Feb 10, 2012 at 02:38:15AM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> > 
> > I fully agree with you here. But we check for the genid just in
> > rt_init_metrics() which is invoked only on route cache entry
> > creation. There is no check when the metrics are accessed,
> > because the metrics on the inetpeer are valid on every access.
> 
> Every routing cache entry we will use after the flush will be
> a newly created one!  All the old ones will be stop being used.

Yes, I know that. All inetpeer metrics are updated once with the
first new routing cache entry that binds that inetpeer to the route.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-10  7:51               ` Steffen Klassert
@ 2012-02-10  8:12                 ` David Miller
  2012-02-10  8:44                   ` Steffen Klassert
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-10  8:12 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 10 Feb 2012 08:51:07 +0100

> On Fri, Feb 10, 2012 at 02:38:15AM -0500, David Miller wrote:
>> From: Steffen Klassert <steffen.klassert@secunet.com>
>> > 
>> > I fully agree with you here. But we check for the genid just in
>> > rt_init_metrics() which is invoked only on route cache entry
>> > creation. There is no check when the metrics are accessed,
>> > because the metrics on the inetpeer are valid on every access.
>> 
>> Every routing cache entry we will use after the flush will be
>> a newly created one!  All the old ones will be stop being used.
> 
> Yes, I know that. All inetpeer metrics are updated once with the
> first new routing cache entry that binds that inetpeer to the route.

And after a routing cache flush, that will be all routing cache
entries every used after that point.

So since this happens, you don't need to check the inetpeer at every
metric access.  The fact that all routing cache entries get recreated
will do it for you.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-10  8:12                 ` David Miller
@ 2012-02-10  8:44                   ` Steffen Klassert
  2012-02-10 18:25                     ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-10  8:44 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Fri, Feb 10, 2012 at 03:12:11AM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> Date: Fri, 10 Feb 2012 08:51:07 +0100
> 
> > On Fri, Feb 10, 2012 at 02:38:15AM -0500, David Miller wrote:
> >> From: Steffen Klassert <steffen.klassert@secunet.com>
> >> > 
> >> > I fully agree with you here. But we check for the genid just in
> >> > rt_init_metrics() which is invoked only on route cache entry
> >> > creation. There is no check when the metrics are accessed,
> >> > because the metrics on the inetpeer are valid on every access.
> >> 
> >> Every routing cache entry we will use after the flush will be
> >> a newly created one!  All the old ones will be stop being used.
> > 
> > Yes, I know that. All inetpeer metrics are updated once with the
> > first new routing cache entry that binds that inetpeer to the route.
> 
> And after a routing cache flush, that will be all routing cache
> entries every used after that point.

Yes, the genid check happens once for every new routing cache entry
we recreate. After that, we'll find maching routes in the cache
and we don't do that check.

> 
> So since this happens, you don't need to check the inetpeer at every
> metric access.  The fact that all routing cache entries get recreated
> will do it for you.

So if rt_init_metrics() is not the right place to check for genid
changes, where would you suggest to do it?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-10  8:44                   ` Steffen Klassert
@ 2012-02-10 18:25                     ` David Miller
  2012-02-21  6:19                       ` Steffen Klassert
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-10 18:25 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 10 Feb 2012 09:44:25 +0100

> On Fri, Feb 10, 2012 at 03:12:11AM -0500, David Miller wrote:
>> From: Steffen Klassert <steffen.klassert@secunet.com>
>> Date: Fri, 10 Feb 2012 08:51:07 +0100
>> 
>> > On Fri, Feb 10, 2012 at 02:38:15AM -0500, David Miller wrote:
>> >> From: Steffen Klassert <steffen.klassert@secunet.com>
>> >> > 
>> >> > I fully agree with you here. But we check for the genid just in
>> >> > rt_init_metrics() which is invoked only on route cache entry
>> >> > creation. There is no check when the metrics are accessed,
>> >> > because the metrics on the inetpeer are valid on every access.
>> >> 
>> >> Every routing cache entry we will use after the flush will be
>> >> a newly created one!  All the old ones will be stop being used.
>> > 
>> > Yes, I know that. All inetpeer metrics are updated once with the
>> > first new routing cache entry that binds that inetpeer to the route.
>> 
>> And after a routing cache flush, that will be all routing cache
>> entries every used after that point.
> 
> Yes, the genid check happens once for every new routing cache entry
> we recreate. After that, we'll find maching routes in the cache
> and we don't do that check.
> 
>> 
>> So since this happens, you don't need to check the inetpeer at every
>> metric access.  The fact that all routing cache entries get recreated
>> will do it for you.
> 
> So if rt_init_metrics() is not the right place to check for genid
> changes, where would you suggest to do it?

It is the right place, and since it will happen there for every routing
cache entry we use after a flush, the inetpeer issues will be taken
care of by it.  Therefore you don't need to check anything at metrics
access time.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-10 18:25                     ` David Miller
@ 2012-02-21  6:19                       ` Steffen Klassert
  2012-02-21  6:36                         ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-21  6:19 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Fri, Feb 10, 2012 at 01:25:57PM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> Date: Fri, 10 Feb 2012 09:44:25 +0100
> 
> > On Fri, Feb 10, 2012 at 03:12:11AM -0500, David Miller wrote:
> >> 
> >> So since this happens, you don't need to check the inetpeer at every
> >> metric access.  The fact that all routing cache entries get recreated
> >> will do it for you.
> > 
> > So if rt_init_metrics() is not the right place to check for genid
> > changes, where would you suggest to do it?
> 
> It is the right place, and since it will happen there for every routing
> cache entry we use after a flush, the inetpeer issues will be taken
> care of by it.  Therefore you don't need to check anything at metrics
> access time.

Ok, apparently I looked at the wrong place. The only checks at metrics
access that might be superfluous are the inet_metrics_new() checks in
ipv4_metrics() and ipv6_metrics(). If these are the checks you mean,
I'd remove them and resend the patchset.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-21  6:19                       ` Steffen Klassert
@ 2012-02-21  6:36                         ` David Miller
  2012-02-21  8:18                           ` Steffen Klassert
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-21  6:36 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 21 Feb 2012 07:19:22 +0100

> On Fri, Feb 10, 2012 at 01:25:57PM -0500, David Miller wrote:
>> From: Steffen Klassert <steffen.klassert@secunet.com>
>> Date: Fri, 10 Feb 2012 09:44:25 +0100
>> 
>> > On Fri, Feb 10, 2012 at 03:12:11AM -0500, David Miller wrote:
>> >> 
>> >> So since this happens, you don't need to check the inetpeer at every
>> >> metric access.  The fact that all routing cache entries get recreated
>> >> will do it for you.
>> > 
>> > So if rt_init_metrics() is not the right place to check for genid
>> > changes, where would you suggest to do it?
>> 
>> It is the right place, and since it will happen there for every routing
>> cache entry we use after a flush, the inetpeer issues will be taken
>> care of by it.  Therefore you don't need to check anything at metrics
>> access time.
> 
> Ok, apparently I looked at the wrong place. The only checks at metrics
> access that might be superfluous are the inet_metrics_new() checks in
> ipv4_metrics() and ipv6_metrics(). If these are the checks you mean,
> I'd remove them and resend the patchset.

I'm saying you shouldn't need a metrics access callback nor the rest
of your patch at all.

Look, when routes change, the routing cache is flushed and the gen ID
is incremented.

Every single existing routing cache entry is therefore instantly
invalid, and will not be used another time.

Therefore every route usage afterwards will go through the routing
cache entry creation path.  And in this path the inetpeer and it's
metrics can be validated, out of date metrics freed and replaced with
new ones, etc.  Whatever might be necessary can all be done here.

There is no need to every have special handling at metric access time
on a valid routing cache entry, the fact that it's valid and it's gen
ID matches up, means there has been no route table changes meanwhile
and that the inetpeer metrics have been validated since the last route
table change.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-21  6:36                         ` David Miller
@ 2012-02-21  8:18                           ` Steffen Klassert
  2012-02-21 19:24                             ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-21  8:18 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Tue, Feb 21, 2012 at 01:36:23AM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> > 
> > Ok, apparently I looked at the wrong place. The only checks at metrics
> > access that might be superfluous are the inet_metrics_new() checks in
> > ipv4_metrics() and ipv6_metrics(). If these are the checks you mean,
> > I'd remove them and resend the patchset.
> 
> I'm saying you shouldn't need a metrics access callback nor the rest
> of your patch at all.

I need the dst->ops->metrics() method because I removed the direct
reference to the inetpeer metrics from the dst_entry. I had to
remove this direct reference to be able to free the old metrics
safely. A dst_entry with a direct reference to old metrics
could leave the rcu protected region and might then try to access
already freed metrics (i.e. if a dst_entry with old metrics is already
attached to a skb when the routing cache is flushed and the skb is queued
for asynchronous processing). With this patchset we access the interpeer
metrics via the inetpeer itself on every metrics access, so we ensure
the metrics are not freed in the meantime.

> 
> Look, when routes change, the routing cache is flushed and the gen ID
> is incremented.
> 
> Every single existing routing cache entry is therefore instantly
> invalid, and will not be used another time.
>

Yes. But the dst_entry that belongs to an old existing routing cache
entry could be already attached to a skb in the moment when the routing
cache is flushed. This could lead to an access of already freed metrics,
as mentioned above.

Maybe I'm a bit slow on the uptake, but I don't see what's superfluous.
Could you please point me to the superfluous code in the patchset?
 
> Therefore every route usage afterwards will go through the routing
> cache entry creation path.  And in this path the inetpeer and it's
> metrics can be validated, out of date metrics freed and replaced with
> new ones, etc.  Whatever might be necessary can all be done here.
> 
> There is no need to every have special handling at metric access time
> on a valid routing cache entry, the fact that it's valid and it's gen
> ID matches up, means there has been no route table changes meanwhile
> and that the inetpeer metrics have been validated since the last route
> table change.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-21  8:18                           ` Steffen Klassert
@ 2012-02-21 19:24                             ` David Miller
  2012-02-24  9:08                               ` Steffen Klassert
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2012-02-21 19:24 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 21 Feb 2012 09:18:27 +0100

> I need the dst->ops->metrics() method because I removed the direct
> reference to the inetpeer metrics from the dst_entry. I had to
> remove this direct reference to be able to free the old metrics
> safely. A dst_entry with a direct reference to old metrics
> could leave the rcu protected region and might then try to access
> already freed metrics (i.e. if a dst_entry with old metrics is already
> attached to a skb when the routing cache is flushed and the skb is queued
> for asynchronous processing). With this patchset we access the interpeer
> metrics via the inetpeer itself on every metrics access, so we ensure
> the metrics are not freed in the meantime.

Then a callback still seems like extreme overkill just to ensure the
RCU safety of metric pointer accesses.

It seems much simpler to me to just kill the inetpeer when we find out
we actually do need to change the metrics, instead of trying to change
the metric memory from underneath it.  Just make a new inetpeer and let
the old one with the old outdated metrics simply die off as the stray
references disappear.

Remove the old inetpeer from the tree (so it cannot be found in a
lookup), and then any dangling old, invalid, routing cache entries
referring to it will hold a reference count.  And once that final
reference drops, we'll know we can safely free the inetpeer up.  So
we'll use stale metrics for a while from these old invalid routing
cache entries, but that's perfectly fine.

The whole thing is about not doing something non-trivial in the fast
path, which is what your patch does.  Your approach causes us to incur
a cost every single metric access just because we "might" access stale
metrics from a old invalid routing cache entry.  This is not the
common case at all.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-21 19:24                             ` David Miller
@ 2012-02-24  9:08                               ` Steffen Klassert
  2012-02-24  9:13                                 ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Steffen Klassert @ 2012-02-24  9:08 UTC (permalink / raw)
  To: David Miller; +Cc: timo.teras, netdev

On Tue, Feb 21, 2012 at 02:24:55PM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> Date: Tue, 21 Feb 2012 09:18:27 +0100
> 
> > I need the dst->ops->metrics() method because I removed the direct
> > reference to the inetpeer metrics from the dst_entry. I had to
> > remove this direct reference to be able to free the old metrics
> > safely. A dst_entry with a direct reference to old metrics
> > could leave the rcu protected region and might then try to access
> > already freed metrics (i.e. if a dst_entry with old metrics is already
> > attached to a skb when the routing cache is flushed and the skb is queued
> > for asynchronous processing). With this patchset we access the interpeer
> > metrics via the inetpeer itself on every metrics access, so we ensure
> > the metrics are not freed in the meantime.
> 
> Then a callback still seems like extreme overkill just to ensure the
> RCU safety of metric pointer accesses.

Ok, so it is the approach that is problematic. I had real hard times
to find something that I can remove from the patchset without adding
a bug :-)

> 
> It seems much simpler to me to just kill the inetpeer when we find out
> we actually do need to change the metrics, instead of trying to change
> the metric memory from underneath it. 

Well, the inetpeer was intended to keep the long-living route independent
informations about the peer. With this approach I tried to keep these
long-living informations about the peer. If we don't want to keep these
informations, it is indeed better to remove the whole inetpeer.

> Just make a new inetpeer and let
> the old one with the old outdated metrics simply die off as the stray
> references disappear.
> 
> Remove the old inetpeer from the tree (so it cannot be found in a
> lookup), and then any dangling old, invalid, routing cache entries
> referring to it will hold a reference count.  And once that final
> reference drops, we'll know we can safely free the inetpeer up. 

Actually the whole tree is invalid in such cases. So instead of
replacing each single entry of the tree, we could just replace
the old tree with a fresh initialized inet_peer_base. The old
tree could be removed later with a delayed work queue.

When rt_cache_invalidate() is invoked, all we have to do is to
replace the root node with a peer_fake_node and to add the old root
node to a garbage collecting list. The old tree will be destroyed
with a work queue later.

We would not even need a genid and we could also get rid of the
redirect_genid handling.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Fix routing metrics
  2012-02-24  9:08                               ` Steffen Klassert
@ 2012-02-24  9:13                                 ` David Miller
  0 siblings, 0 replies; 22+ messages in thread
From: David Miller @ 2012-02-24  9:13 UTC (permalink / raw)
  To: steffen.klassert; +Cc: timo.teras, netdev

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 24 Feb 2012 10:08:38 +0100

> Actually the whole tree is invalid in such cases. So instead of
> replacing each single entry of the tree, we could just replace
> the old tree with a fresh initialized inet_peer_base. The old
> tree could be removed later with a delayed work queue.
> 
> When rt_cache_invalidate() is invoked, all we have to do is to
> replace the root node with a peer_fake_node and to add the old root
> node to a garbage collecting list. The old tree will be destroyed
> with a work queue later.
> 
> We would not even need a genid and we could also get rid of the
> redirect_genid handling.

It sounds like a great idea.

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2012-02-24  9:15 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-02 10:11 [PATCH 0/4] Fix routing metrics Steffen Klassert
2012-02-02 10:12 ` [PATCH 1/4] inetpeer: Allocate the peer metrics dynamically Steffen Klassert
2012-02-02 10:12 ` [PATCH 2/4] net: Unlink the inetpeer metrics from dst_entry Steffen Klassert
2012-02-02 10:13 ` [PATCH 3/4] inetpeer: protect the inetpeerpeer metrics with rcu Steffen Klassert
2012-02-02 10:14 ` [PATCH 4/4] route: Invalidate the peer metrics along with the routing cache Steffen Klassert
2012-02-06 20:29 ` [PATCH 0/4] Fix routing metrics David Miller
2012-02-08  7:30   ` Steffen Klassert
2012-02-08 20:18     ` David Miller
2012-02-09 12:44       ` Steffen Klassert
2012-02-09 18:40         ` David Miller
2012-02-10  6:50           ` Steffen Klassert
2012-02-10  7:38             ` David Miller
2012-02-10  7:51               ` Steffen Klassert
2012-02-10  8:12                 ` David Miller
2012-02-10  8:44                   ` Steffen Klassert
2012-02-10 18:25                     ` David Miller
2012-02-21  6:19                       ` Steffen Klassert
2012-02-21  6:36                         ` David Miller
2012-02-21  8:18                           ` Steffen Klassert
2012-02-21 19:24                             ` David Miller
2012-02-24  9:08                               ` Steffen Klassert
2012-02-24  9:13                                 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.