* [PATCH 1/2] ipv4: Cache routes in nexthop exception entries.
@ 2012-07-31 1:23 David Miller
0 siblings, 0 replies; 2+ messages in thread
From: David Miller @ 2012-07-31 1:23 UTC (permalink / raw)
To: eric.dumazet; +Cc: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/ip_fib.h | 1 +
net/ipv4/fib_semantics.c | 4 +++
net/ipv4/route.c | 82 ++++++++++++++++++++++++++----------------------
3 files changed, 49 insertions(+), 38 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e69c3a4..c4770fc 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -54,6 +54,7 @@ struct fib_nh_exception {
u32 fnhe_pmtu;
__be32 fnhe_gw;
unsigned long fnhe_expires;
+ struct rtable *fnhe_rth;
unsigned long fnhe_stamp;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e55171f..eaccdb5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -153,6 +153,10 @@ static void free_nh_exceptions(struct fib_nh *nh)
struct fib_nh_exception *next;
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
+
+ if (fnhe->fnhe_rth)
+ dst_release(&fnhe->fnhe_rth->dst);
+
kfree(fnhe);
fnhe = next;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6eabcf..e2abb0d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -587,7 +587,7 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
build_sk_flow_key(fl4, sk);
}
-static DEFINE_SEQLOCK(fnhe_seqlock);
+static DEFINE_SPINLOCK(fnhe_lock);
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
@@ -599,6 +599,10 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe;
}
+ if (oldest->fnhe_rth) {
+ dst_release(&oldest->fnhe_rth->dst);
+ oldest->fnhe_rth = NULL;
+ }
return oldest;
}
@@ -620,7 +624,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
int depth;
u32 hval = fnhe_hashfun(daddr);
- write_seqlock_bh(&fnhe_seqlock);
+ spin_lock_bh(&fnhe_lock);
hash = nh->nh_exceptions;
if (!hash) {
@@ -667,7 +671,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_stamp = jiffies;
out_unlock:
- write_sequnlock_bh(&fnhe_seqlock);
+ spin_unlock_bh(&fnhe_lock);
return;
}
@@ -1167,36 +1171,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr)
{
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
-restart:
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
-
- if (daddr != fnhe_daddr)
- return;
+ spin_lock_bh(&fnhe_lock);
+
+ if (daddr == fnhe->fnhe_daddr) {
+ struct rtable *orig;
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = expires - jiffies;
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
+ }
}
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ }
+
+ orig = fnhe->fnhe_rth;
+ if (orig)
+ dst_release(&orig->dst);
+
+ rt->dst.flags |= DST_RCU_FREE;
+ dst_hold(&rt->dst);
+ fnhe->fnhe_rth = rt;
+
+ fnhe->fnhe_stamp = jiffies;
}
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
- }
- fnhe->fnhe_stamp = jiffies;
+
+ spin_unlock_bh(&fnhe_lock);
}
static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
@@ -1236,13 +1241,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(fnhe))
- rt_bind_exception(rt, fnhe, daddr);
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- if (!(rt->dst.flags & DST_NOCACHE))
+ if (unlikely(fnhe))
+ rt_bind_exception(rt, fnhe, daddr);
+ else if (!(rt->dst.flags & DST_NOCACHE))
rt_cache_route(nh, rt);
}
@@ -1741,18 +1746,19 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = NULL;
if (fi) {
fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
- if (!fnhe) {
+ if (fnhe)
+ rth = fnhe->fnhe_rth;
+ else
rth = FIB_RES_NH(*res).nh_rth_output;
- if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- return rth;
- }
+ if (rt_cache_valid(rth)) {
+ dst_hold(&rth->dst);
+ return rth;
}
}
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi && !fnhe);
+ fi);
if (!rth)
return ERR_PTR(-ENOBUFS);
--
1.7.11.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 1/2] ipv4: Cache routes in nexthop exception entries.
@ 2012-07-31 22:20 David Miller
0 siblings, 0 replies; 2+ messages in thread
From: David Miller @ 2012-07-31 22:20 UTC (permalink / raw)
To: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
This is just a respin of what I posted the other day, rebased on top of
Eric's fixes from today. Pushed to 'net'.
include/net/ip_fib.h | 1 +
net/ipv4/fib_semantics.c | 39 ++++++++++--------
net/ipv4/route.c | 103 +++++++++++++++++++++++++---------------------
3 files changed, 79 insertions(+), 64 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e331746..926142e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -55,6 +55,7 @@ struct fib_nh_exception {
u32 fnhe_pmtu;
__be32 fnhe_gw;
unsigned long fnhe_expires;
+ struct rtable __rcu *fnhe_rth;
unsigned long fnhe_stamp;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index fe2ca02..da80dc1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
},
};
+static void rt_fibinfo_free(struct rtable __rcu **rtp)
+{
+ struct rtable *rt = rcu_dereference_protected(*rtp, 1);
+
+ if (!rt)
+ return;
+
+ /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
+ * because we waited an RCU grace period before calling
+ * free_fib_info_rcu()
+ */
+
+ dst_free(&rt->dst);
+}
+
static void free_nh_exceptions(struct fib_nh *nh)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
struct fib_nh_exception *next;
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
+
+ rt_fibinfo_free(&fnhe->fnhe_rth);
+
kfree(fnhe);
fnhe = next;
@@ -161,22 +179,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
kfree(hash);
}
-static void rt_nexthop_free(struct rtable __rcu **rtp)
-{
- struct rtable *rt = rcu_dereference_protected(*rtp, 1);
-
- if (!rt)
- return;
-
- /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
- * because we waited an RCU grace period before calling
- * free_fib_info_rcu()
- */
-
- dst_free(&rt->dst);
-}
-
-static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp)
+static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
{
int cpu;
@@ -203,8 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
- rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output);
- rt_nexthop_free(&nexthop_nh->nh_rth_input);
+ rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
+ rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4f6276c..b102eeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -587,11 +587,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
build_sk_flow_key(fl4, sk);
}
-static DEFINE_SEQLOCK(fnhe_seqlock);
+static inline void rt_free(struct rtable *rt)
+{
+ call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
struct fib_nh_exception *fnhe, *oldest;
+ struct rtable *orig;
oldest = rcu_dereference(hash->chain);
for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +605,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe;
}
+ orig = rcu_dereference(oldest->fnhe_rth);
+ if (orig) {
+ RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
+ rt_free(orig);
+ }
return oldest;
}
@@ -620,7 +631,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
int depth;
u32 hval = fnhe_hashfun(daddr);
- write_seqlock_bh(&fnhe_seqlock);
+ spin_lock_bh(&fnhe_lock);
hash = nh->nh_exceptions;
if (!hash) {
@@ -667,7 +678,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_stamp = jiffies;
out_unlock:
- write_sequnlock_bh(&fnhe_seqlock);
+ spin_unlock_bh(&fnhe_lock);
return;
}
@@ -1167,41 +1178,40 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr)
{
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
-restart:
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
-
- if (daddr != fnhe_daddr)
- return;
+ spin_lock_bh(&fnhe_lock);
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (daddr == fnhe->fnhe_daddr) {
+ struct rtable *orig;
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = expires - jiffies;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
+ }
+ }
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
}
- }
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
- }
- fnhe->fnhe_stamp = jiffies;
-}
-static inline void rt_free(struct rtable *rt)
-{
- call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+ orig = rcu_dereference(fnhe->fnhe_rth);
+ rcu_assign_pointer(fnhe->fnhe_rth, rt);
+ if (orig)
+ rt_free(orig);
+
+ fnhe->fnhe_stamp = jiffies;
+ } else {
+ /* Routes we intend to cache in nexthop exception have
+ * the DST_NOCACHE bit clear. However, if we are
+ * unsuccessful at storing this route into the cache
+ * we really need to set it.
+ */
+ rt->dst.flags |= DST_NOCACHE;
+ }
+ spin_unlock_bh(&fnhe_lock);
}
static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
@@ -1249,13 +1259,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(fnhe))
- rt_bind_exception(rt, fnhe, daddr);
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- if (!(rt->dst.flags & DST_NOCACHE))
+ if (unlikely(fnhe))
+ rt_bind_exception(rt, fnhe, daddr);
+ else if (!(rt->dst.flags & DST_NOCACHE))
rt_cache_route(nh, rt);
}
@@ -1753,22 +1763,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = NULL;
if (fi) {
- fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
- if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) {
- struct rtable __rcu **prth;
+ struct rtable __rcu **prth;
+ fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
+ if (fnhe)
+ prth = &fnhe->fnhe_rth;
+ else
prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
- rth = rcu_dereference(*prth);
- if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- return rth;
- }
+ rth = rcu_dereference(*prth);
+ if (rt_cache_valid(rth)) {
+ dst_hold(&rth->dst);
+ return rth;
}
}
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi && !fnhe);
+ fi);
if (!rth)
return ERR_PTR(-ENOBUFS);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2012-07-31 22:20 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-31 1:23 [PATCH 1/2] ipv4: Cache routes in nexthop exception entries David Miller
2012-07-31 22:20 David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).