netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 1/3] net: Refactor rtable initialization
@ 2015-09-02 16:40 David Ahern
  2015-09-02 16:40 ` [PATCH net-next 2/3] net: Add FIB table id to rtable David Ahern
  2015-09-02 16:40 ` [PATCH net-next 3/3] net: Add table id from route lookup to route response David Ahern
  0 siblings, 2 replies; 16+ messages in thread
From: David Ahern @ 2015-09-02 16:40 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

All callers to rt_dst_alloc have nearly the same initialization following
a successful allocation. Consolidate it into rt_dst_alloc.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 net/ipv4/route.c | 85 ++++++++++++++++++++++----------------------------------
 1 file changed, 33 insertions(+), 52 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5f4a5565ad8b..eaefeadce07c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1438,12 +1438,33 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 }
 
 static struct rtable *rt_dst_alloc(struct net_device *dev,
+				   unsigned int flags, u16 type,
 				   bool nopolicy, bool noxfrm, bool will_cache)
 {
-	return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
-			 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
-			 (nopolicy ? DST_NOPOLICY : 0) |
-			 (noxfrm ? DST_NOXFRM : 0));
+	struct rtable *rt;
+
+	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+		       (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
+		       (nopolicy ? DST_NOPOLICY : 0) |
+		       (noxfrm ? DST_NOXFRM : 0));
+
+	if (rt) {
+		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+		rt->rt_flags = flags;
+		rt->rt_type = type;
+		rt->rt_is_input = 0;
+		rt->rt_iif = 0;
+		rt->rt_pmtu = 0;
+		rt->rt_gateway = 0;
+		rt->rt_uses_gateway = 0;
+		INIT_LIST_HEAD(&rt->rt_uncached);
+
+		rt->dst.output = ip_output;
+		if (flags & RTCF_LOCAL)
+			rt->dst.input = ip_local_deliver;
+	}
+
+	return rt;
 }
 
 /* called in rcu_read_lock() section */
@@ -1452,6 +1473,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
 	struct rtable *rth;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	unsigned int flags = RTCF_MULTICAST;
 	u32 itag = 0;
 	int err;
 
@@ -1477,7 +1499,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if (err < 0)
 			goto e_err;
 	}
-	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
+	if (our)
+		flags |= RTCF_LOCAL;
+
+	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
 	if (!rth)
 		goto e_nobufs;
@@ -1486,20 +1511,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->dst.tclassid = itag;
 #endif
 	rth->dst.output = ip_rt_bug;
-
-	rth->rt_genid	= rt_genid_ipv4(dev_net(dev));
-	rth->rt_flags	= RTCF_MULTICAST;
-	rth->rt_type	= RTN_MULTICAST;
 	rth->rt_is_input= 1;
-	rth->rt_iif	= 0;
-	rth->rt_pmtu	= 0;
-	rth->rt_gateway	= 0;
-	rth->rt_uses_gateway = 0;
-	INIT_LIST_HEAD(&rth->rt_uncached);
-	if (our) {
-		rth->dst.input= ip_local_deliver;
-		rth->rt_flags |= RTCF_LOCAL;
-	}
 
 #ifdef CONFIG_IP_MROUTE
 	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -1608,7 +1620,7 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-	rth = rt_dst_alloc(out_dev->dev,
+	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
 			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
 	if (!rth) {
@@ -1616,19 +1628,10 @@ static int __mkroute_input(struct sk_buff *skb,
 		goto cleanup;
 	}
 
-	rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
-	rth->rt_flags = 0;
-	rth->rt_type = res->type;
 	rth->rt_is_input = 1;
-	rth->rt_iif 	= 0;
-	rth->rt_pmtu	= 0;
-	rth->rt_gateway	= 0;
-	rth->rt_uses_gateway = 0;
-	INIT_LIST_HEAD(&rth->rt_uncached);
 	RT_CACHE_STAT_INC(in_slow_tot);
 
 	rth->dst.input = ip_forward;
-	rth->dst.output = ip_output;
 
 	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
 	if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
@@ -1795,26 +1798,16 @@ out:	return err;
 		}
 	}
 
-	rth = rt_dst_alloc(net->loopback_dev,
+	rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
 	if (!rth)
 		goto e_nobufs;
 
-	rth->dst.input= ip_local_deliver;
 	rth->dst.output= ip_rt_bug;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
-
-	rth->rt_genid = rt_genid_ipv4(net);
-	rth->rt_flags 	= flags|RTCF_LOCAL;
-	rth->rt_type	= res.type;
 	rth->rt_is_input = 1;
-	rth->rt_iif	= 0;
-	rth->rt_pmtu	= 0;
-	rth->rt_gateway	= 0;
-	rth->rt_uses_gateway = 0;
-	INIT_LIST_HEAD(&rth->rt_uncached);
 
 	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res.type == RTN_UNREACHABLE) {
@@ -1987,28 +1980,16 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	}
 
 add:
-	rth = rt_dst_alloc(dev_out,
+	rth = rt_dst_alloc(dev_out, flags, type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
 			   IN_DEV_CONF_GET(in_dev, NOXFRM),
 			   do_cache);
 	if (!rth)
 		return ERR_PTR(-ENOBUFS);
 
-	rth->dst.output = ip_output;
-
-	rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
-	rth->rt_flags	= flags;
-	rth->rt_type	= type;
-	rth->rt_is_input = 0;
 	rth->rt_iif	= orig_oif ? : 0;
-	rth->rt_pmtu	= 0;
-	rth->rt_gateway = 0;
-	rth->rt_uses_gateway = 0;
-	INIT_LIST_HEAD(&rth->rt_uncached);
 	RT_CACHE_STAT_INC(out_slow_tot);
 
-	if (flags & RTCF_LOCAL)
-		rth->dst.input = ip_local_deliver;
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
 		if (flags & RTCF_LOCAL &&
 		    !(dev_out->flags & IFF_LOOPBACK)) {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 2/3] net: Add FIB table id to rtable
  2015-09-02 16:40 [PATCH net-next 1/3] net: Refactor rtable initialization David Ahern
@ 2015-09-02 16:40 ` David Ahern
  2015-09-02 16:40 ` [PATCH net-next 3/3] net: Add table id from route lookup to route response David Ahern
  1 sibling, 0 replies; 16+ messages in thread
From: David Ahern @ 2015-09-02 16:40 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

Add the FIB table id to rtable to make the information available for
IPv4 as it is for IPv6.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 drivers/net/vrf.c       | 2 ++
 include/net/route.h     | 2 ++
 net/ipv4/route.c        | 8 ++++++++
 net/ipv4/xfrm4_policy.c | 1 +
 4 files changed, 13 insertions(+)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index e7094fbd7568..8c9ab5ebea23 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -320,6 +320,7 @@ static void vrf_rtable_destroy(struct net_vrf *vrf)
 
 static struct rtable *vrf_rtable_create(struct net_device *dev)
 {
+	struct net_vrf *vrf = netdev_priv(dev);
 	struct rtable *rth;
 
 	rth = dst_alloc(&vrf_dst_ops, dev, 2,
@@ -335,6 +336,7 @@ static struct rtable *vrf_rtable_create(struct net_device *dev)
 		rth->rt_pmtu	= 0;
 		rth->rt_gateway	= 0;
 		rth->rt_uses_gateway = 0;
+		rth->rt_table_id = vrf->tb_id;
 		INIT_LIST_HEAD(&rth->rt_uncached);
 		rth->rt_uncached_list = NULL;
 	}
diff --git a/include/net/route.h b/include/net/route.h
index cc61cb95f059..10a7d21a211c 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -64,6 +64,8 @@ struct rtable {
 	/* Miscellaneous cached information */
 	u32			rt_pmtu;
 
+	u32			rt_table_id;
+
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
 };
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index eaefeadce07c..92acc95b7578 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1457,6 +1457,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_pmtu = 0;
 		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
+		rt->rt_table_id = 0;
 		INIT_LIST_HEAD(&rt->rt_uncached);
 
 		rt->dst.output = ip_output;
@@ -1629,6 +1630,8 @@ static int __mkroute_input(struct sk_buff *skb,
 	}
 
 	rth->rt_is_input = 1;
+	if (res->table)
+		rth->rt_table_id = res->table->tb_id;
 	RT_CACHE_STAT_INC(in_slow_tot);
 
 	rth->dst.input = ip_forward;
@@ -1808,6 +1811,8 @@ out:	return err;
 	rth->dst.tclassid = itag;
 #endif
 	rth->rt_is_input = 1;
+	if (res.table)
+		rth->rt_table_id = res.table->tb_id;
 
 	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res.type == RTN_UNREACHABLE) {
@@ -1988,6 +1993,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		return ERR_PTR(-ENOBUFS);
 
 	rth->rt_iif	= orig_oif ? : 0;
+	if (res->table)
+		rth->rt_table_id = res->table->tb_id;
+
 	RT_CACHE_STAT_INC(out_slow_tot);
 
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index bb919b28619f..671011055ad5 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -95,6 +95,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_gateway = rt->rt_gateway;
 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+	xdst->u.rt.rt_table_id = rt->rt_table_id;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
 	return 0;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 16:40 [PATCH net-next 1/3] net: Refactor rtable initialization David Ahern
  2015-09-02 16:40 ` [PATCH net-next 2/3] net: Add FIB table id to rtable David Ahern
@ 2015-09-02 16:40 ` David Ahern
  2015-09-02 18:43   ` Thomas Graf
  1 sibling, 1 reply; 16+ messages in thread
From: David Ahern @ 2015-09-02 16:40 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

rt_fill_info which is called for 'route get' requests hardcodes the
table id as RT_TABLE_MAIN which is not correct when multiple tables
are used. Use the newly added table id in the rtable to send back
the correct table.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 net/ipv4/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92acc95b7578..2738bf4132db 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2325,8 +2325,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= fl4->flowi4_tos;
-	r->rtm_table	= RT_TABLE_MAIN;
-	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
+	r->rtm_table	= rt->rt_table_id;
+	if (nla_put_u32(skb, RTA_TABLE, rt->rt_table_id))
 		goto nla_put_failure;
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 16:40 ` [PATCH net-next 3/3] net: Add table id from route lookup to route response David Ahern
@ 2015-09-02 18:43   ` Thomas Graf
  2015-09-02 18:49     ` David Miller
  0 siblings, 1 reply; 16+ messages in thread
From: Thomas Graf @ 2015-09-02 18:43 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev

On 09/02/15 at 09:40am, David Ahern wrote:
> rt_fill_info which is called for 'route get' requests hardcodes the
> table id as RT_TABLE_MAIN which is not correct when multiple tables
> are used. Use the newly added table id in the rtable to send back
> the correct table.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

What RTM_GETROUTE returns is not the actual route but a description
of the routing decision which is why table id, scope, protocol, and
prefix length are hardcoded. This is indicated by the RTM_F_CLONED
flag. What you propose would break userspace ABI.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 18:43   ` Thomas Graf
@ 2015-09-02 18:49     ` David Miller
  2015-09-02 18:51       ` David Ahern
  0 siblings, 1 reply; 16+ messages in thread
From: David Miller @ 2015-09-02 18:49 UTC (permalink / raw)
  To: tgraf; +Cc: dsa, netdev

From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 2 Sep 2015 20:43:46 +0200

> On 09/02/15 at 09:40am, David Ahern wrote:
>> rt_fill_info which is called for 'route get' requests hardcodes the
>> table id as RT_TABLE_MAIN which is not correct when multiple tables
>> are used. Use the newly added table id in the rtable to send back
>> the correct table.
>> 
>> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> 
> What RTM_GETROUTE returns is not the actual route but a description
> of the routing decision which is why table id, scope, protocol, and
> prefix length are hardcoded. This is indicated by the RTM_F_CLONED
> flag. What you propose would break userspace ABI.

Agreed, I don't think we can do this.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 18:49     ` David Miller
@ 2015-09-02 18:51       ` David Ahern
  2015-09-02 19:08         ` Thomas Graf
  0 siblings, 1 reply; 16+ messages in thread
From: David Ahern @ 2015-09-02 18:51 UTC (permalink / raw)
  To: David Miller, tgraf; +Cc: netdev

On 9/2/15 12:49 PM, David Miller wrote:
> From: Thomas Graf <tgraf@suug.ch>
> Date: Wed, 2 Sep 2015 20:43:46 +0200
>
>> On 09/02/15 at 09:40am, David Ahern wrote:
>>> rt_fill_info which is called for 'route get' requests hardcodes the
>>> table id as RT_TABLE_MAIN which is not correct when multiple tables
>>> are used. Use the newly added table id in the rtable to send back
>>> the correct table.
>>>
>>> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
>>
>> What RTM_GETROUTE returns is not the actual route but a description
>> of the routing decision which is why table id, scope, protocol, and
>> prefix length are hardcoded. This is indicated by the RTM_F_CLONED
>> flag. What you propose would break userspace ABI.
>
> Agreed, I don't think we can do this.
>

Doesn't the table used to come up with the decision matter for IPv4? 
ie., hardcoding to MAIN is misleading when there is absolutely no way 
the decision comes from that table. IPv6 already returns the table id.

Or is your response that it breaks ABI and hence not going to fix.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 18:51       ` David Ahern
@ 2015-09-02 19:08         ` Thomas Graf
  2015-09-02 19:43           ` Andy Gospodarek
  2015-09-02 20:16           ` [PATCH net-next v2] " David Ahern
  0 siblings, 2 replies; 16+ messages in thread
From: Thomas Graf @ 2015-09-02 19:08 UTC (permalink / raw)
  To: David Ahern; +Cc: David Miller, netdev

On 09/02/15 at 12:51pm, David Ahern wrote:
> On 9/2/15 12:49 PM, David Miller wrote:
> >From: Thomas Graf <tgraf@suug.ch>
> >Date: Wed, 2 Sep 2015 20:43:46 +0200
> >
> >>On 09/02/15 at 09:40am, David Ahern wrote:
> >>>rt_fill_info which is called for 'route get' requests hardcodes the
> >>>table id as RT_TABLE_MAIN which is not correct when multiple tables
> >>>are used. Use the newly added table id in the rtable to send back
> >>>the correct table.
> >>>
> >>>Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> >>
> >>What RTM_GETROUTE returns is not the actual route but a description
> >>of the routing decision which is why table id, scope, protocol, and
> >>prefix length are hardcoded. This is indicated by the RTM_F_CLONED
> >>flag. What you propose would break userspace ABI.
> >
> >Agreed, I don't think we can do this.
> >
> 
> Doesn't the table used to come up with the decision matter for IPv4? ie.,
> hardcoding to MAIN is misleading when there is absolutely no way the
> decision comes from that table. IPv6 already returns the table id.
> 
> Or is your response that it breaks ABI and hence not going to fix.

This behaviour comes back from when we still had the IPv4 routing cache
which was flat. I'm not against exposing the table id but you have
to use a new attribute for it.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 19:08         ` Thomas Graf
@ 2015-09-02 19:43           ` Andy Gospodarek
  2015-09-02 20:18             ` Thomas Graf
  2015-09-02 22:33             ` David Miller
  2015-09-02 20:16           ` [PATCH net-next v2] " David Ahern
  1 sibling, 2 replies; 16+ messages in thread
From: Andy Gospodarek @ 2015-09-02 19:43 UTC (permalink / raw)
  To: Thomas Graf; +Cc: David Ahern, David Miller, netdev

On Wed, Sep 02, 2015 at 09:08:36PM +0200, Thomas Graf wrote:
> On 09/02/15 at 12:51pm, David Ahern wrote:
> > On 9/2/15 12:49 PM, David Miller wrote:
> > >From: Thomas Graf <tgraf@suug.ch>
> > >Date: Wed, 2 Sep 2015 20:43:46 +0200
> > >
> > >>On 09/02/15 at 09:40am, David Ahern wrote:
> > >>>rt_fill_info which is called for 'route get' requests hardcodes the
> > >>>table id as RT_TABLE_MAIN which is not correct when multiple tables
> > >>>are used. Use the newly added table id in the rtable to send back
> > >>>the correct table.
> > >>>
> > >>>Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> > >>
> > >>What RTM_GETROUTE returns is not the actual route but a description
> > >>of the routing decision which is why table id, scope, protocol, and
> > >>prefix length are hardcoded. This is indicated by the RTM_F_CLONED
> > >>flag. What you propose would break userspace ABI.
> > >
> > >Agreed, I don't think we can do this.
> > >
> > 
> > Doesn't the table used to come up with the decision matter for IPv4? ie.,
> > hardcoding to MAIN is misleading when there is absolutely no way the
> > decision comes from that table. IPv6 already returns the table id.
> > 
> > Or is your response that it breaks ABI and hence not going to fix.
> 
> This behaviour comes back from when we still had the IPv4 routing cache
> which was flat.

So before the routing cache was removed, was the response always
RTA_TABLE_MAIN since there was no way to indicate which table may have
route if it came from the cache?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH net-next v2] net: Add table id from route lookup to route response
  2015-09-02 19:08         ` Thomas Graf
  2015-09-02 19:43           ` Andy Gospodarek
@ 2015-09-02 20:16           ` David Ahern
  2015-09-02 20:23             ` Thomas Graf
                               ` (2 more replies)
  1 sibling, 3 replies; 16+ messages in thread
From: David Ahern @ 2015-09-02 20:16 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

IPv4 ABI has the table hardcoded as RT_TABLE_MAIN regardless of the table
hit for the route lookup. Add the table using a new attribute,
RTA_TABLE_LOOKUP, to maintain the ABI yet return the right table id.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---

Thomas: Something like this?

The current ABI is returning wrong data in some cases; that seems worse
to me than breaking the ABI.

 include/uapi/linux/rtnetlink.h | 1 +
 net/ipv4/route.c               | 5 +++++
 net/ipv6/route.c               | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 702024769c74..5add1468350a 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -310,6 +310,7 @@ enum rtattr_type_t {
 	RTA_PREF,
 	RTA_ENCAP_TYPE,
 	RTA_ENCAP,
+	RTA_TABLE_LOOKUP,  /* table hit for fib lookup */
 	__RTA_MAX
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92acc95b7578..95454c368e66 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2328,6 +2328,11 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 	r->rtm_table	= RT_TABLE_MAIN;
 	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
 		goto nla_put_failure;
+
+	if (rt->rt_table_id && rt->rt_table_id != RT_TABLE_MAIN &&
+	    nla_put_u32(skb, RTA_TABLE_LOOKUP, rt->rt_table_id))
+		goto nla_put_failure;
+
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
 	r->rtm_protocol = RTPROT_UNSPEC;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f45cac6f8356..3c5d3a50bb7b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2922,6 +2922,10 @@ static int rt6_fill_node(struct net *net,
 	rtm->rtm_table = table;
 	if (nla_put_u32(skb, RTA_TABLE, table))
 		goto nla_put_failure;
+
+	if (table && nla_put_u32(skb, RTA_TABLE_LOOKUP, table))
+		goto nla_put_failure;
+
 	if (rt->rt6i_flags & RTF_REJECT) {
 		switch (rt->dst.error) {
 		case -EINVAL:
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 19:43           ` Andy Gospodarek
@ 2015-09-02 20:18             ` Thomas Graf
  2015-09-02 22:33             ` David Miller
  1 sibling, 0 replies; 16+ messages in thread
From: Thomas Graf @ 2015-09-02 20:18 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: David Ahern, David Miller, netdev

On 09/02/15 at 03:43pm, Andy Gospodarek wrote:
> On Wed, Sep 02, 2015 at 09:08:36PM +0200, Thomas Graf wrote:
> > This behaviour comes back from when we still had the IPv4 routing cache
> > which was flat.
> 
> So before the routing cache was removed, was the response always
> RTA_TABLE_MAIN since there was no way to indicate which table may have
> route if it came from the cache?

Yes, from that perspective, get and list are very different in
behaviour. Again, I'm not against including this information
but we can't break compatibility.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v2] net: Add table id from route lookup to route response
  2015-09-02 20:16           ` [PATCH net-next v2] " David Ahern
@ 2015-09-02 20:23             ` Thomas Graf
  2015-09-02 20:34               ` David Ahern
  2015-09-02 20:41             ` Alexander Duyck
  2015-09-02 20:53             ` Stephen Hemminger
  2 siblings, 1 reply; 16+ messages in thread
From: Thomas Graf @ 2015-09-02 20:23 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev

On 09/02/15 at 01:16pm, David Ahern wrote:
> IPv4 ABI has the table hardcoded as RT_TABLE_MAIN regardless of the table
> hit for the route lookup. Add the table using a new attribute,
> RTA_TABLE_LOOKUP, to maintain the ABI yet return the right table id.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> ---
> 
> Thomas: Something like this?
> 
> The current ABI is returning wrong data in some cases; that seems worse
> to me than breaking the ABI.

Another option is to introduce a new flag bundled with RTM_GETROUTE
which fixes RTM_GETROUTE altogether and makes it return the actual
route instead of a simulated cache entry.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v2] net: Add table id from route lookup to route response
  2015-09-02 20:23             ` Thomas Graf
@ 2015-09-02 20:34               ` David Ahern
  0 siblings, 0 replies; 16+ messages in thread
From: David Ahern @ 2015-09-02 20:34 UTC (permalink / raw)
  To: Thomas Graf; +Cc: netdev

On 9/2/15 2:23 PM, Thomas Graf wrote:
> On 09/02/15 at 01:16pm, David Ahern wrote:
>> IPv4 ABI has the table hardcoded as RT_TABLE_MAIN regardless of the table
>> hit for the route lookup. Add the table using a new attribute,
>> RTA_TABLE_LOOKUP, to maintain the ABI yet return the right table id.
>>
>> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
>> ---
>>
>> Thomas: Something like this?
>>
>> The current ABI is returning wrong data in some cases; that seems worse
>> to me than breaking the ABI.
>
> Another option is to introduce a new flag bundled with RTM_GETROUTE
> which fixes RTM_GETROUTE altogether and makes it return the actual
> route instead of a simulated cache entry.
>

I like that better; it least then information is not duplicated. Thanks 
for the suggestion.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v2] net: Add table id from route lookup to route response
  2015-09-02 20:16           ` [PATCH net-next v2] " David Ahern
  2015-09-02 20:23             ` Thomas Graf
@ 2015-09-02 20:41             ` Alexander Duyck
  2015-09-02 20:47               ` David Ahern
  2015-09-02 20:53             ` Stephen Hemminger
  2 siblings, 1 reply; 16+ messages in thread
From: Alexander Duyck @ 2015-09-02 20:41 UTC (permalink / raw)
  To: David Ahern, netdev

On 09/02/2015 01:16 PM, David Ahern wrote:
> IPv4 ABI has the table hardcoded as RT_TABLE_MAIN regardless of the table
> hit for the route lookup. Add the table using a new attribute,
> RTA_TABLE_LOOKUP, to maintain the ABI yet return the right table id.
>
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> ---
>
> Thomas: Something like this?
>
> The current ABI is returning wrong data in some cases; that seems worse
> to me than breaking the ABI.
>
>   include/uapi/linux/rtnetlink.h | 1 +
>   net/ipv4/route.c               | 5 +++++
>   net/ipv6/route.c               | 4 ++++
>   3 files changed, 10 insertions(+)
>
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index 702024769c74..5add1468350a 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -310,6 +310,7 @@ enum rtattr_type_t {
>   	RTA_PREF,
>   	RTA_ENCAP_TYPE,
>   	RTA_ENCAP,
> +	RTA_TABLE_LOOKUP,  /* table hit for fib lookup */
>   	__RTA_MAX
>   };
>
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 92acc95b7578..95454c368e66 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -2328,6 +2328,11 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
>   	r->rtm_table	= RT_TABLE_MAIN;
>   	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
>   		goto nla_put_failure;
> +
> +	if (rt->rt_table_id && rt->rt_table_id != RT_TABLE_MAIN &&
> +	    nla_put_u32(skb, RTA_TABLE_LOOKUP, rt->rt_table_id))
> +		goto nla_put_failure;
> +
>   	r->rtm_type	= rt->rt_type;
>   	r->rtm_scope	= RT_SCOPE_UNIVERSE;
>   	r->rtm_protocol = RTPROT_UNSPEC;

Why not implement this this same for IPv4 and IPv6?  It looks like it is 
only included if it is non-zer and not MAIN in the above case, and then 
below as long as a table ID is non-zero you are setting the value.  Why 
not just include the value in all cases where it is defined just like 
for IPv6?

> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index f45cac6f8356..3c5d3a50bb7b 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -2922,6 +2922,10 @@ static int rt6_fill_node(struct net *net,
>   	rtm->rtm_table = table;
>   	if (nla_put_u32(skb, RTA_TABLE, table))
>   		goto nla_put_failure;
> +
> +	if (table && nla_put_u32(skb, RTA_TABLE_LOOKUP, table))
> +		goto nla_put_failure;
> +
>   	if (rt->rt6i_flags & RTF_REJECT) {
>   		switch (rt->dst.error) {
>   		case -EINVAL:
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v2] net: Add table id from route lookup to route response
  2015-09-02 20:41             ` Alexander Duyck
@ 2015-09-02 20:47               ` David Ahern
  0 siblings, 0 replies; 16+ messages in thread
From: David Ahern @ 2015-09-02 20:47 UTC (permalink / raw)
  To: Alexander Duyck, netdev

On 9/2/15 2:41 PM, Alexander Duyck wrote:
>
> Why not implement this this same for IPv4 and IPv6?  It looks like it is
> only included if it is non-zer and not MAIN in the above case, and then
> below as long as a table ID is non-zero you are setting the value.  Why
> not just include the value in all cases where it is defined just like
> for IPv6?
>

I like Thomas' suggestion to add an rtm_flag better. We only need to fix 
IPv4 which hardcodes the tableid. Adding a flag, e.g.,

+#define RTM_F_LOOKUP_TABLE     0x1000  /* set rtm_table to FIB lookup 
result */

signifies the caller wants the real table. When set rt_fill_info sets 
rtm_table to the actual table id. This allows updated tools to work 
properly for both ipv4 and ipv6 and without breaking existing userspace.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v2] net: Add table id from route lookup to route response
  2015-09-02 20:16           ` [PATCH net-next v2] " David Ahern
  2015-09-02 20:23             ` Thomas Graf
  2015-09-02 20:41             ` Alexander Duyck
@ 2015-09-02 20:53             ` Stephen Hemminger
  2 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2015-09-02 20:53 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev

On Wed,  2 Sep 2015 13:16:20 -0700
David Ahern <dsa@cumulusnetworks.com> wrote:

> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index 702024769c74..5add1468350a 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -310,6 +310,7 @@ enum rtattr_type_t {
>  	RTA_PREF,
>  	RTA_ENCAP_TYPE,
>  	RTA_ENCAP,
> +	RTA_TABLE_LOOKUP,  /* table hit for fib lookup */

Why add a comment here. There is nothing special that needs a comment.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next 3/3] net: Add table id from route lookup to route response
  2015-09-02 19:43           ` Andy Gospodarek
  2015-09-02 20:18             ` Thomas Graf
@ 2015-09-02 22:33             ` David Miller
  1 sibling, 0 replies; 16+ messages in thread
From: David Miller @ 2015-09-02 22:33 UTC (permalink / raw)
  To: gospo; +Cc: tgraf, dsa, netdev

From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Wed, 2 Sep 2015 15:43:27 -0400

> On Wed, Sep 02, 2015 at 09:08:36PM +0200, Thomas Graf wrote:
>> On 09/02/15 at 12:51pm, David Ahern wrote:
>> > On 9/2/15 12:49 PM, David Miller wrote:
>> > >From: Thomas Graf <tgraf@suug.ch>
>> > >Date: Wed, 2 Sep 2015 20:43:46 +0200
>> > >
>> > >>On 09/02/15 at 09:40am, David Ahern wrote:
>> > >>>rt_fill_info which is called for 'route get' requests hardcodes the
>> > >>>table id as RT_TABLE_MAIN which is not correct when multiple tables
>> > >>>are used. Use the newly added table id in the rtable to send back
>> > >>>the correct table.
>> > >>>
>> > >>>Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
>> > >>
>> > >>What RTM_GETROUTE returns is not the actual route but a description
>> > >>of the routing decision which is why table id, scope, protocol, and
>> > >>prefix length are hardcoded. This is indicated by the RTM_F_CLONED
>> > >>flag. What you propose would break userspace ABI.
>> > >
>> > >Agreed, I don't think we can do this.
>> > >
>> > 
>> > Doesn't the table used to come up with the decision matter for IPv4? ie.,
>> > hardcoding to MAIN is misleading when there is absolutely no way the
>> > decision comes from that table. IPv6 already returns the table id.
>> > 
>> > Or is your response that it breaks ABI and hence not going to fix.
>> 
>> This behaviour comes back from when we still had the IPv4 routing cache
>> which was flat.
> 
> So before the routing cache was removed, was the response always
> RTA_TABLE_MAIN since there was no way to indicate which table may have
> route if it came from the cache?

Right.  In fact, it was possible for routes from multiple tables to
end up evaluating to the same routing cache entry.  So there could be
a many to one relationship back then.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-09-02 22:33 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-02 16:40 [PATCH net-next 1/3] net: Refactor rtable initialization David Ahern
2015-09-02 16:40 ` [PATCH net-next 2/3] net: Add FIB table id to rtable David Ahern
2015-09-02 16:40 ` [PATCH net-next 3/3] net: Add table id from route lookup to route response David Ahern
2015-09-02 18:43   ` Thomas Graf
2015-09-02 18:49     ` David Miller
2015-09-02 18:51       ` David Ahern
2015-09-02 19:08         ` Thomas Graf
2015-09-02 19:43           ` Andy Gospodarek
2015-09-02 20:18             ` Thomas Graf
2015-09-02 22:33             ` David Miller
2015-09-02 20:16           ` [PATCH net-next v2] " David Ahern
2015-09-02 20:23             ` Thomas Graf
2015-09-02 20:34               ` David Ahern
2015-09-02 20:41             ` Alexander Duyck
2015-09-02 20:47               ` David Ahern
2015-09-02 20:53             ` Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).