netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
@ 2017-05-12 11:15 Jan Moskyto Matejka
  2017-05-12 15:24 ` David Miller
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Moskyto Matejka @ 2017-05-12 11:15 UTC (permalink / raw)
  To: netdev
  Cc: Jan Moskyto Matejka, linux-kernel, David Ahern, David S. Miller,
	Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI,
	Patrick McHardy

When rt6_fill_node() fails to fit the route into the buffer,
it drops the route, returns -EMSGSIZE and waits for buffer flush.
This condition is detected by non-null return value and non-empty
buffer; the buffer is flushed and rt6_fill_node() restarted.

However, when a single route generates such a long message that
it doesn't fit into the buffer itself, inet6_dump_fib() misinterprets
the non-null return value together with non-empty buffer as end of dump
and silently truncates the dump.

This patch fixes this by explicitly truncating the message and
inidicating it by NLM_F_TRUNC flag in its nlmsghdr.

Reproducer:
  # ip -6 route show
  ... it shows some routes
  # ip -6 route add fccc::/64 via fe80::ff:fe00:0 dev testdev table 2
  # for a in $(seq 1 1000); do
      ip -6 route append fccc::/64 via fe80::ff:fe00:$a dev testdev table 2
    done
  # ip -6 route show
  ... the output is truncated

This came to light by David Ahern's
commit beb1afac518dec5a15dc ("net: ipv6: Add support to dump multipath
routes via RTA_MULTIPATH attribute")
but obviously existed before, just hidden.

Signed-off-by: Jan Moskyto Matejka <mq@ucw.cz>
---
 include/net/ip6_route.h      |  2 +-
 include/uapi/linux/netlink.h |  1 +
 net/ipv6/ip6_fib.c           | 17 ++++++++++++++---
 net/ipv6/route.c             |  9 +++++++--
 net/netlink/af_netlink.c     |  7 ++++++-
 5 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9dc2c182a263..9b035b6bdf8c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -156,7 +156,7 @@ struct rt6_rtnl_dump_arg {
 	struct net *net;
 };
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
 void rt6_ifdown(struct net *net, struct net_device *dev);
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f3946a27bd07..1d463dbf89db 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -56,6 +56,7 @@ struct nlmsghdr {
 #define NLM_F_ECHO		8	/* Echo this request 		*/
 #define NLM_F_DUMP_INTR		16	/* Dump was inconsistent due to sequence change */
 #define NLM_F_DUMP_FILTERED	32	/* Dump was filtered as requested */
+#define NLM_F_TRUNC		64	/* Message truncated */
 
 /* Modifiers to GET request */
 #define NLM_F_ROOT	0x100	/* specify tree	root	*/
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d4bf2c68a545..4a962a61e559 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -310,13 +310,20 @@ static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
 	struct rt6_info *rt;
+	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *)w->args;
 
 	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
-		res = rt6_dump_route(rt, w->args);
+		res = rt6_dump_route(rt, w->args, 0);
+		if (res < 0 && arg->skb->len == 0)
+			/* One single route is too long for buffer.
+			 * Will truncate it.
+			 */
+			res = rt6_dump_route(rt, w->args, 1);
+
 		if (res < 0) {
 			/* Frame is full, suspend walking */
 			w->leaf = rt;
-			return 1;
+			return res;
 		}
 
 		/* Multipath routes are dumped in one route with the
@@ -456,9 +463,14 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	cb->args[1] = e;
 	cb->args[0] = h;
 
-	res = res < 0 ? res : skb->len;
 	if (res <= 0)
 		fib6_dump_end(cb);
+
+	if (res == -EMSGSIZE && skb->len)
+		res = skb->len;
+	else
+		res = res < 0 ? res : skb->len;
+
 	return res;
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb174b590fd3..0adcbdba87a1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3539,11 +3539,16 @@ static int rt6_fill_node(struct net *net,
 	return 0;
 
 nla_put_failure:
+	if (flags & NLM_F_TRUNC) {
+		nlmsg_end(skb, nlh);
+		return 0;
+	}
+
 	nlmsg_cancel(skb, nlh);
 	return -EMSGSIZE;
 }
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	struct net *net = arg->net;
@@ -3565,7 +3570,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 	return rt6_fill_node(net,
 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
-		     NLM_F_MULTI);
+		     NLM_F_MULTI | (truncate ? NLM_F_TRUNC : 0));
 }
 
 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 596eaff66649..f8102f976cad 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2177,7 +2177,12 @@ static int netlink_dump(struct sock *sk)
 		return 0;
 	}
 
-	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
+	if (len < 0)
+		nlh = nlmsg_put_answer(skb, cb, NLMSG_ERROR, sizeof(len), 0);
+	else
+		nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len),
+				       NLM_F_MULTI);
+
 	if (!nlh)
 		goto errout_skb;
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-12 11:15 [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer Jan Moskyto Matejka
@ 2017-05-12 15:24 ` David Miller
  2017-05-12 17:26   ` David Ahern
  2017-05-12 21:34   ` Jan Moskyto Matejka
  0 siblings, 2 replies; 14+ messages in thread
From: David Miller @ 2017-05-12 15:24 UTC (permalink / raw)
  To: mq; +Cc: netdev, linux-kernel, dsa, kuznet, jmorris, yoshfuji, kaber

From: Jan Moskyto Matejka <mq@ucw.cz>
Date: Fri, 12 May 2017 13:15:10 +0200

> -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
> +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);

Please use "bool" and "true"/"false" for boolean values.

What does ipv4 do in this situation?

I'm hesitant to be OK with adding a new nlmsg flag just for this case
if we solve this problem differently and using existing mechanisms
elsewhere.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-12 15:24 ` David Miller
@ 2017-05-12 17:26   ` David Ahern
  2017-05-12 17:34     ` David Miller
  2017-05-12 21:41     ` Jan Moskyto Matejka
  2017-05-12 21:34   ` Jan Moskyto Matejka
  1 sibling, 2 replies; 14+ messages in thread
From: David Ahern @ 2017-05-12 17:26 UTC (permalink / raw)
  To: David Miller, mq
  Cc: netdev, linux-kernel, dsa, kuznet, jmorris, yoshfuji, kaber

On 5/12/17 8:24 AM, David Miller wrote:
> From: Jan Moskyto Matejka <mq@ucw.cz>
> Date: Fri, 12 May 2017 13:15:10 +0200
> 
>> -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
>> +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
> 
> Please use "bool" and "true"/"false" for boolean values.
> 
> What does ipv4 do in this situation?
> 
> I'm hesitant to be OK with adding a new nlmsg flag just for this case
> if we solve this problem differently and using existing mechanisms
> elsewhere.
> 

I'll take a look at this later today or this weekend; we can't just
truncate the route returned to userspace.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-12 17:26   ` David Ahern
@ 2017-05-12 17:34     ` David Miller
  2017-05-12 21:41     ` Jan Moskyto Matejka
  1 sibling, 0 replies; 14+ messages in thread
From: David Miller @ 2017-05-12 17:34 UTC (permalink / raw)
  To: dsahern; +Cc: mq, netdev, linux-kernel, dsa, kuznet, jmorris, yoshfuji, kaber

From: David Ahern <dsahern@gmail.com>
Date: Fri, 12 May 2017 10:26:08 -0700

> On 5/12/17 8:24 AM, David Miller wrote:
>> From: Jan Moskyto Matejka <mq@ucw.cz>
>> Date: Fri, 12 May 2017 13:15:10 +0200
>> 
>>> -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
>>> +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
>> 
>> Please use "bool" and "true"/"false" for boolean values.
>> 
>> What does ipv4 do in this situation?
>> 
>> I'm hesitant to be OK with adding a new nlmsg flag just for this case
>> if we solve this problem differently and using existing mechanisms
>> elsewhere.
>> 
> 
> I'll take a look at this later today or this weekend; we can't just
> truncate the route returned to userspace.

Agreed.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-12 15:24 ` David Miller
  2017-05-12 17:26   ` David Ahern
@ 2017-05-12 21:34   ` Jan Moskyto Matejka
  2017-05-12 21:43     ` Jan Moskyto Matejka
  1 sibling, 1 reply; 14+ messages in thread
From: Jan Moskyto Matejka @ 2017-05-12 21:34 UTC (permalink / raw)
  To: David Miller
  Cc: mq, netdev, linux-kernel, dsa, kuznet, jmorris, yoshfuji, kaber

On Fri, May 12, 2017 at 11:24:47AM -0400, David Miller wrote:
> From: Jan Moskyto Matejka <mq@ucw.cz>
> Date: Fri, 12 May 2017 13:15:10 +0200
> 
> > -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
> > +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
> 
> Please use "bool" and "true"/"false" for boolean values.

Missed that, sorry. Will remember next time.

> What does ipv4 do in this situation?
> 
> I'm hesitant to be OK with adding a new nlmsg flag just for this case
> if we solve this problem differently and using existing mechanisms
> elsewhere.

IPv4 is broken the same way as IPv6, with the difference that
'ip route append' does not append a new multipath nexthop but
creates a whole new route.

It is probably impossible to create such a big route via iproute2 tool;
it is needed to open netlink socket by hand and write there the
attached file. (It is one nlmsg adding one huge multipath route.)

It should be also noted that 'ip monitor' gets the huge routes
truncated.

MQ

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-12 17:26   ` David Ahern
  2017-05-12 17:34     ` David Miller
@ 2017-05-12 21:41     ` Jan Moskyto Matejka
  2017-05-13  6:52       ` David Ahern
  1 sibling, 1 reply; 14+ messages in thread
From: Jan Moskyto Matejka @ 2017-05-12 21:41 UTC (permalink / raw)
  To: David Ahern
  Cc: David Miller, mq, netdev, linux-kernel, dsa, kuznet, jmorris,
	yoshfuji, kaber

On Fri, May 12, 2017 at 10:26:08AM -0700, David Ahern wrote:
> On 5/12/17 8:24 AM, David Miller wrote:
> > From: Jan Moskyto Matejka <mq@ucw.cz>
> > Date: Fri, 12 May 2017 13:15:10 +0200
> > 
> >> -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
> >> +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
> > 
> > Please use "bool" and "true"/"false" for boolean values.
> > 
> > What does ipv4 do in this situation?
> > 
> > I'm hesitant to be OK with adding a new nlmsg flag just for this case
> > if we solve this problem differently and using existing mechanisms
> > elsewhere.
> > 
> 
> I'll take a look at this later today or this weekend; we can't just
> truncate the route returned to userspace.

Agreed. My favourite would be skb realloc somewhere inside the dump loop
... but I don't know whether it's feasible.

MQ

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-12 21:34   ` Jan Moskyto Matejka
@ 2017-05-12 21:43     ` Jan Moskyto Matejka
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Moskyto Matejka @ 2017-05-12 21:43 UTC (permalink / raw)
  To: David Miller
  Cc: mq, netdev, linux-kernel, dsa, kuznet, jmorris, yoshfuji, kaber

[-- Attachment #1: Type: text/plain, Size: 1102 bytes --]

On Fri, May 12, 2017 at 11:34:04PM +0200, Jan Moskyto Matejka wrote:
> On Fri, May 12, 2017 at 11:24:47AM -0400, David Miller wrote:
> > From: Jan Moskyto Matejka <mq@ucw.cz>
> > Date: Fri, 12 May 2017 13:15:10 +0200
> > 
> > > -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
> > > +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
> > 
> > Please use "bool" and "true"/"false" for boolean values.
> 
> Missed that, sorry. Will remember next time.
> 
> > What does ipv4 do in this situation?
> > 
> > I'm hesitant to be OK with adding a new nlmsg flag just for this case
> > if we solve this problem differently and using existing mechanisms
> > elsewhere.
> 
> IPv4 is broken the same way as IPv6, with the difference that
> 'ip route append' does not append a new multipath nexthop but
> creates a whole new route.
> 
> It is probably impossible to create such a big route via iproute2 tool;
> it is needed to open netlink socket by hand and write there the
> attached file. (It is one nlmsg adding one huge multipath route.)

Oops, it's late evening here. Attached now.
MQ

[-- Attachment #2: ipv4-huge-multipath --]
[-- Type: application/octet-stream, Size: 65560 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-12 21:41     ` Jan Moskyto Matejka
@ 2017-05-13  6:52       ` David Ahern
  2017-05-13 10:54         ` Jan Moskyto Matejka
  0 siblings, 1 reply; 14+ messages in thread
From: David Ahern @ 2017-05-13  6:52 UTC (permalink / raw)
  To: Jan Moskyto Matejka; +Cc: David Miller, mq, netdev, roopa

On 5/12/17 3:41 PM, Jan Moskyto Matejka wrote:
> On Fri, May 12, 2017 at 10:26:08AM -0700, David Ahern wrote:
>> On 5/12/17 8:24 AM, David Miller wrote:
>>> From: Jan Moskyto Matejka <mq@ucw.cz>
>>> Date: Fri, 12 May 2017 13:15:10 +0200
>>>
>>>> -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
>>>> +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
>>>
>>> Please use "bool" and "true"/"false" for boolean values.
>>>
>>> What does ipv4 do in this situation?
>>>
>>> I'm hesitant to be OK with adding a new nlmsg flag just for this case
>>> if we solve this problem differently and using existing mechanisms
>>> elsewhere.
>>>
>>
>> I'll take a look at this later today or this weekend; we can't just
>> truncate the route returned to userspace.
> 
> Agreed. My favourite would be skb realloc somewhere inside the dump loop
> ... but I don't know whether it's feasible.
> 
> MQ
> 

Here is what is happening: user initiates a route dump and specifies a
buffer size for receiving the message which becomes max_recvmsg_len.
This buffer size dictates the skb length allocated by netlink_dump.

The dump is interrupted when a route does not fit in the skb and
returns. Subsequent call picks up with the next route to be dumped - the
one that overflowed. All good and normal so far.

If the next route is larger than max_recvmsg_len, then the route can not
be put in the buffer, nothing is returned to the user which causes the
dump to abort showing the abbreviated output. This problem occurs with
IPv4 and IPv6. You can see this with modest size routes by just dropping
the buffer size to something really small (e.g., with iproute2, change
buf size in rtnl_dump_filter_l to say 2048).

I see 2 problems:
1. the kernel is not telling the user the supplied buffer is too small
(ie., if a single route does not fit in the skb then it should fail and
return an error code to the user),

2. multipath routes for IPv4 and IPv6 do not have a limit.

Should the kernel put a limit on the number of nexthops? I recently put
a cap on MPLS route size as 4096 bytes, but I think this should be
revisited in terms of a limit on number of nexthops to create a
consistent limit even if struct sizes change. And, the limit on the
number of nexthops should be consistent across address families (same
limit for IPv4, IPv6, and MPLS).

>From discussions I have had, 32 nexthops for a single route is on the
laughably high side, but some people do crazy things. How about a limit
of 256 nexthops?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-13  6:52       ` David Ahern
@ 2017-05-13 10:54         ` Jan Moskyto Matejka
  2017-05-13 17:13           ` David Ahern
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Moskyto Matejka @ 2017-05-13 10:54 UTC (permalink / raw)
  To: David Ahern; +Cc: David Miller, mq, netdev, roopa

On Sat, May 13, 2017 at 12:52:47AM -0600, David Ahern wrote:
> On 5/12/17 3:41 PM, Jan Moskyto Matejka wrote:
> > On Fri, May 12, 2017 at 10:26:08AM -0700, David Ahern wrote:
> >> On 5/12/17 8:24 AM, David Miller wrote:
> >>> From: Jan Moskyto Matejka <mq@ucw.cz>
> >>> Date: Fri, 12 May 2017 13:15:10 +0200
> >>>
> >>>> -int rt6_dump_route(struct rt6_info *rt, void *p_arg);
> >>>> +int rt6_dump_route(struct rt6_info *rt, void *p_arg, int truncate);
> >>>
> >>> Please use "bool" and "true"/"false" for boolean values.
> >>>
> >>> What does ipv4 do in this situation?
> >>>
> >>> I'm hesitant to be OK with adding a new nlmsg flag just for this case
> >>> if we solve this problem differently and using existing mechanisms
> >>> elsewhere.
> >>>
> >>
> >> I'll take a look at this later today or this weekend; we can't just
> >> truncate the route returned to userspace.
> > 
> > Agreed. My favourite would be skb realloc somewhere inside the dump loop
> > ... but I don't know whether it's feasible.
> > 
> > MQ
> > 
> 
> Here is what is happening: user initiates a route dump and specifies a
> buffer size for receiving the message which becomes max_recvmsg_len.
> This buffer size dictates the skb length allocated by netlink_dump.
> 
> The dump is interrupted when a route does not fit in the skb and
> returns. Subsequent call picks up with the next route to be dumped - the
> one that overflowed. All good and normal so far.
> 
> If the next route is larger than max_recvmsg_len, then the route can not
> be put in the buffer, nothing is returned to the user which causes the
> dump to abort showing the abbreviated output. This problem occurs with
> IPv4 and IPv6. You can see this with modest size routes by just dropping
> the buffer size to something really small (e.g., with iproute2, change
> buf size in rtnl_dump_filter_l to say 2048).

Ergo no skb realloc is possible.

> I see 2 problems:
> 1. the kernel is not telling the user the supplied buffer is too small
> (ie., if a single route does not fit in the skb then it should fail and
> return an error code to the user),

Definitely. I want just to note that this condition usually occurs
somewhere during route dump. To know it before starting output, we would 
have to walk the FIB once before dump to calculate max route len.

> 2. multipath routes for IPv4 and IPv6 do not have a limit.
> 
> Should the kernel put a limit on the number of nexthops? I recently put
> a cap on MPLS route size as 4096 bytes, but I think this should be
> revisited in terms of a limit on number of nexthops to create a
> consistent limit even if struct sizes change. And, the limit on the
> number of nexthops should be consistent across address families (same
> limit for IPv4, IPv6, and MPLS).
> 
> From discussions I have had, 32 nexthops for a single route is on the
> laughably high side, but some people do crazy things. How about a limit
> of 256 nexthops?

256 should be OK even for a crazy developer of BIRD.

It would be nice to have if the returned error were somehow useful for
the userspace -- to know what is happening, not only something like
"impossible to add / append route".

Thanks for looking into that!
MQ

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-13 10:54         ` Jan Moskyto Matejka
@ 2017-05-13 17:13           ` David Ahern
  2017-05-13 17:29             ` Jan Moskyto Matejka
  0 siblings, 1 reply; 14+ messages in thread
From: David Ahern @ 2017-05-13 17:13 UTC (permalink / raw)
  To: Jan Moskyto Matejka; +Cc: David Miller, mq, netdev, roopa

On 5/13/17 4:54 AM, Jan Moskyto Matejka wrote:
>> I see 2 problems:
>> 1. the kernel is not telling the user the supplied buffer is too small
>> (ie., if a single route does not fit in the skb then it should fail and
>> return an error code to the user),
> 
> Definitely. I want just to note that this condition usually occurs
> somewhere during route dump. To know it before starting output, we would 
> have to walk the FIB once before dump to calculate max route len.

When adding a route to the skb, track whether it contains at least 1
route. If not, it means the next route in the dump is larger than the
given buffer. Detect this condition and error out of the dump -
returning an error to the user (-ENOSPC? or EMSGSIZE?)

> 
>> 2. multipath routes for IPv4 and IPv6 do not have a limit.
>>
>> Should the kernel put a limit on the number of nexthops? I recently put
>> a cap on MPLS route size as 4096 bytes, but I think this should be
>> revisited in terms of a limit on number of nexthops to create a
>> consistent limit even if struct sizes change. And, the limit on the
>> number of nexthops should be consistent across address families (same
>> limit for IPv4, IPv6, and MPLS).
>>
>> From discussions I have had, 32 nexthops for a single route is on the
>> laughably high side, but some people do crazy things. How about a limit
>> of 256 nexthops?
> 
> 256 should be OK even for a crazy developer of BIRD.
> 
> It would be nice to have if the returned error were somehow useful for
> the userspace -- to know what is happening, not only something like
> "impossible to add / append route".

Top of tree kernel has extended error reporting so a message can be
returned that says something to the effect of "route size is larger than
supplied buffer size".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-13 17:13           ` David Ahern
@ 2017-05-13 17:29             ` Jan Moskyto Matejka
  2017-05-14 21:00               ` Johannes Berg
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Moskyto Matejka @ 2017-05-13 17:29 UTC (permalink / raw)
  To: David Ahern; +Cc: David Miller, mq, netdev, roopa

On Sat, May 13, 2017 at 11:13:38AM -0600, David Ahern wrote:
> On 5/13/17 4:54 AM, Jan Moskyto Matejka wrote:
> >> I see 2 problems:
> >> 1. the kernel is not telling the user the supplied buffer is too small
> >> (ie., if a single route does not fit in the skb then it should fail and
> >> return an error code to the user),
> > 
> > Definitely. I want just to note that this condition usually occurs
> > somewhere during route dump. To know it before starting output, we would 
> > have to walk the FIB once before dump to calculate max route len.
> 
> When adding a route to the skb, track whether it contains at least 1
> route. If not, it means the next route in the dump is larger than the
> given buffer. Detect this condition and error out of the dump -
> returning an error to the user (-ENOSPC? or EMSGSIZE?)

EMSGSIZE seems OK for me.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-13 17:29             ` Jan Moskyto Matejka
@ 2017-05-14 21:00               ` Johannes Berg
  2017-05-14 22:14                 ` David Ahern
  0 siblings, 1 reply; 14+ messages in thread
From: Johannes Berg @ 2017-05-14 21:00 UTC (permalink / raw)
  To: Jan Moskyto Matejka, David Ahern; +Cc: David Miller, mq, netdev, roopa

On Sat, 2017-05-13 at 19:29 +0200, Jan Moskyto Matejka wrote:
> 
> > When adding a route to the skb, track whether it contains at least
> > 1
> > route. If not, it means the next route in the dump is larger than
> > the
> > given buffer. Detect this condition and error out of the dump -
> > returning an error to the user (-ENOSPC? or EMSGSIZE?)
> 
> EMSGSIZE seems OK for me.

If we return an error here, and consequently allow for userspace
changes to pick this up, perhaps we could also consider allowing to
split the dump between nexthops, so that arbitrary such things can be
returned.

We did a similar thing in nl80211 at some point - originally, a dump of
wireless devices present was doing a whole device per message, we later
allowed splitting a single device across multiple messages because
capability information was reaching a reasonable message size limit.

johannes

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-14 21:00               ` Johannes Berg
@ 2017-05-14 22:14                 ` David Ahern
  2017-05-14 22:20                   ` Johannes Berg
  0 siblings, 1 reply; 14+ messages in thread
From: David Ahern @ 2017-05-14 22:14 UTC (permalink / raw)
  To: Johannes Berg, Jan Moskyto Matejka; +Cc: David Miller, mq, netdev, roopa

On 5/14/17 3:00 PM, Johannes Berg wrote:
> On Sat, 2017-05-13 at 19:29 +0200, Jan Moskyto Matejka wrote:
>>
>>> When adding a route to the skb, track whether it contains at least
>>> 1
>>> route. If not, it means the next route in the dump is larger than
>>> the
>>> given buffer. Detect this condition and error out of the dump -
>>> returning an error to the user (-ENOSPC? or EMSGSIZE?)
>>
>> EMSGSIZE seems OK for me.
> 
> If we return an error here, and consequently allow for userspace
> changes to pick this up, perhaps we could also consider allowing to
> split the dump between nexthops, so that arbitrary such things can be
> returned.

Returning an error should not impact existing userspace; it should
already be checking for an error response in the message.

Splitting the dump between nexthops across multiple messages will have
repercussions on userspace. I think (at least for rtnetlink and links,
addresses, routes) userspace needs to provide a buffer large enough for
a complete object. If we limit the number of nexthops to something
reasonable (e.g., 256), then ipv4 for example will be ~ 3kB + lwt encap
size we are talking on the order of an 8kb maybe 16kB buffer. That is a
reasonable request for the API.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer.
  2017-05-14 22:14                 ` David Ahern
@ 2017-05-14 22:20                   ` Johannes Berg
  0 siblings, 0 replies; 14+ messages in thread
From: Johannes Berg @ 2017-05-14 22:20 UTC (permalink / raw)
  To: David Ahern, Jan Moskyto Matejka; +Cc: David Miller, mq, netdev, roopa

On Sun, 2017-05-14 at 16:14 -0600, David Ahern wrote:
> On 5/14/17 3:00 PM, Johannes Berg wrote:
> > On Sat, 2017-05-13 at 19:29 +0200, Jan Moskyto Matejka wrote:
> > > 
> > > > When adding a route to the skb, track whether it contains at
> > > > least
> > > > 1
> > > > route. If not, it means the next route in the dump is larger
> > > > than
> > > > the
> > > > given buffer. Detect this condition and error out of the dump -
> > > > returning an error to the user (-ENOSPC? or EMSGSIZE?)
> > > 
> > > EMSGSIZE seems OK for me.
> > 
> > If we return an error here, and consequently allow for userspace
> > changes to pick this up, perhaps we could also consider allowing to
> > split the dump between nexthops, so that arbitrary such things can
> > be
> > returned.
> 
> Returning an error should not impact existing userspace; it should
> already be checking for an error response in the message.

Right. I was sort of thinking that you'd catch the error and try with a
bigger buffer or so.

> Splitting the dump between nexthops across multiple messages will
> have repercussions on userspace. I think (at least for rtnetlink and
> links, addresses, routes) userspace needs to provide a buffer large
> enough for a complete object. If we limit the number of nexthops to
> something reasonable (e.g., 256), then ipv4 for example will be ~ 3kB
> + lwt encap size we are talking on the order of an 8kb maybe 16kB
> buffer. That is a reasonable request for the API.

Yeah, that seems reasonable.

We had a bigger problem here, in that the # of channels supported, and
the amount of data per channel, was increasing all the time.

johannes

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-05-14 22:20 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-12 11:15 [PATCH] net: ipv6: Truncate single route when it doesn't fit into dump buffer Jan Moskyto Matejka
2017-05-12 15:24 ` David Miller
2017-05-12 17:26   ` David Ahern
2017-05-12 17:34     ` David Miller
2017-05-12 21:41     ` Jan Moskyto Matejka
2017-05-13  6:52       ` David Ahern
2017-05-13 10:54         ` Jan Moskyto Matejka
2017-05-13 17:13           ` David Ahern
2017-05-13 17:29             ` Jan Moskyto Matejka
2017-05-14 21:00               ` Johannes Berg
2017-05-14 22:14                 ` David Ahern
2017-05-14 22:20                   ` Johannes Berg
2017-05-12 21:34   ` Jan Moskyto Matejka
2017-05-12 21:43     ` Jan Moskyto Matejka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).