All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] ipv4: Revert removal of rt_uses_gateway
@ 2019-09-17 17:39 David Ahern
  2019-09-17 18:50 ` Julian Anastasov
  2019-09-21  1:30 ` Jakub Kicinski
  0 siblings, 2 replies; 6+ messages in thread
From: David Ahern @ 2019-09-17 17:39 UTC (permalink / raw)
  To: davem; +Cc: netdev, ja, David Ahern

From: David Ahern <dsahern@gmail.com>

Julian noted that rt_uses_gateway has a more subtle use than 'is gateway
set':
    https://lore.kernel.org/netdev/alpine.LFD.2.21.1909151104060.2546@ja.home.ssi.bg/

Revert that part of the commit referenced in the Fixes tag.

Currently, there are no u8 holes in 'struct rtable'. There is a 4-byte hole
in the second cacheline which contains the gateway declaration. So move
rt_gw_family down to the gateway declarations since they are always used
together, and then re-use that u8 for rt_uses_gateway. End result is that
rtable size is unchanged.

Fixes: 1550c171935d ("ipv4: Prepare rtable for IPv6 gateway")
Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 drivers/infiniband/core/addr.c  |  2 +-
 include/net/route.h             |  3 ++-
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/ip_forward.c           |  2 +-
 net/ipv4/ip_output.c            |  2 +-
 net/ipv4/route.c                | 36 +++++++++++++++++++++---------------
 net/ipv4/xfrm4_policy.c         |  1 +
 7 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 9b76a8fcdd24..bf539c34ccd3 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
 
 	if (family == AF_INET) {
 		rt = container_of(dst, struct rtable, dst);
-		return rt->rt_gw_family == AF_INET;
+		return rt->rt_uses_gateway;
 	}
 
 	rt6 = container_of(dst, struct rt6_info, dst);
diff --git a/include/net/route.h b/include/net/route.h
index dfce19c9fa96..6c516840380d 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -53,10 +53,11 @@ struct rtable {
 	unsigned int		rt_flags;
 	__u16			rt_type;
 	__u8			rt_is_input;
-	u8			rt_gw_family;
+	__u8			rt_uses_gateway;
 
 	int			rt_iif;
 
+	u8			rt_gw_family;
 	/* Info on neighbour */
 	union {
 		__be32		rt_gw4;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f5c163d4771b..a9183543ca30 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -560,7 +560,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
 		goto no_route;
-	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto route_err;
 	rcu_read_unlock();
 	return &rt->dst;
@@ -598,7 +598,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
 		goto no_route;
-	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto route_err;
 	return &rt->dst;
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 06f6f280b9ff..00ec819f949b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 
-	if (opt->is_strictroute && rt->rt_gw_family)
+	if (opt->is_strictroute && rt->rt_uses_gateway)
 		goto sr_failed;
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cc7ef0d05bbd..da521790cd63 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -499,7 +499,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
-	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
+	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto no_route;
 
 	/* OK, we know where to send it, allocate and build IP header. */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b6a6f18c3dd1..7dcce724c78b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -635,6 +635,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 
 	if (fnhe->fnhe_gw) {
 		rt->rt_flags |= RTCF_REDIRECTED;
+		rt->rt_uses_gateway = 1;
 		rt->rt_gw_family = AF_INET;
 		rt->rt_gw4 = fnhe->fnhe_gw;
 	}
@@ -1313,7 +1314,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	mtu = READ_ONCE(dst->dev->mtu);
 
 	if (unlikely(ip_mtu_locked(dst))) {
-		if (rt->rt_gw_family && mtu > 576)
+		if (rt->rt_uses_gateway && mtu > 576)
 			mtu = 576;
 	}
 
@@ -1569,6 +1570,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
 
 		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
+			rt->rt_uses_gateway = 1;
 			rt->rt_gw_family = nhc->nhc_gw_family;
 			/* only INET and INET6 are supported */
 			if (likely(nhc->nhc_gw_family == AF_INET))
@@ -1634,6 +1636,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_iif = 0;
 		rt->rt_pmtu = 0;
 		rt->rt_mtu_locked = 0;
+		rt->rt_uses_gateway = 0;
 		rt->rt_gw_family = 0;
 		rt->rt_gw4 = 0;
 		INIT_LIST_HEAD(&rt->rt_uncached);
@@ -2694,6 +2697,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
+		rt->rt_uses_gateway = ort->rt_uses_gateway;
 		rt->rt_gw_family = ort->rt_gw_family;
 		if (rt->rt_gw_family == AF_INET)
 			rt->rt_gw4 = ort->rt_gw4;
@@ -2778,21 +2782,23 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
 			goto nla_put_failure;
 	}
-	if (rt->rt_gw_family == AF_INET &&
-	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
-		goto nla_put_failure;
-	} else if (rt->rt_gw_family == AF_INET6) {
-		int alen = sizeof(struct in6_addr);
-		struct nlattr *nla;
-		struct rtvia *via;
-
-		nla = nla_reserve(skb, RTA_VIA, alen + 2);
-		if (!nla)
+	if (rt->rt_uses_gateway) {
+		if (rt->rt_gw_family == AF_INET &&
+		    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
 			goto nla_put_failure;
-
-		via = nla_data(nla);
-		via->rtvia_family = AF_INET6;
-		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+		} else if (rt->rt_gw_family == AF_INET6) {
+			int alen = sizeof(struct in6_addr);
+			struct nlattr *nla;
+			struct rtvia *via;
+
+			nla = nla_reserve(skb, RTA_VIA, alen + 2);
+			if (!nla)
+				goto nla_put_failure;
+
+			via = nla_data(nla);
+			via->rtvia_family = AF_INET6;
+			memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+		}
 	}
 
 	expires = rt->dst.expires;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index cdef8f9a3b01..35b84b52b702 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -85,6 +85,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
 					      RTCF_LOCAL);
 	xdst->u.rt.rt_type = rt->rt_type;
+	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_gw_family = rt->rt_gw_family;
 	if (rt->rt_gw_family == AF_INET)
 		xdst->u.rt.rt_gw4 = rt->rt_gw4;
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ipv4: Revert removal of rt_uses_gateway
  2019-09-17 17:39 [PATCH net] ipv4: Revert removal of rt_uses_gateway David Ahern
@ 2019-09-17 18:50 ` Julian Anastasov
  2019-09-17 18:55   ` David Ahern
  2019-09-21  1:30 ` Jakub Kicinski
  1 sibling, 1 reply; 6+ messages in thread
From: Julian Anastasov @ 2019-09-17 18:50 UTC (permalink / raw)
  To: David Ahern; +Cc: davem, netdev, David Ahern


	Hello,

On Tue, 17 Sep 2019, David Ahern wrote:

> From: David Ahern <dsahern@gmail.com>
> 
> Julian noted that rt_uses_gateway has a more subtle use than 'is gateway
> set':
>     https://lore.kernel.org/netdev/alpine.LFD.2.21.1909151104060.2546@ja.home.ssi.bg/
> 
> Revert that part of the commit referenced in the Fixes tag.
> 
> Currently, there are no u8 holes in 'struct rtable'. There is a 4-byte hole
> in the second cacheline which contains the gateway declaration. So move
> rt_gw_family down to the gateway declarations since they are always used
> together, and then re-use that u8 for rt_uses_gateway. End result is that
> rtable size is unchanged.
> 
> Fixes: 1550c171935d ("ipv4: Prepare rtable for IPv6 gateway")
> Reported-by: Julian Anastasov <ja@ssi.bg>
> Signed-off-by: David Ahern <dsahern@gmail.com>

	Looks good to me, thanks!

Reviewed-by: Julian Anastasov <ja@ssi.bg>

> ---
>  drivers/infiniband/core/addr.c  |  2 +-
>  include/net/route.h             |  3 ++-
>  net/ipv4/inet_connection_sock.c |  4 ++--
>  net/ipv4/ip_forward.c           |  2 +-
>  net/ipv4/ip_output.c            |  2 +-
>  net/ipv4/route.c                | 36 +++++++++++++++++++++---------------
>  net/ipv4/xfrm4_policy.c         |  1 +
>  7 files changed, 29 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
> index 9b76a8fcdd24..bf539c34ccd3 100644
> --- a/drivers/infiniband/core/addr.c
> +++ b/drivers/infiniband/core/addr.c
> @@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
>  
>  	if (family == AF_INET) {
>  		rt = container_of(dst, struct rtable, dst);
> -		return rt->rt_gw_family == AF_INET;
> +		return rt->rt_uses_gateway;
>  	}
>  
>  	rt6 = container_of(dst, struct rt6_info, dst);
> diff --git a/include/net/route.h b/include/net/route.h
> index dfce19c9fa96..6c516840380d 100644
> --- a/include/net/route.h
> +++ b/include/net/route.h
> @@ -53,10 +53,11 @@ struct rtable {
>  	unsigned int		rt_flags;
>  	__u16			rt_type;
>  	__u8			rt_is_input;
> -	u8			rt_gw_family;
> +	__u8			rt_uses_gateway;
>  
>  	int			rt_iif;
>  
> +	u8			rt_gw_family;
>  	/* Info on neighbour */
>  	union {
>  		__be32		rt_gw4;
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index f5c163d4771b..a9183543ca30 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -560,7 +560,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
>  	rt = ip_route_output_flow(net, fl4, sk);
>  	if (IS_ERR(rt))
>  		goto no_route;
> -	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
> +	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
>  		goto route_err;
>  	rcu_read_unlock();
>  	return &rt->dst;
> @@ -598,7 +598,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
>  	rt = ip_route_output_flow(net, fl4, sk);
>  	if (IS_ERR(rt))
>  		goto no_route;
> -	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
> +	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
>  		goto route_err;
>  	return &rt->dst;
>  
> diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
> index 06f6f280b9ff..00ec819f949b 100644
> --- a/net/ipv4/ip_forward.c
> +++ b/net/ipv4/ip_forward.c
> @@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
>  
>  	rt = skb_rtable(skb);
>  
> -	if (opt->is_strictroute && rt->rt_gw_family)
> +	if (opt->is_strictroute && rt->rt_uses_gateway)
>  		goto sr_failed;
>  
>  	IPCB(skb)->flags |= IPSKB_FORWARDED;
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index cc7ef0d05bbd..da521790cd63 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -499,7 +499,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
>  	skb_dst_set_noref(skb, &rt->dst);
>  
>  packet_routed:
> -	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
> +	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
>  		goto no_route;
>  
>  	/* OK, we know where to send it, allocate and build IP header. */
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index b6a6f18c3dd1..7dcce724c78b 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -635,6 +635,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
>  
>  	if (fnhe->fnhe_gw) {
>  		rt->rt_flags |= RTCF_REDIRECTED;
> +		rt->rt_uses_gateway = 1;
>  		rt->rt_gw_family = AF_INET;
>  		rt->rt_gw4 = fnhe->fnhe_gw;
>  	}
> @@ -1313,7 +1314,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
>  	mtu = READ_ONCE(dst->dev->mtu);
>  
>  	if (unlikely(ip_mtu_locked(dst))) {
> -		if (rt->rt_gw_family && mtu > 576)
> +		if (rt->rt_uses_gateway && mtu > 576)
>  			mtu = 576;
>  	}
>  
> @@ -1569,6 +1570,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
>  		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
>  
>  		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
> +			rt->rt_uses_gateway = 1;
>  			rt->rt_gw_family = nhc->nhc_gw_family;
>  			/* only INET and INET6 are supported */
>  			if (likely(nhc->nhc_gw_family == AF_INET))
> @@ -1634,6 +1636,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
>  		rt->rt_iif = 0;
>  		rt->rt_pmtu = 0;
>  		rt->rt_mtu_locked = 0;
> +		rt->rt_uses_gateway = 0;
>  		rt->rt_gw_family = 0;
>  		rt->rt_gw4 = 0;
>  		INIT_LIST_HEAD(&rt->rt_uncached);
> @@ -2694,6 +2697,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
>  		rt->rt_genid = rt_genid_ipv4(net);
>  		rt->rt_flags = ort->rt_flags;
>  		rt->rt_type = ort->rt_type;
> +		rt->rt_uses_gateway = ort->rt_uses_gateway;
>  		rt->rt_gw_family = ort->rt_gw_family;
>  		if (rt->rt_gw_family == AF_INET)
>  			rt->rt_gw4 = ort->rt_gw4;
> @@ -2778,21 +2782,23 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
>  		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
>  			goto nla_put_failure;
>  	}
> -	if (rt->rt_gw_family == AF_INET &&
> -	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
> -		goto nla_put_failure;
> -	} else if (rt->rt_gw_family == AF_INET6) {
> -		int alen = sizeof(struct in6_addr);
> -		struct nlattr *nla;
> -		struct rtvia *via;
> -
> -		nla = nla_reserve(skb, RTA_VIA, alen + 2);
> -		if (!nla)
> +	if (rt->rt_uses_gateway) {
> +		if (rt->rt_gw_family == AF_INET &&
> +		    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
>  			goto nla_put_failure;
> -
> -		via = nla_data(nla);
> -		via->rtvia_family = AF_INET6;
> -		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
> +		} else if (rt->rt_gw_family == AF_INET6) {
> +			int alen = sizeof(struct in6_addr);
> +			struct nlattr *nla;
> +			struct rtvia *via;
> +
> +			nla = nla_reserve(skb, RTA_VIA, alen + 2);
> +			if (!nla)
> +				goto nla_put_failure;
> +
> +			via = nla_data(nla);
> +			via->rtvia_family = AF_INET6;
> +			memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
> +		}
>  	}
>  
>  	expires = rt->dst.expires;
> diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
> index cdef8f9a3b01..35b84b52b702 100644
> --- a/net/ipv4/xfrm4_policy.c
> +++ b/net/ipv4/xfrm4_policy.c
> @@ -85,6 +85,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
>  	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
>  					      RTCF_LOCAL);
>  	xdst->u.rt.rt_type = rt->rt_type;
> +	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
>  	xdst->u.rt.rt_gw_family = rt->rt_gw_family;
>  	if (rt->rt_gw_family == AF_INET)
>  		xdst->u.rt.rt_gw4 = rt->rt_gw4;
> -- 
> 2.11.0

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ipv4: Revert removal of rt_uses_gateway
  2019-09-17 18:50 ` Julian Anastasov
@ 2019-09-17 18:55   ` David Ahern
  2019-09-17 22:02     ` Julian Anastasov
  0 siblings, 1 reply; 6+ messages in thread
From: David Ahern @ 2019-09-17 18:55 UTC (permalink / raw)
  To: Julian Anastasov, David Ahern; +Cc: davem, netdev

On 9/17/19 12:50 PM, Julian Anastasov wrote:
> 
> 	Looks good to me, thanks!
> 
> Reviewed-by: Julian Anastasov <ja@ssi.bg>
> 

BTW, do you have any tests for the rt_uses_gateway paths - showing why
it is needed?

All of the pmtu, redirect, fib tests, etc worked fine without the
special flag. Sure, the 'ip ro get' had extra data; it seems like that
could be handled.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ipv4: Revert removal of rt_uses_gateway
  2019-09-17 18:55   ` David Ahern
@ 2019-09-17 22:02     ` Julian Anastasov
  0 siblings, 0 replies; 6+ messages in thread
From: Julian Anastasov @ 2019-09-17 22:02 UTC (permalink / raw)
  To: David Ahern; +Cc: David Ahern, davem, netdev


	Hello,

On Tue, 17 Sep 2019, David Ahern wrote:

> On 9/17/19 12:50 PM, Julian Anastasov wrote:
> > 
> > 	Looks good to me, thanks!
> > 
> > Reviewed-by: Julian Anastasov <ja@ssi.bg>
> > 
> 
> BTW, do you have any tests for the rt_uses_gateway paths - showing why
> it is needed?

	No special tests.

> All of the pmtu, redirect, fib tests, etc worked fine without the
> special flag. Sure, the 'ip ro get' had extra data; it seems like that
> could be handled.

	I'll explain. In the period before the route cache
was removed in 3.6, there were two fields: rt_dst and rt_gateway.
For targets on LAN both contained the target. For targets via
gateway (nh_gw), rt_dst still stores the target but rt_gateway
stored the nh_gw. In short, rt_gateway always contained the
next hop IP for neigh resolving.

	In 3.6, rt_dst was removed and only rt_gateway remained
to store nh_gw. As fnhe_rth/nh_pcpu_rth_output were used to
cache the output route, rt_gateway can not contain any IP
because the route can be used for any target on the LAN.
This is true even now, rt_gateway is 0 for cached routes
that are not DST_HOST, i.e. not for single target IP.

	Why this matters? There are users such as IPVS,
TEE, raw.c (IP_HDRINCL) that use FLOWI_FLAG_KNOWN_NH to
request route with rt_gateway != 0 to be returned, i.e.
they want rt_gateway to store a next hop IP for neigh
resolving but to put different IP in iph->daddr. This is
honoured by rt_nexthop(), it prefers rt_gateway before the
iph->daddr. With this FLOWI flag the routing will avoid
returning routes with rt_gateway = 0 (cached in NH), instead
it will allocate DST_HOST route which can safely hold IP
in rt_gateway.

	So, in 3.7 commit 155e8336c373 ("ipv4: introduce
rt_uses_gateway") was created to restore the knowledge
that rt_dst != rt_gateway means route via GW and also
commit c92b96553a80 ("ipv4: Add FLOWI_FLAG_KNOWN_NH")
to make sure packets are routed by requested next hop and
not by iph->daddr.

	You see the places that need to know if rt_gateway
contains nh_gw (via GW) or just a requested next hop (when
nh_gw is 0). It matters for cases where strict source routes
should be on connected network.

	In simple tests things are working without rt_uses_gateway
flag because it is a corner case to see above cases combined
with strict source routing or MTU locking. Not sure if we can
use some trick to support it differently without such flag.

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ipv4: Revert removal of rt_uses_gateway
  2019-09-17 17:39 [PATCH net] ipv4: Revert removal of rt_uses_gateway David Ahern
  2019-09-17 18:50 ` Julian Anastasov
@ 2019-09-21  1:30 ` Jakub Kicinski
  2019-09-21  1:32   ` David Ahern
  1 sibling, 1 reply; 6+ messages in thread
From: Jakub Kicinski @ 2019-09-21  1:30 UTC (permalink / raw)
  To: David Ahern; +Cc: davem, netdev, ja, David Ahern

On Tue, 17 Sep 2019 10:39:49 -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Julian noted that rt_uses_gateway has a more subtle use than 'is gateway
> set':
>     https://lore.kernel.org/netdev/alpine.LFD.2.21.1909151104060.2546@ja.home.ssi.bg/
> 
> Revert that part of the commit referenced in the Fixes tag.
> 
> Currently, there are no u8 holes in 'struct rtable'. There is a 4-byte hole
> in the second cacheline which contains the gateway declaration. So move
> rt_gw_family down to the gateway declarations since they are always used
> together, and then re-use that u8 for rt_uses_gateway. End result is that
> rtable size is unchanged.
> 
> Fixes: 1550c171935d ("ipv4: Prepare rtable for IPv6 gateway")
> Reported-by: Julian Anastasov <ja@ssi.bg>
> Signed-off-by: David Ahern <dsahern@gmail.com>

I'm assuming the mix of u8 and __u8 is intentional, since this is a partial revert :)

Applied, queued, thanks!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] ipv4: Revert removal of rt_uses_gateway
  2019-09-21  1:30 ` Jakub Kicinski
@ 2019-09-21  1:32   ` David Ahern
  0 siblings, 0 replies; 6+ messages in thread
From: David Ahern @ 2019-09-21  1:32 UTC (permalink / raw)
  To: Jakub Kicinski, David Ahern; +Cc: davem, netdev, ja

On 9/20/19 7:30 PM, Jakub Kicinski wrote:
> I'm assuming the mix of u8 and __u8 is intentional, since this is a partial revert :)

original patch for rt_uses_gateway used __u8 so yes put that back.

not sure why I used u8 for the new field, but left it on the move.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-09-21  1:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-17 17:39 [PATCH net] ipv4: Revert removal of rt_uses_gateway David Ahern
2019-09-17 18:50 ` Julian Anastasov
2019-09-17 18:55   ` David Ahern
2019-09-17 22:02     ` Julian Anastasov
2019-09-21  1:30 ` Jakub Kicinski
2019-09-21  1:32   ` David Ahern

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.