netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
@ 2017-01-30 20:36 Robert Shearman
  2017-01-31  0:17 ` Eric W. Biederman
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: Robert Shearman @ 2017-01-30 20:36 UTC (permalink / raw)
  To: davem; +Cc: netdev, roopa, ebiederm, Robert Shearman

It is sometimes desirable to present an MPLS transport network as a
single hop to traffic transiting it because it prevents confusion when
diagnosing failures. An example of where confusion can be generated is
when addresses used in the provider network overlap with addresses in
the overlay network and the addresses get exposed through ICMP errors
generated as packets transit the provider network.

Therefore, provide the ability to control whether the TTL value from
an MPLS packet is propagated to an IPv4/IPv6 packet when the last
label is popped through the addition of a new per-namespace sysctl:
"net.mpls.ip_ttl_propagate" which defaults to enabled.

Use the same sysctl to control whether the TTL is propagated from IP
packets into the MPLS header. If the TTL isn't propagated then a
default TTL value is used which can be configured via a new sysctl:
"net.mpls.default_ttl".

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 Documentation/networking/mpls-sysctl.txt | 19 +++++++++
 include/net/netns/mpls.h                 |  3 ++
 net/mpls/af_mpls.c                       | 70 ++++++++++++++++++++++++--------
 net/mpls/mpls_iptunnel.c                 | 12 +++++-
 4 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 15d8d16934fd..b8f0725ff09e 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -19,6 +19,25 @@ platform_labels - INTEGER
 	Possible values: 0 - 1048575
 	Default: 0
 
+ip_ttl_propagate - BOOL
+	Control whether TTL is propagated from the IPv4/IPv6 header to
+	the MPLS header on imposing labels and propagated from the
+	MPLS header to the IPv4/IPv6 header on popping the last label.
+
+	If disabled, the MPLS transport network will appear as a
+	single hop to transit traffic.
+
+	0 - disabled
+	1 - enabled (default)
+
+default_ttl - INTEGER
+	Default TTL value to use for MPLS packets where it cannot be
+	propagated from an IP header, either because one isn't present
+	or ip_ttl_propagate has been disabled.
+
+	Possible values: 1 - 255
+	Default: 255
+
 conf/<interface>/input - BOOL
 	Control whether packets can be input on this interface.
 
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index d29203651c01..1b68aed6e1b9 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -10,7 +10,10 @@ struct ctl_table_header;
 
 struct netns_mpls {
 	size_t platform_labels;
+	int ip_ttl_propagate;
+	int default_ttl;
 	struct mpls_route __rcu * __rcu *platform_label;
+
 	struct ctl_table_header *ctl;
 };
 
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 64d3bf269a26..bf5f0792e8a2 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -31,7 +31,9 @@
 #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
 
 static int zero = 0;
+static int one = 1;
 static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
 
 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
 		       struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -219,8 +221,8 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
 	return &rt->rt_nh[nh_index];
 }
 
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
-			struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+			struct sk_buff *skb, struct mpls_entry_decoded dec)
 {
 	enum mpls_payload_type payload_type;
 	bool success = false;
@@ -243,24 +245,29 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
 		payload_type = ip_hdr(skb)->version;
 
 	switch (payload_type) {
-	case MPT_IPV4: {
-		struct iphdr *hdr4 = ip_hdr(skb);
-		skb->protocol = htons(ETH_P_IP);
-		csum_replace2(&hdr4->check,
-			      htons(hdr4->ttl << 8),
-			      htons(dec.ttl << 8));
-		hdr4->ttl = dec.ttl;
+	case MPT_IPV4:
+		if (net->mpls.ip_ttl_propagate) {
+			struct iphdr *hdr4 = ip_hdr(skb);
+
+			skb->protocol = htons(ETH_P_IP);
+			csum_replace2(&hdr4->check,
+				      htons(hdr4->ttl << 8),
+				      htons(dec.ttl << 8));
+			hdr4->ttl = dec.ttl;
+		}
 		success = true;
 		break;
-	}
-	case MPT_IPV6: {
-		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
-		skb->protocol = htons(ETH_P_IPV6);
-		hdr6->hop_limit = dec.ttl;
+	case MPT_IPV6:
+		if (net->mpls.ip_ttl_propagate) {
+			struct ipv6hdr *hdr6 = ipv6_hdr(skb);
+
+			skb->protocol = htons(ETH_P_IPV6);
+			hdr6->hop_limit = dec.ttl;
+		}
 		success = true;
 		break;
-	}
 	case MPT_UNSPEC:
+		/* Should have decided which protocol it is by now */
 		break;
 	}
 
@@ -360,7 +367,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 
 	if (unlikely(!new_header_size && dec.bos)) {
 		/* Penultimate hop popping */
-		if (!mpls_egress(rt, skb, dec))
+		if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
 			goto err;
 	} else {
 		bool bos;
@@ -1764,6 +1771,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
 	return ret;
 }
 
+#define MPLS_NS_SYSCTL_OFFSET(field)		\
+	(&((struct net *)0)->field)
+
 static const struct ctl_table mpls_table[] = {
 	{
 		.procname	= "platform_labels",
@@ -1772,21 +1782,47 @@ static const struct ctl_table mpls_table[] = {
 		.mode		= 0644,
 		.proc_handler	= mpls_platform_labels,
 	},
+	{
+		.procname	= "ip_ttl_propagate",
+		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{
+		.procname	= "default_ttl",
+		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &ttl_max,
+	},
 	{ }
 };
 
 static int mpls_net_init(struct net *net)
 {
 	struct ctl_table *table;
+	int i;
 
 	net->mpls.platform_labels = 0;
 	net->mpls.platform_label = NULL;
+	net->mpls.ip_ttl_propagate = 1;
+	net->mpls.default_ttl = 255;
 
 	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
 	if (table == NULL)
 		return -ENOMEM;
 
-	table[0].data = net;
+	/* Table data contains only offsets relative to the base of
+	 * the struct net at this point, so make them absolute.
+	 */
+	for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+		table[i].data = (char *)net + (uintptr_t)table[i].data;
+
 	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
 	if (net->mpls.ctl == NULL) {
 		kfree(table);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 67b7a955de65..c6a8e1c7c5f5 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -49,6 +49,7 @@ static int mpls_xmit(struct sk_buff *skb)
 	struct rtable *rt = NULL;
 	struct rt6_info *rt6 = NULL;
 	struct mpls_dev *out_mdev;
+	struct net *net;
 	int err = 0;
 	bool bos;
 	int i;
@@ -56,13 +57,20 @@ static int mpls_xmit(struct sk_buff *skb)
 
 	/* Find the output device */
 	out_dev = dst->dev;
+	net = dev_net(out_dev);
 
 	/* Obtain the ttl */
 	if (dst->ops->family == AF_INET) {
-		ttl = ip_hdr(skb)->ttl;
+		if (net->mpls.ip_ttl_propagate)
+			ttl = ip_hdr(skb)->ttl;
+		else
+			ttl = net->mpls.default_ttl;
 		rt = (struct rtable *)dst;
 	} else if (dst->ops->family == AF_INET6) {
-		ttl = ipv6_hdr(skb)->hop_limit;
+		if (net->mpls.ip_ttl_propagate)
+			ttl = ipv6_hdr(skb)->hop_limit;
+		else
+			ttl = net->mpls.default_ttl;
 		rt6 = (struct rt6_info *)dst;
 	} else {
 		goto drop;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-30 20:36 [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured Robert Shearman
@ 2017-01-31  0:17 ` Eric W. Biederman
  2017-01-31 11:59   ` Robert Shearman
  2017-01-31  0:41 ` David Ahern
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 25+ messages in thread
From: Eric W. Biederman @ 2017-01-31  0:17 UTC (permalink / raw)
  To: Robert Shearman; +Cc: davem, netdev, roopa

Robert Shearman <rshearma@brocade.com> writes:

> It is sometimes desirable to present an MPLS transport network as a
> single hop to traffic transiting it because it prevents confusion when
> diagnosing failures. An example of where confusion can be generated is
> when addresses used in the provider network overlap with addresses in
> the overlay network and the addresses get exposed through ICMP errors
> generated as packets transit the provider network.
>
> Therefore, provide the ability to control whether the TTL value from
> an MPLS packet is propagated to an IPv4/IPv6 packet when the last
> label is popped through the addition of a new per-namespace sysctl:
> "net.mpls.ip_ttl_propagate" which defaults to enabled.
>
> Use the same sysctl to control whether the TTL is propagated from IP
> packets into the MPLS header. If the TTL isn't propagated then a
> default TTL value is used which can be configured via a new sysctl:
> "net.mpls.default_ttl".

Instead of having a global sysctl can we please have a different way
to configure the ingress/egress?

My general memory is that this makes sense for a slightly different
tunnel type.   Making it a per mpls tunnel property instead of global
property feels like it should be much more maintainable.

Similarly with the related behavior of what to do if the mpls ttl is
exhausted during the trip through the tunnel.  Drop or dig through the
packet and send an ICMP error message at the ip layer.

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-30 20:36 [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured Robert Shearman
  2017-01-31  0:17 ` Eric W. Biederman
@ 2017-01-31  0:41 ` David Ahern
  2017-01-31 12:00   ` Robert Shearman
  2017-01-31  1:09 ` David Ahern
  2017-03-08  0:46 ` [PATCH net-next v2 0/2] " Robert Shearman
  3 siblings, 1 reply; 25+ messages in thread
From: David Ahern @ 2017-01-31  0:41 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev, roopa, ebiederm

On 1/30/17 1:36 PM, Robert Shearman wrote:
> It is sometimes desirable to present an MPLS transport network as a
> single hop to traffic transiting it because it prevents confusion when
> diagnosing failures. An example of where confusion can be generated is
> when addresses used in the provider network overlap with addresses in
> the overlay network and the addresses get exposed through ICMP errors
> generated as packets transit the provider network.
> 
> Therefore, provide the ability to control whether the TTL value from
> an MPLS packet is propagated to an IPv4/IPv6 packet when the last
> label is popped through the addition of a new per-namespace sysctl:
> "net.mpls.ip_ttl_propagate" which defaults to enabled.
> 
> Use the same sysctl to control whether the TTL is propagated from IP
> packets into the MPLS header. If the TTL isn't propagated then a
> default TTL value is used which can be configured via a new sysctl:
> "net.mpls.default_ttl".
> 
> Signed-off-by: Robert Shearman <rshearma@brocade.com>
> ---
>  Documentation/networking/mpls-sysctl.txt | 19 +++++++++
>  include/net/netns/mpls.h                 |  3 ++
>  net/mpls/af_mpls.c                       | 70 ++++++++++++++++++++++++--------
>  net/mpls/mpls_iptunnel.c                 | 12 +++++-
>  4 files changed, 85 insertions(+), 19 deletions(-)
> 
> diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
> index 15d8d16934fd..b8f0725ff09e 100644
> --- a/Documentation/networking/mpls-sysctl.txt
> +++ b/Documentation/networking/mpls-sysctl.txt
> @@ -19,6 +19,25 @@ platform_labels - INTEGER
>  	Possible values: 0 - 1048575
>  	Default: 0
>  
> +ip_ttl_propagate - BOOL
> +	Control whether TTL is propagated from the IPv4/IPv6 header to
> +	the MPLS header on imposing labels and propagated from the
> +	MPLS header to the IPv4/IPv6 header on popping the last label.
> +
> +	If disabled, the MPLS transport network will appear as a
> +	single hop to transit traffic.
> +
> +	0 - disabled
> +	1 - enabled (default)
> +

It seems like you are going after RFC 3443 with this change. Can you add a comment to that effect? i.e.,  ip_ttl_propagate enabled is the Uniform Model and ip_ttl_propagate disabled is the Short Pipe Model.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-30 20:36 [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured Robert Shearman
  2017-01-31  0:17 ` Eric W. Biederman
  2017-01-31  0:41 ` David Ahern
@ 2017-01-31  1:09 ` David Ahern
  2017-01-31 12:01   ` Robert Shearman
  2017-03-08  0:46 ` [PATCH net-next v2 0/2] " Robert Shearman
  3 siblings, 1 reply; 25+ messages in thread
From: David Ahern @ 2017-01-31  1:09 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev, roopa, ebiederm

On 1/30/17 1:36 PM, Robert Shearman wrote:
> @@ -243,24 +245,29 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
>  		payload_type = ip_hdr(skb)->version;
>  
>  	switch (payload_type) {
> -	case MPT_IPV4: {
> -		struct iphdr *hdr4 = ip_hdr(skb);
> -		skb->protocol = htons(ETH_P_IP);
> -		csum_replace2(&hdr4->check,
> -			      htons(hdr4->ttl << 8),
> -			      htons(dec.ttl << 8));
> -		hdr4->ttl = dec.ttl;
> +	case MPT_IPV4:
> +		if (net->mpls.ip_ttl_propagate) {
> +			struct iphdr *hdr4 = ip_hdr(skb);
> +
> +			skb->protocol = htons(ETH_P_IP);

The protocol setting here and ...

> +			csum_replace2(&hdr4->check,
> +				      htons(hdr4->ttl << 8),
> +				      htons(dec.ttl << 8));
> +			hdr4->ttl = dec.ttl;
> +		}
>  		success = true;
>  		break;
> -	}
> -	case MPT_IPV6: {
> -		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
> -		skb->protocol = htons(ETH_P_IPV6);
> -		hdr6->hop_limit = dec.ttl;
> +	case MPT_IPV6:
> +		if (net->mpls.ip_ttl_propagate) {
> +			struct ipv6hdr *hdr6 = ipv6_hdr(skb);
> +
> +			skb->protocol = htons(ETH_P_IPV6);

here need to be done outside of net->mpls.ip_ttl_propagate otherwise ...

> +			hdr6->hop_limit = dec.ttl;
> +		}
>  		success = true;
>  		break;
> -	}
>  	case MPT_UNSPEC:
> +		/* Should have decided which protocol it is by now */
>  		break;
>  	}
>  

disabling ip_ttl_propagate causes a corrupted packet to show up at the end host (after the LSP):

IP4:
16:54:08.895372 46:a9:1c:9f:30:ba > fa:61:57:d6:1a:7d, ethertype MPLS unicast (0x8847), length 98: MPLS (label 282624, exp 0, ttl 84)
	(label 433380, exp 0, ttl 0)
	(label 262160, exp 7, [S], ttl 182)
	0x0000:  ac10 0101 0a0a 0a0a 0800 1677 05d3 0001  ...........w....
	0x0010:  30e0 8f58 0000 0000 4fa9 0d00 0000 0000  0..X....O.......
	0x0020:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f  ................
	0x0030:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f  .!"#$%&'()*+,-./
	0x0040:  3031 3233 3435 3637                      01234567


IPv6:
16:57:40.517520 46:a9:1c:9f:30:ba > fa:61:57:d6:1a:7d, ethertype MPLS unicast (0x8847), length 118: MPLS (label 393290, exp 5, ttl 240)
	(label 1027, exp 5, ttl 64)
	(label 131072, exp 0, ttl 1)
	(label 0 (IPv4 explicit NULL), exp 0, ttl 0)
	(label 0 (IPv4 explicit NULL), exp 0, ttl 0)
	(label 0 (IPv4 explicit NULL), exp 0, ttl 1)
	(label 196608, exp 0, ttl 1)
	(label 0 (IPv4 explicit NULL), exp 0, ttl 0)
	(label 0 (IPv4 explicit NULL), exp 0, ttl 0)
	(label 0 (IPv4 explicit NULL), exp 0, ttl 1)
	(label 524297, exp 1, [S], ttl 102)
	0x0000:  0628 0001 04e1 8f58 0000 0000 3be5 0700  .(.....X....;...
	0x0010:  0000 0000 1011 1213 1415 1617 1819 1a1b  ................
	0x0020:  1c1d 1e1f 2021 2223 2425 2627 2829 2a2b  .....!"#$%&'()*+
	0x0030:  2c2d 2e2f 3031 3233 3435 3637            ,-./01234567

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-31  0:17 ` Eric W. Biederman
@ 2017-01-31 11:59   ` Robert Shearman
  2017-02-03  3:21     ` Eric W. Biederman
  0 siblings, 1 reply; 25+ messages in thread
From: Robert Shearman @ 2017-01-31 11:59 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: davem, netdev, roopa, David Ahern

On 31/01/17 00:17, Eric W. Biederman wrote:
> Robert Shearman <rshearma@brocade.com> writes:
>
>> It is sometimes desirable to present an MPLS transport network as a
>> single hop to traffic transiting it because it prevents confusion when
>> diagnosing failures. An example of where confusion can be generated is
>> when addresses used in the provider network overlap with addresses in
>> the overlay network and the addresses get exposed through ICMP errors
>> generated as packets transit the provider network.
>>
>> Therefore, provide the ability to control whether the TTL value from
>> an MPLS packet is propagated to an IPv4/IPv6 packet when the last
>> label is popped through the addition of a new per-namespace sysctl:
>> "net.mpls.ip_ttl_propagate" which defaults to enabled.
>>
>> Use the same sysctl to control whether the TTL is propagated from IP
>> packets into the MPLS header. If the TTL isn't propagated then a
>> default TTL value is used which can be configured via a new sysctl:
>> "net.mpls.default_ttl".
>
> Instead of having a global sysctl can we please have a different way
> to configure the ingress/egress?
>
> My general memory is that this makes sense for a slightly different
> tunnel type.   Making it a per mpls tunnel property instead of global
> property feels like it should be much more maintainable.

RFC 3443 that David Ahern referenced does indeed imply that this should 
be a per-LSP property. However, it says:

>    We also note here that signaling the LSP type (Pipe, Short Pipe or
>    Uniform Model) is out of the scope of this document, and that is also
>    not addressed in the current versions of the label distribution
>    protocols, e.g. LDP [MPLS-LDP] and RSVP-TE [MPLS-RSVP].  Currently,
>    the LSP type is configured by the network operator manually by means
>    of either a command line or network management interface.

AIUI, the situation of label distribution protocols not signaling this 
property hasn't changed from when this RFC was written, which limits the 
usefulness of a per-LSP property, and perhaps also indicates a lack of 
desire from users of this.

Do you still feel it's worth implementing on a per-LSP basis? If so, any 
opinion on how it should be done for the pop case? Either a new per-path 
RTA attribute can be added, e.g. RTA_TTL_PROPAGATE, or a new rtnh flag 
could be added, e.g. RTNH_F_TTL_PROPAGATE.

> Similarly with the related behavior of what to do if the mpls ttl is
> exhausted during the trip through the tunnel.  Drop or dig through the
> packet and send an ICMP error message at the ip layer.

That's an interesting suggestion, but I don't think it will be useful 
when carrying another LSP over the LSP in question, since the LSR will 
have no idea what the label is being used for (i.e. the payload). If 
there is only one label in the packet then the router should know what 
the payload is of the label and since this is implicitly IPv4 or IPv6 at 
the moment (since those are the only types of traffic for which the 
labels can be used) then surely the ICMP should always be generated in 
that case?

Thanks,
Rob

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-31  0:41 ` David Ahern
@ 2017-01-31 12:00   ` Robert Shearman
  0 siblings, 0 replies; 25+ messages in thread
From: Robert Shearman @ 2017-01-31 12:00 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev, roopa, ebiederm

On 31/01/17 00:41, David Ahern wrote:
> On 1/30/17 1:36 PM, Robert Shearman wrote:
>> It is sometimes desirable to present an MPLS transport network as a
>> single hop to traffic transiting it because it prevents confusion when
>> diagnosing failures. An example of where confusion can be generated is
>> when addresses used in the provider network overlap with addresses in
>> the overlay network and the addresses get exposed through ICMP errors
>> generated as packets transit the provider network.
>>
>> Therefore, provide the ability to control whether the TTL value from
>> an MPLS packet is propagated to an IPv4/IPv6 packet when the last
>> label is popped through the addition of a new per-namespace sysctl:
>> "net.mpls.ip_ttl_propagate" which defaults to enabled.
>>
>> Use the same sysctl to control whether the TTL is propagated from IP
>> packets into the MPLS header. If the TTL isn't propagated then a
>> default TTL value is used which can be configured via a new sysctl:
>> "net.mpls.default_ttl".
>>
>> Signed-off-by: Robert Shearman <rshearma@brocade.com>
>> ---
>>  Documentation/networking/mpls-sysctl.txt | 19 +++++++++
>>  include/net/netns/mpls.h                 |  3 ++
>>  net/mpls/af_mpls.c                       | 70 ++++++++++++++++++++++++--------
>>  net/mpls/mpls_iptunnel.c                 | 12 +++++-
>>  4 files changed, 85 insertions(+), 19 deletions(-)
>>
>> diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
>> index 15d8d16934fd..b8f0725ff09e 100644
>> --- a/Documentation/networking/mpls-sysctl.txt
>> +++ b/Documentation/networking/mpls-sysctl.txt
>> @@ -19,6 +19,25 @@ platform_labels - INTEGER
>>  	Possible values: 0 - 1048575
>>  	Default: 0
>>
>> +ip_ttl_propagate - BOOL
>> +	Control whether TTL is propagated from the IPv4/IPv6 header to
>> +	the MPLS header on imposing labels and propagated from the
>> +	MPLS header to the IPv4/IPv6 header on popping the last label.
>> +
>> +	If disabled, the MPLS transport network will appear as a
>> +	single hop to transit traffic.
>> +
>> +	0 - disabled
>> +	1 - enabled (default)
>> +
>
> It seems like you are going after RFC 3443 with this change. Can you add a comment to that effect? i.e.,  ip_ttl_propagate enabled is the Uniform Model and ip_ttl_propagate disabled is the Short Pipe Model.
>

Good idea, will add it in the appropriate place depending on the chosen API.

Thanks,
Rob

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-31  1:09 ` David Ahern
@ 2017-01-31 12:01   ` Robert Shearman
  0 siblings, 0 replies; 25+ messages in thread
From: Robert Shearman @ 2017-01-31 12:01 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev, roopa, ebiederm

On 31/01/17 01:09, David Ahern wrote:
> On 1/30/17 1:36 PM, Robert Shearman wrote:
>> @@ -243,24 +245,29 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
>>  		payload_type = ip_hdr(skb)->version;
>>
>>  	switch (payload_type) {
>> -	case MPT_IPV4: {
>> -		struct iphdr *hdr4 = ip_hdr(skb);
>> -		skb->protocol = htons(ETH_P_IP);
>> -		csum_replace2(&hdr4->check,
>> -			      htons(hdr4->ttl << 8),
>> -			      htons(dec.ttl << 8));
>> -		hdr4->ttl = dec.ttl;
>> +	case MPT_IPV4:
>> +		if (net->mpls.ip_ttl_propagate) {
>> +			struct iphdr *hdr4 = ip_hdr(skb);
>> +
>> +			skb->protocol = htons(ETH_P_IP);
>
> The protocol setting here and ...
>
>> +			csum_replace2(&hdr4->check,
>> +				      htons(hdr4->ttl << 8),
>> +				      htons(dec.ttl << 8));
>> +			hdr4->ttl = dec.ttl;
>> +		}
>>  		success = true;
>>  		break;
>> -	}
>> -	case MPT_IPV6: {
>> -		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
>> -		skb->protocol = htons(ETH_P_IPV6);
>> -		hdr6->hop_limit = dec.ttl;
>> +	case MPT_IPV6:
>> +		if (net->mpls.ip_ttl_propagate) {
>> +			struct ipv6hdr *hdr6 = ipv6_hdr(skb);
>> +
>> +			skb->protocol = htons(ETH_P_IPV6);
>
> here need to be done outside of net->mpls.ip_ttl_propagate otherwise ...
>
>> +			hdr6->hop_limit = dec.ttl;
>> +		}
>>  		success = true;
>>  		break;
>> -	}
>>  	case MPT_UNSPEC:
>> +		/* Should have decided which protocol it is by now */
>>  		break;
>>  	}
>>
>
> disabling ip_ttl_propagate causes a corrupted packet to show up at the end host (after the LSP):

Oops, good catch. Will fix in v2.

Thanks,
Rob

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-31 11:59   ` Robert Shearman
@ 2017-02-03  3:21     ` Eric W. Biederman
  2017-02-03  4:02       ` David Ahern
  0 siblings, 1 reply; 25+ messages in thread
From: Eric W. Biederman @ 2017-02-03  3:21 UTC (permalink / raw)
  To: Robert Shearman; +Cc: davem, netdev, roopa, David Ahern

Robert Shearman <rshearma@brocade.com> writes:

> On 31/01/17 00:17, Eric W. Biederman wrote:
>> Robert Shearman <rshearma@brocade.com> writes:
>>
>>> It is sometimes desirable to present an MPLS transport network as a
>>> single hop to traffic transiting it because it prevents confusion when
>>> diagnosing failures. An example of where confusion can be generated is
>>> when addresses used in the provider network overlap with addresses in
>>> the overlay network and the addresses get exposed through ICMP errors
>>> generated as packets transit the provider network.
>>>
>>> Therefore, provide the ability to control whether the TTL value from
>>> an MPLS packet is propagated to an IPv4/IPv6 packet when the last
>>> label is popped through the addition of a new per-namespace sysctl:
>>> "net.mpls.ip_ttl_propagate" which defaults to enabled.
>>>
>>> Use the same sysctl to control whether the TTL is propagated from IP
>>> packets into the MPLS header. If the TTL isn't propagated then a
>>> default TTL value is used which can be configured via a new sysctl:
>>> "net.mpls.default_ttl".
>>
>> Instead of having a global sysctl can we please have a different way
>> to configure the ingress/egress?
>>
>> My general memory is that this makes sense for a slightly different
>> tunnel type.   Making it a per mpls tunnel property instead of global
>> property feels like it should be much more maintainable.
>
> RFC 3443 that David Ahern referenced does indeed imply that this
> should be a per-LSP property. However, it says:
>
>>    We also note here that signaling the LSP type (Pipe, Short Pipe or
>>    Uniform Model) is out of the scope of this document, and that is also
>>    not addressed in the current versions of the label distribution
>>    protocols, e.g. LDP [MPLS-LDP] and RSVP-TE [MPLS-RSVP].  Currently,
>>    the LSP type is configured by the network operator manually by means
>>    of either a command line or network management interface.
>
> AIUI, the situation of label distribution protocols not signaling this
> property hasn't changed from when this RFC was written, which limits
> the usefulness of a per-LSP property, and perhaps also indicates a
> lack of desire from users of this.
>
> Do you still feel it's worth implementing on a per-LSP basis? If so,
> any opinion on how it should be done for the pop case? Either a new
> per-path RTA attribute can be added, e.g. RTA_TTL_PROPAGATE, or a new
> rtnh flag could be added, e.g. RTNH_F_TTL_PROPAGATE.

My brain is mostly elsewhere right now so I don't have an implementation
on how it should be implemented.   However Linux fundamentally gets used
in interesting ways, and if we don't implement the option as per mpls exit
now someone will come along and need to do the work later.

Perhaps it will only be used with hard coded static configurations, and
it is fundamentally a per tunnel property.

It will be less work to maintain, and the code will run faster in the
long run if we don't have two code paths to maintain.

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured
  2017-02-03  3:21     ` Eric W. Biederman
@ 2017-02-03  4:02       ` David Ahern
  0 siblings, 0 replies; 25+ messages in thread
From: David Ahern @ 2017-02-03  4:02 UTC (permalink / raw)
  To: Eric W. Biederman, Robert Shearman, roopa; +Cc: davem, netdev

On 2/2/17 8:21 PM, Eric W. Biederman wrote:
> 
> My brain is mostly elsewhere right now so I don't have an implementation
> on how it should be implemented.   However Linux fundamentally gets used
> in interesting ways, and if we don't implement the option as per mpls exit
> now someone will come along and need to do the work later.
> 
> Perhaps it will only be used with hard coded static configurations, and
> it is fundamentally a per tunnel property.
> 
> It will be less work to maintain, and the code will run faster in the
> long run if we don't have two code paths to maintain.

I can see the argument for per-tunnel knobs, but looking at ios and nx-os docs it seems appropriate to have a global knob as well. In that regard having sysctl knobs solves the global setting and when/if needed we can add MPLS_IPTUNNEL_ZZZZZ encap attributes for the per-tunnel settings.

Does that seem reasonable?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH net-next v2 0/2] mpls: allow TTL propagation to/from IP packets to be configured
  2017-01-30 20:36 [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured Robert Shearman
                   ` (2 preceding siblings ...)
  2017-01-31  1:09 ` David Ahern
@ 2017-03-08  0:46 ` Robert Shearman
  2017-03-08  0:46   ` [PATCH net-next v2 1/2] mpls: allow TTL propagation to " Robert Shearman
                     ` (2 more replies)
  3 siblings, 3 replies; 25+ messages in thread
From: Robert Shearman @ 2017-03-08  0:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric W. Biederman, roopa, David Ahern, Robert Shearman

It is sometimes desirable to present an MPLS transport network as a
single hop to traffic transiting it because it prevents confusion when
diagnosing failures. An example of where confusion can be generated is
when addresses used in the provider network overlap with addresses in
the overlay network and the addresses get exposed through ICMP errors
generated as packets transit the provider network.

In addition, RFC 3443 defines two methods of deriving TTL for an
outgoing packet: Uniform Model where the TTL is propagated to/from the
MPLS header and both Pipe Models and Short Pipe Models (with and
without PHP) where the TTL is not propagated to/from the MPLS header.

Changes in v2:
 - add references to RFC 3443 as suggested by David Ahern
 - fix setting of skb->protocol as noticed by David Ahern
 - implement per-route/per-LWT configurability as suggested by Eric
   Biederman
 - split into two patches for ease of review

Robert Shearman (2):
  mpls: allow TTL propagation to IP packets to be configured
  mpls: allow TTL propagation from IP packets to be configured

 Documentation/networking/mpls-sysctl.txt | 19 ++++++
 include/net/mpls_iptunnel.h              |  2 +
 include/net/netns/mpls.h                 |  3 +
 include/uapi/linux/mpls_iptunnel.h       |  2 +
 include/uapi/linux/rtnetlink.h           |  1 +
 net/mpls/af_mpls.c                       | 99 ++++++++++++++++++++++++++------
 net/mpls/internal.h                      |  7 +++
 net/mpls/mpls_iptunnel.c                 | 64 ++++++++++++++++-----
 8 files changed, 168 insertions(+), 29 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH net-next v2 1/2] mpls: allow TTL propagation to IP packets to be configured
  2017-03-08  0:46 ` [PATCH net-next v2 0/2] " Robert Shearman
@ 2017-03-08  0:46   ` Robert Shearman
  2017-03-10  2:00     ` David Ahern
  2017-03-10  2:40     ` David Ahern
  2017-03-08  0:46   ` [PATCH net-next v2 2/2] mpls: allow TTL propagation from " Robert Shearman
  2017-03-10 20:43   ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Robert Shearman
  2 siblings, 2 replies; 25+ messages in thread
From: Robert Shearman @ 2017-03-08  0:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric W. Biederman, roopa, David Ahern, Robert Shearman

Provide the ability to control on a per-route basis whether the TTL
value from an MPLS packet is propagated to an IPv4/IPv6 packet when
the last label is popped as per the theoretical model in RFC 3443
through a new route attribute, RTA_TTL_PROPAGATE which can be 0 to
mean disable propagation and 1 to mean enable propagation.

In order to provide the ability to change the behaviour for packets
arriving with IPv4/IPv6 Explicit Null labels and to provide an easy
way for a user to change the behaviour for all existing routes without
having to reprogram them, a global knob is provided. This is done
through the addition of a new per-namespace sysctl,
"net.mpls.ip_ttl_propagate", which defaults to enabled. If the
per-route attribute is set (either enabled or disabled) then it
overrides the global configuration.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 Documentation/networking/mpls-sysctl.txt | 11 ++++
 include/net/netns/mpls.h                 |  2 +
 include/uapi/linux/rtnetlink.h           |  1 +
 net/mpls/af_mpls.c                       | 88 ++++++++++++++++++++++++++------
 net/mpls/internal.h                      |  7 +++
 5 files changed, 93 insertions(+), 16 deletions(-)

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 15d8d16934fd..9badd1d6685f 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -19,6 +19,17 @@ platform_labels - INTEGER
 	Possible values: 0 - 1048575
 	Default: 0
 
+ip_ttl_propagate - BOOL
+	Control whether TTL is propagated from the IPv4/IPv6 header to
+	the MPLS header on imposing labels and propagated from the
+	MPLS header to the IPv4/IPv6 header on popping the last label.
+
+	If disabled, the MPLS transport network will appear as a
+	single hop to transit traffic.
+
+	0 - disabled / RFC 3443 [Short] Pipe Model
+	1 - enabled / RFC 3443 Uniform Model (default)
+
 conf/<interface>/input - BOOL
 	Control whether packets can be input on this interface.
 
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index d29203651c01..58e0e46c4a5c 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -10,7 +10,9 @@ struct ctl_table_header;
 
 struct netns_mpls {
 	size_t platform_labels;
+	int ip_ttl_propagate;
 	struct mpls_route __rcu * __rcu *platform_label;
+
 	struct ctl_table_header *ctl;
 };
 
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 6546917d605a..30fb25e851db 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -319,6 +319,7 @@ enum rtattr_type_t {
 	RTA_EXPIRES,
 	RTA_PAD,
 	RTA_UID,
+	RTA_TTL_PROPAGATE,
 	__RTA_MAX
 };
 
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 3818686182b2..d4a51da8a0ce 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -32,6 +32,7 @@
 #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
 
 static int zero = 0;
+static int one = 1;
 static int label_limit = (1 << 20) - 1;
 
 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
@@ -220,8 +221,8 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
 	return &rt->rt_nh[nh_index];
 }
 
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
-			struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+			struct sk_buff *skb, struct mpls_entry_decoded dec)
 {
 	enum mpls_payload_type payload_type;
 	bool success = false;
@@ -244,24 +245,33 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
 		payload_type = ip_hdr(skb)->version;
 
 	switch (payload_type) {
-	case MPT_IPV4: {
-		struct iphdr *hdr4 = ip_hdr(skb);
+	case MPT_IPV4:
+		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+		     net->mpls.ip_ttl_propagate)) {
+			struct iphdr *hdr4 = ip_hdr(skb);
+
+			csum_replace2(&hdr4->check,
+				      htons(hdr4->ttl << 8),
+				      htons(dec.ttl << 8));
+			hdr4->ttl = dec.ttl;
+		}
 		skb->protocol = htons(ETH_P_IP);
-		csum_replace2(&hdr4->check,
-			      htons(hdr4->ttl << 8),
-			      htons(dec.ttl << 8));
-		hdr4->ttl = dec.ttl;
 		success = true;
 		break;
-	}
-	case MPT_IPV6: {
-		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
+	case MPT_IPV6:
+		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+		     net->mpls.ip_ttl_propagate)) {
+			struct ipv6hdr *hdr6 = ipv6_hdr(skb);
+
+			hdr6->hop_limit = dec.ttl;
+		}
 		skb->protocol = htons(ETH_P_IPV6);
-		hdr6->hop_limit = dec.ttl;
 		success = true;
 		break;
-	}
 	case MPT_UNSPEC:
+		/* Should have decided which protocol it is by now */
 		break;
 	}
 
@@ -361,7 +371,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 
 	if (unlikely(!new_header_size && dec.bos)) {
 		/* Penultimate hop popping */
-		if (!mpls_egress(rt, skb, dec))
+		if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
 			goto err;
 	} else {
 		bool bos;
@@ -412,6 +422,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
 static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
 	[RTA_DST]		= { .type = NLA_U32 },
 	[RTA_OIF]		= { .type = NLA_U32 },
+	[RTA_TTL_PROPAGATE]	= { .type = NLA_U8 },
 };
 
 struct mpls_route_config {
@@ -421,6 +432,7 @@ struct mpls_route_config {
 	u8			rc_via_alen;
 	u8			rc_via[MAX_VIA_ALEN];
 	u32			rc_label;
+	u8			rc_ttl_propagate;
 	u8			rc_output_labels;
 	u32			rc_output_label[MAX_NEW_LABELS];
 	u32			rc_nlflags;
@@ -856,6 +868,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 
 	rt->rt_protocol = cfg->rc_protocol;
 	rt->rt_payload_type = cfg->rc_payload_type;
+	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
 
 	if (cfg->rc_mp)
 		err = mpls_nh_build_multi(cfg, rt);
@@ -1576,6 +1589,7 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 	cfg->rc_label		= LABEL_NOT_SPECIFIED;
 	cfg->rc_protocol	= rtm->rtm_protocol;
 	cfg->rc_via_table	= MPLS_NEIGH_TABLE_UNSPEC;
+	cfg->rc_ttl_propagate	= MPLS_TTL_PROP_DEFAULT;
 	cfg->rc_nlflags		= nlh->nlmsg_flags;
 	cfg->rc_nlinfo.portid	= NETLINK_CB(skb).portid;
 	cfg->rc_nlinfo.nlh	= nlh;
@@ -1622,6 +1636,17 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 			cfg->rc_mp_len = nla_len(nla);
 			break;
 		}
+		case RTA_TTL_PROPAGATE:
+		{
+			u8 ttl_propagate = nla_get_u8(nla);
+
+			if (ttl_propagate > 1)
+				goto errout;
+			cfg->rc_ttl_propagate = ttl_propagate ?
+				MPLS_TTL_PROP_ENABLED :
+				MPLS_TTL_PROP_DISABLED;
+			break;
+		}
 		default:
 			/* Unsupported attribute */
 			goto errout;
@@ -1682,6 +1707,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
 
 	if (nla_put_labels(skb, RTA_DST, 1, &label))
 		goto nla_put_failure;
+
+	if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
+		bool ttl_propagate =
+			rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
+
+		if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
+			       ttl_propagate))
+			goto nla_put_failure;
+	}
 	if (rt->rt_nhn == 1) {
 		const struct mpls_nh *nh = rt->rt_nh;
 
@@ -1792,7 +1826,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
 {
 	size_t payload =
 		NLMSG_ALIGN(sizeof(struct rtmsg))
-		+ nla_total_size(4);			/* RTA_DST */
+		+ nla_total_size(4)			/* RTA_DST */
+		+ nla_total_size(1);			/* RTA_TTL_PROPAGATE */
 
 	if (rt->rt_nhn == 1) {
 		struct mpls_nh *nh = rt->rt_nh;
@@ -1876,6 +1911,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
 		rt0->rt_protocol = RTPROT_KERNEL;
 		rt0->rt_payload_type = MPT_IPV4;
+		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
 		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
 		rt0->rt_nh->nh_via_alen = lo->addr_len;
 		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
@@ -1889,6 +1925,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
 		rt2->rt_protocol = RTPROT_KERNEL;
 		rt2->rt_payload_type = MPT_IPV6;
+		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
 		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
 		rt2->rt_nh->nh_via_alen = lo->addr_len;
 		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
@@ -1970,6 +2007,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
 	return ret;
 }
 
+#define MPLS_NS_SYSCTL_OFFSET(field)		\
+	(&((struct net *)0)->field)
+
 static const struct ctl_table mpls_table[] = {
 	{
 		.procname	= "platform_labels",
@@ -1978,21 +2018,37 @@ static const struct ctl_table mpls_table[] = {
 		.mode		= 0644,
 		.proc_handler	= mpls_platform_labels,
 	},
+	{
+		.procname	= "ip_ttl_propagate",
+		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{ }
 };
 
 static int mpls_net_init(struct net *net)
 {
 	struct ctl_table *table;
+	int i;
 
 	net->mpls.platform_labels = 0;
 	net->mpls.platform_label = NULL;
+	net->mpls.ip_ttl_propagate = 1;
 
 	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
 	if (table == NULL)
 		return -ENOMEM;
 
-	table[0].data = net;
+	/* Table data contains only offsets relative to the base of
+	 * struct net at this point, so make them absolute.
+	 */
+	for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+		table[i].data = (char *)net + (uintptr_t)table[i].data;
+
 	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
 	if (net->mpls.ctl == NULL) {
 		kfree(table);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 76360d8b9579..62928d8fabd1 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -90,6 +90,12 @@ struct mpls_nh { /* next hop label forwarding entry */
 	u8			nh_via_table;
 };
 
+enum mpls_ttl_propagation {
+	MPLS_TTL_PROP_DEFAULT,
+	MPLS_TTL_PROP_ENABLED,
+	MPLS_TTL_PROP_DISABLED,
+};
+
 /* The route, nexthops and vias are stored together in the same memory
  * block:
  *
@@ -116,6 +122,7 @@ struct mpls_route { /* next hop label forwarding entry */
 	u8			rt_protocol;
 	u8			rt_payload_type;
 	u8			rt_max_alen;
+	u8			rt_ttl_propagate;
 	unsigned int		rt_nhn;
 	unsigned int		rt_nhn_alive;
 	struct mpls_nh		rt_nh[0];
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH net-next v2 2/2] mpls: allow TTL propagation from IP packets to be configured
  2017-03-08  0:46 ` [PATCH net-next v2 0/2] " Robert Shearman
  2017-03-08  0:46   ` [PATCH net-next v2 1/2] mpls: allow TTL propagation to " Robert Shearman
@ 2017-03-08  0:46   ` Robert Shearman
  2017-03-10  2:54     ` David Ahern
  2017-03-10 20:43   ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Robert Shearman
  2 siblings, 1 reply; 25+ messages in thread
From: Robert Shearman @ 2017-03-08  0:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric W. Biederman, roopa, David Ahern, Robert Shearman

Allow TTL propagation from IP packets to MPLS packets to be
configured. Add a new optional LWT attribute, MPLS_IPTUNNEL_TTL, which
allows the TTL to be set in the resulting MPLS packet, with the value
of 0 having the semantics of enabling propagation of the TTL from the
IP header (i.e. non-zero values disable propagation).

Also allow the configuration to be overridden globally by reusing the
same sysctl to control whether the TTL is propagated from IP packets
into the MPLS header. If the per-LWT attribute is set then it
overrides the global configuration. If the TTL isn't propagated then a
default TTL value is used which can be configured via a new sysctl,
"net.mpls.default_ttl". This is kept separate from the configuration
of whether IP TTL propagation is enabled as it can be used in the
future when non-IP payloads are supported (i.e. where there is no
payload TTL that can be propagated).

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 Documentation/networking/mpls-sysctl.txt |  8 ++++
 include/net/mpls_iptunnel.h              |  2 +
 include/net/netns/mpls.h                 |  1 +
 include/uapi/linux/mpls_iptunnel.h       |  2 +
 net/mpls/af_mpls.c                       | 11 ++++++
 net/mpls/mpls_iptunnel.c                 | 64 +++++++++++++++++++++++++-------
 6 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 9badd1d6685f..2f24a1912a48 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -30,6 +30,14 @@ ip_ttl_propagate - BOOL
 	0 - disabled / RFC 3443 [Short] Pipe Model
 	1 - enabled / RFC 3443 Uniform Model (default)
 
+default_ttl - INTEGER
+	Default TTL value to use for MPLS packets where it cannot be
+	propagated from an IP header, either because one isn't present
+	or ip_ttl_propagate has been disabled.
+
+	Possible values: 1 - 255
+	Default: 255
+
 conf/<interface>/input - BOOL
 	Control whether packets can be input on this interface.
 
diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h
index 179253f9dcfd..a18af6a16eb5 100644
--- a/include/net/mpls_iptunnel.h
+++ b/include/net/mpls_iptunnel.h
@@ -19,6 +19,8 @@
 struct mpls_iptunnel_encap {
 	u32	label[MAX_NEW_LABELS];
 	u8	labels;
+	u8	ttl_propagate;
+	u8	default_ttl;
 };
 
 static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index 58e0e46c4a5c..1b68aed6e1b9 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -11,6 +11,7 @@ struct ctl_table_header;
 struct netns_mpls {
 	size_t platform_labels;
 	int ip_ttl_propagate;
+	int default_ttl;
 	struct mpls_route __rcu * __rcu *platform_label;
 
 	struct ctl_table_header *ctl;
diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h
index d80a0498f77e..f5e45095b0bb 100644
--- a/include/uapi/linux/mpls_iptunnel.h
+++ b/include/uapi/linux/mpls_iptunnel.h
@@ -16,11 +16,13 @@
 /* MPLS tunnel attributes
  * [RTA_ENCAP] = {
  *     [MPLS_IPTUNNEL_DST]
+ *     [MPLS_IPTUNNEL_TTL]
  * }
  */
 enum {
 	MPLS_IPTUNNEL_UNSPEC,
 	MPLS_IPTUNNEL_DST,
+	MPLS_IPTUNNEL_TTL,
 	__MPLS_IPTUNNEL_MAX,
 };
 #define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d4a51da8a0ce..a8710d334a60 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -34,6 +34,7 @@
 static int zero = 0;
 static int one = 1;
 static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
 
 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
 		       struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -2027,6 +2028,15 @@ static const struct ctl_table mpls_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "default_ttl",
+		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &ttl_max,
+	},
 	{ }
 };
 
@@ -2038,6 +2048,7 @@ static int mpls_net_init(struct net *net)
 	net->mpls.platform_labels = 0;
 	net->mpls.platform_label = NULL;
 	net->mpls.ip_ttl_propagate = 1;
+	net->mpls.default_ttl = 255;
 
 	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
 	if (table == NULL)
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index e4e4424f9eb1..da2fb02e0f27 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -29,6 +29,7 @@
 
 static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
 	[MPLS_IPTUNNEL_DST]	= { .type = NLA_U32 },
+	[MPLS_IPTUNNEL_TTL]	= { .type = NLA_U8 },
 };
 
 static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
@@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb)
 	struct rtable *rt = NULL;
 	struct rt6_info *rt6 = NULL;
 	struct mpls_dev *out_mdev;
+	struct net *net;
 	int err = 0;
 	bool bos;
 	int i;
@@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb)
 
 	/* Find the output device */
 	out_dev = dst->dev;
-
-	/* Obtain the ttl */
-	if (dst->ops->family == AF_INET) {
-		ttl = ip_hdr(skb)->ttl;
-		rt = (struct rtable *)dst;
-	} else if (dst->ops->family == AF_INET6) {
-		ttl = ipv6_hdr(skb)->hop_limit;
-		rt6 = (struct rt6_info *)dst;
-	} else {
-		goto drop;
-	}
+	net = dev_net(out_dev);
 
 	skb_orphan(skb);
 
@@ -78,6 +70,29 @@ static int mpls_xmit(struct sk_buff *skb)
 
 	tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
 
+	/* Obtain the ttl */
+	if (dst->ops->family == AF_INET) {
+		if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+			ttl = tun_encap_info->default_ttl;
+		else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+			 !net->mpls.ip_ttl_propagate)
+			ttl = net->mpls.default_ttl;
+		else
+			ttl = ip_hdr(skb)->ttl;
+		rt = (struct rtable *)dst;
+	} else if (dst->ops->family == AF_INET6) {
+		if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+			ttl = tun_encap_info->default_ttl;
+		else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+			 !net->mpls.ip_ttl_propagate)
+			ttl = net->mpls.default_ttl;
+		else
+			ttl = ipv6_hdr(skb)->hop_limit;
+		rt6 = (struct rt6_info *)dst;
+	} else {
+		goto drop;
+	}
+
 	/* Verify the destination can hold the packet */
 	new_header_size = mpls_encap_size(tun_encap_info);
 	mtu = mpls_dev_mtu(out_dev);
@@ -160,6 +175,17 @@ static int mpls_build_state(struct nlattr *nla,
 			     &tun_encap_info->labels, tun_encap_info->label);
 	if (ret)
 		goto errout;
+
+	tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+
+	if (tb[MPLS_IPTUNNEL_TTL]) {
+		tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
+		/* TTL 0 implies propagate from IP header */
+		tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
+			MPLS_TTL_PROP_DISABLED :
+			MPLS_TTL_PROP_ENABLED;
+	}
+
 	newts->type = LWTUNNEL_ENCAP_MPLS;
 	newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
 	newts->headroom = mpls_encap_size(tun_encap_info);
@@ -186,6 +212,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
 			   tun_encap_info->label))
 		goto nla_put_failure;
 
+	if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
+	    nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -195,10 +225,16 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
 static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
 {
 	struct mpls_iptunnel_encap *tun_encap_info;
+	int nlsize;
 
 	tun_encap_info = mpls_lwtunnel_encap(lwtstate);
 
-	return nla_total_size(tun_encap_info->labels * 4);
+	nlsize = nla_total_size(tun_encap_info->labels * 4);
+
+	if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
+		nlsize += nla_total_size(1);
+
+	return nlsize;
 }
 
 static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -207,7 +243,9 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 	struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
 	int l;
 
-	if (a_hdr->labels != b_hdr->labels)
+	if (a_hdr->labels != b_hdr->labels ||
+	    a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
+	    a_hdr->default_ttl != b_hdr->default_ttl)
 		return 1;
 
 	for (l = 0; l < MAX_NEW_LABELS; l++)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v2 1/2] mpls: allow TTL propagation to IP packets to be configured
  2017-03-08  0:46   ` [PATCH net-next v2 1/2] mpls: allow TTL propagation to " Robert Shearman
@ 2017-03-10  2:00     ` David Ahern
  2017-03-10 10:12       ` Robert Shearman
  2017-03-10  2:40     ` David Ahern
  1 sibling, 1 reply; 25+ messages in thread
From: David Ahern @ 2017-03-10  2:00 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev, Eric W. Biederman, roopa

On 3/7/17 5:46 PM, Robert Shearman wrote:
> @@ -244,24 +245,33 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
>  		payload_type = ip_hdr(skb)->version;
>  
>  	switch (payload_type) {
> -	case MPT_IPV4: {
> -		struct iphdr *hdr4 = ip_hdr(skb);
> +	case MPT_IPV4:
> +		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
> +		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
> +		     net->mpls.ip_ttl_propagate)) {
> +			struct iphdr *hdr4 = ip_hdr(skb);
> +
> +			csum_replace2(&hdr4->check,
> +				      htons(hdr4->ttl << 8),
> +				      htons(dec.ttl << 8));
> +			hdr4->ttl = dec.ttl;
> +		}
>  		skb->protocol = htons(ETH_P_IP);
> -		csum_replace2(&hdr4->check,
> -			      htons(hdr4->ttl << 8),
> -			      htons(dec.ttl << 8));
> -		hdr4->ttl = dec.ttl;
>  		success = true;
>  		break;
> -	}
> -	case MPT_IPV6: {
> -		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
> +	case MPT_IPV6:
> +		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
> +		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
> +		     net->mpls.ip_ttl_propagate)) {
> +			struct ipv6hdr *hdr6 = ipv6_hdr(skb);
> +
> +			hdr6->hop_limit = dec.ttl;
> +		}
>  		skb->protocol = htons(ETH_P_IPV6);
> -		hdr6->hop_limit = dec.ttl;
>  		success = true;
>  		break;
> -	}

What decrements the TTL if it is not propagated from MPLS to IP?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v2 1/2] mpls: allow TTL propagation to IP packets to be configured
  2017-03-08  0:46   ` [PATCH net-next v2 1/2] mpls: allow TTL propagation to " Robert Shearman
  2017-03-10  2:00     ` David Ahern
@ 2017-03-10  2:40     ` David Ahern
  2017-03-10 10:12       ` Robert Shearman
  1 sibling, 1 reply; 25+ messages in thread
From: David Ahern @ 2017-03-10  2:40 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev, Eric W. Biederman, roopa

On 3/7/17 5:46 PM, Robert Shearman wrote:
> diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
> index d29203651c01..58e0e46c4a5c 100644
> --- a/include/net/netns/mpls.h
> +++ b/include/net/netns/mpls.h
> @@ -10,7 +10,9 @@ struct ctl_table_header;
>  
>  struct netns_mpls {
>  	size_t platform_labels;
> +	int ip_ttl_propagate;
>  	struct mpls_route __rcu * __rcu *platform_label;
> +
>  	struct ctl_table_header *ctl;
>  };
>  

I'd prefer the platform_labels stay with platform_label. ie., put the
new ip_ttl_propagate above platform_labels.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v2 2/2] mpls: allow TTL propagation from IP packets to be configured
  2017-03-08  0:46   ` [PATCH net-next v2 2/2] mpls: allow TTL propagation from " Robert Shearman
@ 2017-03-10  2:54     ` David Ahern
  2017-03-10 10:12       ` Robert Shearman
  0 siblings, 1 reply; 25+ messages in thread
From: David Ahern @ 2017-03-10  2:54 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev, Eric W. Biederman, roopa

On 3/7/17 5:46 PM, Robert Shearman wrote:
> @@ -78,6 +70,29 @@ static int mpls_xmit(struct sk_buff *skb)
>  
>  	tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
>  
> +	/* Obtain the ttl */
> +	if (dst->ops->family == AF_INET) {
> +		if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
> +			ttl = tun_encap_info->default_ttl;
> +		else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
> +			 !net->mpls.ip_ttl_propagate)
> +			ttl = net->mpls.default_ttl;
> +		else
> +			ttl = ip_hdr(skb)->ttl;

After staring at that for a while, an explanation above this if {} else
{} section on the ttl selection will be very helpful.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v2 1/2] mpls: allow TTL propagation to IP packets to be configured
  2017-03-10  2:40     ` David Ahern
@ 2017-03-10 10:12       ` Robert Shearman
  0 siblings, 0 replies; 25+ messages in thread
From: Robert Shearman @ 2017-03-10 10:12 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev, Eric W. Biederman, roopa

On 10/03/17 02:40, David Ahern wrote:
> On 3/7/17 5:46 PM, Robert Shearman wrote:
>> diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
>> index d29203651c01..58e0e46c4a5c 100644
>> --- a/include/net/netns/mpls.h
>> +++ b/include/net/netns/mpls.h
>> @@ -10,7 +10,9 @@ struct ctl_table_header;
>>
>>  struct netns_mpls {
>>  	size_t platform_labels;
>> +	int ip_ttl_propagate;
>>  	struct mpls_route __rcu * __rcu *platform_label;
>> +
>>  	struct ctl_table_header *ctl;
>>  };
>>
>
> I'd prefer the platform_labels stay with platform_label. ie., put the
> new ip_ttl_propagate above platform_labels.
>

Ok, will do in v3.

Thanks,
Rob

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v2 1/2] mpls: allow TTL propagation to IP packets to be configured
  2017-03-10  2:00     ` David Ahern
@ 2017-03-10 10:12       ` Robert Shearman
  0 siblings, 0 replies; 25+ messages in thread
From: Robert Shearman @ 2017-03-10 10:12 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev, Eric W. Biederman, roopa

On 10/03/17 02:00, David Ahern wrote:
> On 3/7/17 5:46 PM, Robert Shearman wrote:
>> @@ -244,24 +245,33 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
>>  		payload_type = ip_hdr(skb)->version;
>>
>>  	switch (payload_type) {
>> -	case MPT_IPV4: {
>> -		struct iphdr *hdr4 = ip_hdr(skb);
>> +	case MPT_IPV4:
>> +		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
>> +		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
>> +		     net->mpls.ip_ttl_propagate)) {
>> +			struct iphdr *hdr4 = ip_hdr(skb);
>> +
>> +			csum_replace2(&hdr4->check,
>> +				      htons(hdr4->ttl << 8),
>> +				      htons(dec.ttl << 8));
>> +			hdr4->ttl = dec.ttl;
>> +		}
>>  		skb->protocol = htons(ETH_P_IP);
>> -		csum_replace2(&hdr4->check,
>> -			      htons(hdr4->ttl << 8),
>> -			      htons(dec.ttl << 8));
>> -		hdr4->ttl = dec.ttl;
>>  		success = true;
>>  		break;
>> -	}
>> -	case MPT_IPV6: {
>> -		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
>> +	case MPT_IPV6:
>> +		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
>> +		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
>> +		     net->mpls.ip_ttl_propagate)) {
>> +			struct ipv6hdr *hdr6 = ipv6_hdr(skb);
>> +
>> +			hdr6->hop_limit = dec.ttl;
>> +		}
>>  		skb->protocol = htons(ETH_P_IPV6);
>> -		hdr6->hop_limit = dec.ttl;
>>  		success = true;
>>  		break;
>> -	}
>
> What decrements the TTL if it is not propagated from MPLS to IP?
>

Good point. Will address in v3.

Thanks,
Rob

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v2 2/2] mpls: allow TTL propagation from IP packets to be configured
  2017-03-10  2:54     ` David Ahern
@ 2017-03-10 10:12       ` Robert Shearman
  0 siblings, 0 replies; 25+ messages in thread
From: Robert Shearman @ 2017-03-10 10:12 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev, Eric W. Biederman, roopa

On 10/03/17 02:54, David Ahern wrote:
> On 3/7/17 5:46 PM, Robert Shearman wrote:
>> @@ -78,6 +70,29 @@ static int mpls_xmit(struct sk_buff *skb)
>>
>>  	tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
>>
>> +	/* Obtain the ttl */
>> +	if (dst->ops->family == AF_INET) {
>> +		if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
>> +			ttl = tun_encap_info->default_ttl;
>> +		else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
>> +			 !net->mpls.ip_ttl_propagate)
>> +			ttl = net->mpls.default_ttl;
>> +		else
>> +			ttl = ip_hdr(skb)->ttl;
>
> After staring at that for a while, an explanation above this if {} else
> {} section on the ttl selection will be very helpful.
>

Ok, would a comment like the following improve things sufficiently?

	/* Obtain the ttl using the following set of rules.
	 *
	 * LWT ttl propagation setting:
	 *  - disabled => use default TTL value from LWT
	 *  - enabled  => use TTL value from IPv4/IPv6 header
	 *  - default  =>
	 *   Global ttl propagation setting:
	 *    - disabled => use default TTL value from global setting
	 *    - enabled => use TTL value from IPv4/IPv6 header
	 */

Thanks,
Rob

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from IP packets to be configured
  2017-03-08  0:46 ` [PATCH net-next v2 0/2] " Robert Shearman
  2017-03-08  0:46   ` [PATCH net-next v2 1/2] mpls: allow TTL propagation to " Robert Shearman
  2017-03-08  0:46   ` [PATCH net-next v2 2/2] mpls: allow TTL propagation from " Robert Shearman
@ 2017-03-10 20:43   ` Robert Shearman
  2017-03-10 20:43     ` [PATCH net-next v3 1/2] mpls: allow TTL propagation to " Robert Shearman
                       ` (3 more replies)
  2 siblings, 4 replies; 25+ messages in thread
From: Robert Shearman @ 2017-03-10 20:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric W. Biederman, roopa, David Ahern, Robert Shearman

It is sometimes desirable to present an MPLS transport network as a
single hop to traffic transiting it because it prevents confusion when
diagnosing failures. An example of where confusion can be generated is
when addresses used in the provider network overlap with addresses in
the overlay network and the addresses get exposed through ICMP errors
generated as packets transit the provider network.

In addition, RFC 3443 defines two methods of deriving TTL for an
outgoing packet: Uniform Model where the TTL is propagated to/from the
MPLS header and both Pipe Models and Short Pipe Models (with and
without PHP) where the TTL is not propagated to/from the MPLS header.

Changes in v3:
 - decrement ttl on popping last label when not doing ttl propagation,
   as suggested by David Ahern.
 - add comment to describe what the somewhat complex conditionals are
   doing to work out what ttl to use in mpls_iptunnel.c.
 - rearrange fields in struct netns_mpls to keep the platform
   label fields together, as suggested by David Ahern.

Changes in v2:
 - add references to RFC 3443 as suggested by David Ahern
 - fix setting of skb->protocol as noticed by David Ahern
 - implement per-route/per-LWT configurability as suggested by Eric
   Biederman
 - split into two patches for ease of review

Robert Shearman (2):
  mpls: allow TTL propagation to IP packets to be configured
  mpls: allow TTL propagation from IP packets to be configured

 Documentation/networking/mpls-sysctl.txt | 19 +++++++
 include/net/mpls_iptunnel.h              |  2 +
 include/net/netns/mpls.h                 |  3 +
 include/uapi/linux/mpls_iptunnel.h       |  2 +
 include/uapi/linux/rtnetlink.h           |  1 +
 net/mpls/af_mpls.c                       | 98 +++++++++++++++++++++++++++++---
 net/mpls/internal.h                      |  7 +++
 net/mpls/mpls_iptunnel.c                 | 73 +++++++++++++++++++-----
 8 files changed, 184 insertions(+), 21 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH net-next v3 1/2] mpls: allow TTL propagation to IP packets to be configured
  2017-03-10 20:43   ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Robert Shearman
@ 2017-03-10 20:43     ` Robert Shearman
  2017-03-13 18:42       ` David Ahern
  2017-03-10 20:43     ` [PATCH net-next v3 2/2] mpls: allow TTL propagation from " Robert Shearman
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 25+ messages in thread
From: Robert Shearman @ 2017-03-10 20:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric W. Biederman, roopa, David Ahern, Robert Shearman

Provide the ability to control on a per-route basis whether the TTL
value from an MPLS packet is propagated to an IPv4/IPv6 packet when
the last label is popped as per the theoretical model in RFC 3443
through a new route attribute, RTA_TTL_PROPAGATE which can be 0 to
mean disable propagation and 1 to mean enable propagation.

In order to provide the ability to change the behaviour for packets
arriving with IPv4/IPv6 Explicit Null labels and to provide an easy
way for a user to change the behaviour for all existing routes without
having to reprogram them, a global knob is provided. This is done
through the addition of a new per-namespace sysctl,
"net.mpls.ip_ttl_propagate", which defaults to enabled. If the
per-route attribute is set (either enabled or disabled) then it
overrides the global configuration.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 Documentation/networking/mpls-sysctl.txt | 11 ++++
 include/net/netns/mpls.h                 |  2 +
 include/uapi/linux/rtnetlink.h           |  1 +
 net/mpls/af_mpls.c                       | 87 +++++++++++++++++++++++++++++---
 net/mpls/internal.h                      |  7 +++
 5 files changed, 100 insertions(+), 8 deletions(-)

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 15d8d16934fd..9badd1d6685f 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -19,6 +19,17 @@ platform_labels - INTEGER
 	Possible values: 0 - 1048575
 	Default: 0
 
+ip_ttl_propagate - BOOL
+	Control whether TTL is propagated from the IPv4/IPv6 header to
+	the MPLS header on imposing labels and propagated from the
+	MPLS header to the IPv4/IPv6 header on popping the last label.
+
+	If disabled, the MPLS transport network will appear as a
+	single hop to transit traffic.
+
+	0 - disabled / RFC 3443 [Short] Pipe Model
+	1 - enabled / RFC 3443 Uniform Model (default)
+
 conf/<interface>/input - BOOL
 	Control whether packets can be input on this interface.
 
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index d29203651c01..08652eedabb2 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -9,8 +9,10 @@ struct mpls_route;
 struct ctl_table_header;
 
 struct netns_mpls {
+	int ip_ttl_propagate;
 	size_t platform_labels;
 	struct mpls_route __rcu * __rcu *platform_label;
+
 	struct ctl_table_header *ctl;
 };
 
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 6546917d605a..30fb25e851db 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -319,6 +319,7 @@ enum rtattr_type_t {
 	RTA_EXPIRES,
 	RTA_PAD,
 	RTA_UID,
+	RTA_TTL_PROPAGATE,
 	__RTA_MAX
 };
 
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 3818686182b2..0e1046f21af8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -32,6 +32,7 @@
 #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
 
 static int zero = 0;
+static int one = 1;
 static int label_limit = (1 << 20) - 1;
 
 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
@@ -220,8 +221,8 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
 	return &rt->rt_nh[nh_index];
 }
 
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
-			struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+			struct sk_buff *skb, struct mpls_entry_decoded dec)
 {
 	enum mpls_payload_type payload_type;
 	bool success = false;
@@ -246,22 +247,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
 	switch (payload_type) {
 	case MPT_IPV4: {
 		struct iphdr *hdr4 = ip_hdr(skb);
+		u8 new_ttl;
 		skb->protocol = htons(ETH_P_IP);
+
+		/* If propagating TTL, take the decremented TTL from
+		 * the incoming MPLS header, otherwise decrement the
+		 * TTL, but only if not 0 to avoid underflow.
+		 */
+		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+		     net->mpls.ip_ttl_propagate))
+			new_ttl = dec.ttl;
+		else
+			new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;
+
 		csum_replace2(&hdr4->check,
 			      htons(hdr4->ttl << 8),
-			      htons(dec.ttl << 8));
-		hdr4->ttl = dec.ttl;
+			      htons(new_ttl << 8));
+		hdr4->ttl = new_ttl;
 		success = true;
 		break;
 	}
 	case MPT_IPV6: {
 		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
 		skb->protocol = htons(ETH_P_IPV6);
-		hdr6->hop_limit = dec.ttl;
+
+		/* If propagating TTL, take the decremented TTL from
+		 * the incoming MPLS header, otherwise decrement the
+		 * hop limit, but only if not 0 to avoid underflow.
+		 */
+		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+		     net->mpls.ip_ttl_propagate))
+			hdr6->hop_limit = dec.ttl;
+		else if (hdr6->hop_limit)
+			hdr6->hop_limit = hdr6->hop_limit - 1;
 		success = true;
 		break;
 	}
 	case MPT_UNSPEC:
+		/* Should have decided which protocol it is by now */
 		break;
 	}
 
@@ -361,7 +386,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 
 	if (unlikely(!new_header_size && dec.bos)) {
 		/* Penultimate hop popping */
-		if (!mpls_egress(rt, skb, dec))
+		if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
 			goto err;
 	} else {
 		bool bos;
@@ -412,6 +437,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
 static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
 	[RTA_DST]		= { .type = NLA_U32 },
 	[RTA_OIF]		= { .type = NLA_U32 },
+	[RTA_TTL_PROPAGATE]	= { .type = NLA_U8 },
 };
 
 struct mpls_route_config {
@@ -421,6 +447,7 @@ struct mpls_route_config {
 	u8			rc_via_alen;
 	u8			rc_via[MAX_VIA_ALEN];
 	u32			rc_label;
+	u8			rc_ttl_propagate;
 	u8			rc_output_labels;
 	u32			rc_output_label[MAX_NEW_LABELS];
 	u32			rc_nlflags;
@@ -856,6 +883,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 
 	rt->rt_protocol = cfg->rc_protocol;
 	rt->rt_payload_type = cfg->rc_payload_type;
+	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
 
 	if (cfg->rc_mp)
 		err = mpls_nh_build_multi(cfg, rt);
@@ -1576,6 +1604,7 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 	cfg->rc_label		= LABEL_NOT_SPECIFIED;
 	cfg->rc_protocol	= rtm->rtm_protocol;
 	cfg->rc_via_table	= MPLS_NEIGH_TABLE_UNSPEC;
+	cfg->rc_ttl_propagate	= MPLS_TTL_PROP_DEFAULT;
 	cfg->rc_nlflags		= nlh->nlmsg_flags;
 	cfg->rc_nlinfo.portid	= NETLINK_CB(skb).portid;
 	cfg->rc_nlinfo.nlh	= nlh;
@@ -1622,6 +1651,17 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 			cfg->rc_mp_len = nla_len(nla);
 			break;
 		}
+		case RTA_TTL_PROPAGATE:
+		{
+			u8 ttl_propagate = nla_get_u8(nla);
+
+			if (ttl_propagate > 1)
+				goto errout;
+			cfg->rc_ttl_propagate = ttl_propagate ?
+				MPLS_TTL_PROP_ENABLED :
+				MPLS_TTL_PROP_DISABLED;
+			break;
+		}
 		default:
 			/* Unsupported attribute */
 			goto errout;
@@ -1682,6 +1722,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
 
 	if (nla_put_labels(skb, RTA_DST, 1, &label))
 		goto nla_put_failure;
+
+	if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
+		bool ttl_propagate =
+			rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
+
+		if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
+			       ttl_propagate))
+			goto nla_put_failure;
+	}
 	if (rt->rt_nhn == 1) {
 		const struct mpls_nh *nh = rt->rt_nh;
 
@@ -1792,7 +1841,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
 {
 	size_t payload =
 		NLMSG_ALIGN(sizeof(struct rtmsg))
-		+ nla_total_size(4);			/* RTA_DST */
+		+ nla_total_size(4)			/* RTA_DST */
+		+ nla_total_size(1);			/* RTA_TTL_PROPAGATE */
 
 	if (rt->rt_nhn == 1) {
 		struct mpls_nh *nh = rt->rt_nh;
@@ -1876,6 +1926,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
 		rt0->rt_protocol = RTPROT_KERNEL;
 		rt0->rt_payload_type = MPT_IPV4;
+		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
 		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
 		rt0->rt_nh->nh_via_alen = lo->addr_len;
 		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
@@ -1889,6 +1940,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
 		rt2->rt_protocol = RTPROT_KERNEL;
 		rt2->rt_payload_type = MPT_IPV6;
+		rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
 		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
 		rt2->rt_nh->nh_via_alen = lo->addr_len;
 		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
@@ -1970,6 +2022,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
 	return ret;
 }
 
+#define MPLS_NS_SYSCTL_OFFSET(field)		\
+	(&((struct net *)0)->field)
+
 static const struct ctl_table mpls_table[] = {
 	{
 		.procname	= "platform_labels",
@@ -1978,21 +2033,37 @@ static const struct ctl_table mpls_table[] = {
 		.mode		= 0644,
 		.proc_handler	= mpls_platform_labels,
 	},
+	{
+		.procname	= "ip_ttl_propagate",
+		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{ }
 };
 
 static int mpls_net_init(struct net *net)
 {
 	struct ctl_table *table;
+	int i;
 
 	net->mpls.platform_labels = 0;
 	net->mpls.platform_label = NULL;
+	net->mpls.ip_ttl_propagate = 1;
 
 	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
 	if (table == NULL)
 		return -ENOMEM;
 
-	table[0].data = net;
+	/* Table data contains only offsets relative to the base of
+	 * struct net at this point, so make them absolute.
+	 */
+	for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+		table[i].data = (char *)net + (uintptr_t)table[i].data;
+
 	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
 	if (net->mpls.ctl == NULL) {
 		kfree(table);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 76360d8b9579..62928d8fabd1 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -90,6 +90,12 @@ struct mpls_nh { /* next hop label forwarding entry */
 	u8			nh_via_table;
 };
 
+enum mpls_ttl_propagation {
+	MPLS_TTL_PROP_DEFAULT,
+	MPLS_TTL_PROP_ENABLED,
+	MPLS_TTL_PROP_DISABLED,
+};
+
 /* The route, nexthops and vias are stored together in the same memory
  * block:
  *
@@ -116,6 +122,7 @@ struct mpls_route { /* next hop label forwarding entry */
 	u8			rt_protocol;
 	u8			rt_payload_type;
 	u8			rt_max_alen;
+	u8			rt_ttl_propagate;
 	unsigned int		rt_nhn;
 	unsigned int		rt_nhn_alive;
 	struct mpls_nh		rt_nh[0];
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH net-next v3 2/2] mpls: allow TTL propagation from IP packets to be configured
  2017-03-10 20:43   ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Robert Shearman
  2017-03-10 20:43     ` [PATCH net-next v3 1/2] mpls: allow TTL propagation to " Robert Shearman
@ 2017-03-10 20:43     ` Robert Shearman
  2017-03-13 18:43       ` David Ahern
  2017-03-13 20:28     ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Roopa Prabhu
  2017-03-13 22:29     ` David Miller
  3 siblings, 1 reply; 25+ messages in thread
From: Robert Shearman @ 2017-03-10 20:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric W. Biederman, roopa, David Ahern, Robert Shearman

Allow TTL propagation from IP packets to MPLS packets to be
configured. Add a new optional LWT attribute, MPLS_IPTUNNEL_TTL, which
allows the TTL to be set in the resulting MPLS packet, with the value
of 0 having the semantics of enabling propagation of the TTL from the
IP header (i.e. non-zero values disable propagation).

Also allow the configuration to be overridden globally by reusing the
same sysctl to control whether the TTL is propagated from IP packets
into the MPLS header. If the per-LWT attribute is set then it
overrides the global configuration. If the TTL isn't propagated then a
default TTL value is used which can be configured via a new sysctl,
"net.mpls.default_ttl". This is kept separate from the configuration
of whether IP TTL propagation is enabled as it can be used in the
future when non-IP payloads are supported (i.e. where there is no
payload TTL that can be propagated).

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 Documentation/networking/mpls-sysctl.txt |  8 ++++
 include/net/mpls_iptunnel.h              |  2 +
 include/net/netns/mpls.h                 |  1 +
 include/uapi/linux/mpls_iptunnel.h       |  2 +
 net/mpls/af_mpls.c                       | 11 +++++
 net/mpls/mpls_iptunnel.c                 | 73 ++++++++++++++++++++++++++------
 6 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 9badd1d6685f..2f24a1912a48 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -30,6 +30,14 @@ ip_ttl_propagate - BOOL
 	0 - disabled / RFC 3443 [Short] Pipe Model
 	1 - enabled / RFC 3443 Uniform Model (default)
 
+default_ttl - INTEGER
+	Default TTL value to use for MPLS packets where it cannot be
+	propagated from an IP header, either because one isn't present
+	or ip_ttl_propagate has been disabled.
+
+	Possible values: 1 - 255
+	Default: 255
+
 conf/<interface>/input - BOOL
 	Control whether packets can be input on this interface.
 
diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h
index 179253f9dcfd..a18af6a16eb5 100644
--- a/include/net/mpls_iptunnel.h
+++ b/include/net/mpls_iptunnel.h
@@ -19,6 +19,8 @@
 struct mpls_iptunnel_encap {
 	u32	label[MAX_NEW_LABELS];
 	u8	labels;
+	u8	ttl_propagate;
+	u8	default_ttl;
 };
 
 static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index 08652eedabb2..6608b3693385 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -10,6 +10,7 @@ struct ctl_table_header;
 
 struct netns_mpls {
 	int ip_ttl_propagate;
+	int default_ttl;
 	size_t platform_labels;
 	struct mpls_route __rcu * __rcu *platform_label;
 
diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h
index d80a0498f77e..f5e45095b0bb 100644
--- a/include/uapi/linux/mpls_iptunnel.h
+++ b/include/uapi/linux/mpls_iptunnel.h
@@ -16,11 +16,13 @@
 /* MPLS tunnel attributes
  * [RTA_ENCAP] = {
  *     [MPLS_IPTUNNEL_DST]
+ *     [MPLS_IPTUNNEL_TTL]
  * }
  */
 enum {
 	MPLS_IPTUNNEL_UNSPEC,
 	MPLS_IPTUNNEL_DST,
+	MPLS_IPTUNNEL_TTL,
 	__MPLS_IPTUNNEL_MAX,
 };
 #define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 0e1046f21af8..0c5d111abe36 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -34,6 +34,7 @@
 static int zero = 0;
 static int one = 1;
 static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
 
 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
 		       struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -2042,6 +2043,15 @@ static const struct ctl_table mpls_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "default_ttl",
+		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &ttl_max,
+	},
 	{ }
 };
 
@@ -2053,6 +2063,7 @@ static int mpls_net_init(struct net *net)
 	net->mpls.platform_labels = 0;
 	net->mpls.platform_label = NULL;
 	net->mpls.ip_ttl_propagate = 1;
+	net->mpls.default_ttl = 255;
 
 	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
 	if (table == NULL)
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index e4e4424f9eb1..22f71fce0bfb 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -29,6 +29,7 @@
 
 static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
 	[MPLS_IPTUNNEL_DST]	= { .type = NLA_U32 },
+	[MPLS_IPTUNNEL_TTL]	= { .type = NLA_U8 },
 };
 
 static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
@@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb)
 	struct rtable *rt = NULL;
 	struct rt6_info *rt6 = NULL;
 	struct mpls_dev *out_mdev;
+	struct net *net;
 	int err = 0;
 	bool bos;
 	int i;
@@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb)
 
 	/* Find the output device */
 	out_dev = dst->dev;
-
-	/* Obtain the ttl */
-	if (dst->ops->family == AF_INET) {
-		ttl = ip_hdr(skb)->ttl;
-		rt = (struct rtable *)dst;
-	} else if (dst->ops->family == AF_INET6) {
-		ttl = ipv6_hdr(skb)->hop_limit;
-		rt6 = (struct rt6_info *)dst;
-	} else {
-		goto drop;
-	}
+	net = dev_net(out_dev);
 
 	skb_orphan(skb);
 
@@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb)
 
 	tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
 
+	/* Obtain the ttl using the following set of rules.
+	 *
+	 * LWT ttl propagation setting:
+	 *  - disabled => use default TTL value from LWT
+	 *  - enabled  => use TTL value from IPv4/IPv6 header
+	 *  - default  =>
+	 *   Global ttl propagation setting:
+	 *    - disabled => use default TTL value from global setting
+	 *    - enabled => use TTL value from IPv4/IPv6 header
+	 */
+	if (dst->ops->family == AF_INET) {
+		if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+			ttl = tun_encap_info->default_ttl;
+		else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+			 !net->mpls.ip_ttl_propagate)
+			ttl = net->mpls.default_ttl;
+		else
+			ttl = ip_hdr(skb)->ttl;
+		rt = (struct rtable *)dst;
+	} else if (dst->ops->family == AF_INET6) {
+		if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+			ttl = tun_encap_info->default_ttl;
+		else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+			 !net->mpls.ip_ttl_propagate)
+			ttl = net->mpls.default_ttl;
+		else
+			ttl = ipv6_hdr(skb)->hop_limit;
+		rt6 = (struct rt6_info *)dst;
+	} else {
+		goto drop;
+	}
+
 	/* Verify the destination can hold the packet */
 	new_header_size = mpls_encap_size(tun_encap_info);
 	mtu = mpls_dev_mtu(out_dev);
@@ -160,6 +184,17 @@ static int mpls_build_state(struct nlattr *nla,
 			     &tun_encap_info->labels, tun_encap_info->label);
 	if (ret)
 		goto errout;
+
+	tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+
+	if (tb[MPLS_IPTUNNEL_TTL]) {
+		tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
+		/* TTL 0 implies propagate from IP header */
+		tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
+			MPLS_TTL_PROP_DISABLED :
+			MPLS_TTL_PROP_ENABLED;
+	}
+
 	newts->type = LWTUNNEL_ENCAP_MPLS;
 	newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
 	newts->headroom = mpls_encap_size(tun_encap_info);
@@ -186,6 +221,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
 			   tun_encap_info->label))
 		goto nla_put_failure;
 
+	if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
+	    nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -195,10 +234,16 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
 static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
 {
 	struct mpls_iptunnel_encap *tun_encap_info;
+	int nlsize;
 
 	tun_encap_info = mpls_lwtunnel_encap(lwtstate);
 
-	return nla_total_size(tun_encap_info->labels * 4);
+	nlsize = nla_total_size(tun_encap_info->labels * 4);
+
+	if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
+		nlsize += nla_total_size(1);
+
+	return nlsize;
 }
 
 static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -207,7 +252,9 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 	struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
 	int l;
 
-	if (a_hdr->labels != b_hdr->labels)
+	if (a_hdr->labels != b_hdr->labels ||
+	    a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
+	    a_hdr->default_ttl != b_hdr->default_ttl)
 		return 1;
 
 	for (l = 0; l < MAX_NEW_LABELS; l++)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v3 1/2] mpls: allow TTL propagation to IP packets to be configured
  2017-03-10 20:43     ` [PATCH net-next v3 1/2] mpls: allow TTL propagation to " Robert Shearman
@ 2017-03-13 18:42       ` David Ahern
  0 siblings, 0 replies; 25+ messages in thread
From: David Ahern @ 2017-03-13 18:42 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev, Eric W. Biederman, roopa

On 3/10/17 1:43 PM, Robert Shearman wrote:
> Provide the ability to control on a per-route basis whether the TTL
> value from an MPLS packet is propagated to an IPv4/IPv6 packet when
> the last label is popped as per the theoretical model in RFC 3443
> through a new route attribute, RTA_TTL_PROPAGATE which can be 0 to
> mean disable propagation and 1 to mean enable propagation.
> 
> In order to provide the ability to change the behaviour for packets
> arriving with IPv4/IPv6 Explicit Null labels and to provide an easy
> way for a user to change the behaviour for all existing routes without
> having to reprogram them, a global knob is provided. This is done
> through the addition of a new per-namespace sysctl,
> "net.mpls.ip_ttl_propagate", which defaults to enabled. If the
> per-route attribute is set (either enabled or disabled) then it
> overrides the global configuration.
> 
> Signed-off-by: Robert Shearman <rshearma@brocade.com>
> ---
>  Documentation/networking/mpls-sysctl.txt | 11 ++++
>  include/net/netns/mpls.h                 |  2 +
>  include/uapi/linux/rtnetlink.h           |  1 +
>  net/mpls/af_mpls.c                       | 87 +++++++++++++++++++++++++++++---
>  net/mpls/internal.h                      |  7 +++
>  5 files changed, 100 insertions(+), 8 deletions(-)

Acked-by: David Ahern <dsa@cumulusnetworks.com>
Tested-by: David Ahern <dsa@cumulusnetworks.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v3 2/2] mpls: allow TTL propagation from IP packets to be configured
  2017-03-10 20:43     ` [PATCH net-next v3 2/2] mpls: allow TTL propagation from " Robert Shearman
@ 2017-03-13 18:43       ` David Ahern
  0 siblings, 0 replies; 25+ messages in thread
From: David Ahern @ 2017-03-13 18:43 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev, Eric W. Biederman, roopa

On 3/10/17 1:43 PM, Robert Shearman wrote:
> Allow TTL propagation from IP packets to MPLS packets to be
> configured. Add a new optional LWT attribute, MPLS_IPTUNNEL_TTL, which
> allows the TTL to be set in the resulting MPLS packet, with the value
> of 0 having the semantics of enabling propagation of the TTL from the
> IP header (i.e. non-zero values disable propagation).
> 
> Also allow the configuration to be overridden globally by reusing the
> same sysctl to control whether the TTL is propagated from IP packets
> into the MPLS header. If the per-LWT attribute is set then it
> overrides the global configuration. If the TTL isn't propagated then a
> default TTL value is used which can be configured via a new sysctl,
> "net.mpls.default_ttl". This is kept separate from the configuration
> of whether IP TTL propagation is enabled as it can be used in the
> future when non-IP payloads are supported (i.e. where there is no
> payload TTL that can be propagated).
> 
> Signed-off-by: Robert Shearman <rshearma@brocade.com>
> ---
>  Documentation/networking/mpls-sysctl.txt |  8 ++++
>  include/net/mpls_iptunnel.h              |  2 +
>  include/net/netns/mpls.h                 |  1 +
>  include/uapi/linux/mpls_iptunnel.h       |  2 +
>  net/mpls/af_mpls.c                       | 11 +++++
>  net/mpls/mpls_iptunnel.c                 | 73 ++++++++++++++++++++++++++------
>  6 files changed, 84 insertions(+), 13 deletions(-)

Acked-by: David Ahern <dsa@cumulusnetworks.com>
Tested-by: David Ahern <dsa@cumulusnetworks.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from IP packets to be configured
  2017-03-10 20:43   ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Robert Shearman
  2017-03-10 20:43     ` [PATCH net-next v3 1/2] mpls: allow TTL propagation to " Robert Shearman
  2017-03-10 20:43     ` [PATCH net-next v3 2/2] mpls: allow TTL propagation from " Robert Shearman
@ 2017-03-13 20:28     ` Roopa Prabhu
  2017-03-13 22:29     ` David Miller
  3 siblings, 0 replies; 25+ messages in thread
From: Roopa Prabhu @ 2017-03-13 20:28 UTC (permalink / raw)
  To: Robert Shearman; +Cc: davem, netdev, Eric W. Biederman, David Ahern

On 3/10/17, 12:43 PM, Robert Shearman wrote:
> It is sometimes desirable to present an MPLS transport network as a
> single hop to traffic transiting it because it prevents confusion when
> diagnosing failures. An example of where confusion can be generated is
> when addresses used in the provider network overlap with addresses in
> the overlay network and the addresses get exposed through ICMP errors
> generated as packets transit the provider network.
>
> In addition, RFC 3443 defines two methods of deriving TTL for an
> outgoing packet: Uniform Model where the TTL is propagated to/from the
> MPLS header and both Pipe Models and Short Pipe Models (with and
> without PHP) where the TTL is not propagated to/from the MPLS header.
>
> Changes in v3:
>  - decrement ttl on popping last label when not doing ttl propagation,
>    as suggested by David Ahern.
>  - add comment to describe what the somewhat complex conditionals are
>    doing to work out what ttl to use in mpls_iptunnel.c.
>  - rearrange fields in struct netns_mpls to keep the platform
>    label fields together, as suggested by David Ahern.
>
> Changes in v2:
>  - add references to RFC 3443 as suggested by David Ahern
>  - fix setting of skb->protocol as noticed by David Ahern
>  - implement per-route/per-LWT configurability as suggested by Eric
>    Biederman
>  - split into two patches for ease of review
>
> Robert Shearman (2):
>   mpls: allow TTL propagation to IP packets to be configured
>   mpls: allow TTL propagation from IP packets to be configured
>
>  
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from IP packets to be configured
  2017-03-10 20:43   ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Robert Shearman
                       ` (2 preceding siblings ...)
  2017-03-13 20:28     ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Roopa Prabhu
@ 2017-03-13 22:29     ` David Miller
  3 siblings, 0 replies; 25+ messages in thread
From: David Miller @ 2017-03-13 22:29 UTC (permalink / raw)
  To: rshearma; +Cc: netdev, ebiederm, roopa, dsa

From: Robert Shearman <rshearma@brocade.com>
Date: Fri, 10 Mar 2017 20:43:23 +0000

> It is sometimes desirable to present an MPLS transport network as a
> single hop to traffic transiting it because it prevents confusion when
> diagnosing failures. An example of where confusion can be generated is
> when addresses used in the provider network overlap with addresses in
> the overlay network and the addresses get exposed through ICMP errors
> generated as packets transit the provider network.
> 
> In addition, RFC 3443 defines two methods of deriving TTL for an
> outgoing packet: Uniform Model where the TTL is propagated to/from the
> MPLS header and both Pipe Models and Short Pipe Models (with and
> without PHP) where the TTL is not propagated to/from the MPLS header.
> 
> Changes in v3:
>  - decrement ttl on popping last label when not doing ttl propagation,
>    as suggested by David Ahern.
>  - add comment to describe what the somewhat complex conditionals are
>    doing to work out what ttl to use in mpls_iptunnel.c.
>  - rearrange fields in struct netns_mpls to keep the platform
>    label fields together, as suggested by David Ahern.
> 
> Changes in v2:
>  - add references to RFC 3443 as suggested by David Ahern
>  - fix setting of skb->protocol as noticed by David Ahern
>  - implement per-route/per-LWT configurability as suggested by Eric
>    Biederman
>  - split into two patches for ease of review

Series applied, thanks.

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2017-03-13 22:29 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-30 20:36 [PATCH net-next] mpls: allow TTL propagation to/from IP packets to be configured Robert Shearman
2017-01-31  0:17 ` Eric W. Biederman
2017-01-31 11:59   ` Robert Shearman
2017-02-03  3:21     ` Eric W. Biederman
2017-02-03  4:02       ` David Ahern
2017-01-31  0:41 ` David Ahern
2017-01-31 12:00   ` Robert Shearman
2017-01-31  1:09 ` David Ahern
2017-01-31 12:01   ` Robert Shearman
2017-03-08  0:46 ` [PATCH net-next v2 0/2] " Robert Shearman
2017-03-08  0:46   ` [PATCH net-next v2 1/2] mpls: allow TTL propagation to " Robert Shearman
2017-03-10  2:00     ` David Ahern
2017-03-10 10:12       ` Robert Shearman
2017-03-10  2:40     ` David Ahern
2017-03-10 10:12       ` Robert Shearman
2017-03-08  0:46   ` [PATCH net-next v2 2/2] mpls: allow TTL propagation from " Robert Shearman
2017-03-10  2:54     ` David Ahern
2017-03-10 10:12       ` Robert Shearman
2017-03-10 20:43   ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Robert Shearman
2017-03-10 20:43     ` [PATCH net-next v3 1/2] mpls: allow TTL propagation to " Robert Shearman
2017-03-13 18:42       ` David Ahern
2017-03-10 20:43     ` [PATCH net-next v3 2/2] mpls: allow TTL propagation from " Robert Shearman
2017-03-13 18:43       ` David Ahern
2017-03-13 20:28     ` [PATCH net-next v3 0/2] mpls: allow TTL propagation to/from " Roopa Prabhu
2017-03-13 22:29     ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).