All of lore.kernel.org
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: David Miller <davem@davemloft.net>
Cc: <netdev@vger.kernel.org>, roopa <roopa@cumulusnetworks.com>,
	Stephen Hemminger <stephen@networkplumber.org>,
	santiago@crfreenet.org, Simon Horman <horms@verge.net.au>
Subject: [PATCH net-next 3/7] mpls: Add a sysctl to control the size of the mpls label table
Date: Tue, 03 Mar 2015 19:11:20 -0600	[thread overview]
Message-ID: <87egp5tvlz.fsf_-_@x220.int.ebiederm.org> (raw)
In-Reply-To: <87vbihtvts.fsf_-_@x220.int.ebiederm.org> (Eric W. Biederman's message of "Tue, 03 Mar 2015 19:06:39 -0600")


This sysctl gives two benefits.  By defaulting the table size to 0
mpls even when compiled in and enabled defaults to not forwarding
any packets.  This prevents unpleasant surprises for users.

The other benefit is that as mpls labels are allocated locally a dense
table a small dense label table may be used which saves memory and
is extremely simple and efficient to implement.

This sysctl allows userspace to choose the restrictions on the label
table size userspace applications need to cope with.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 Documentation/networking/mpls-sysctl.txt |  20 +++++
 include/net/netns/mpls.h                 |   2 +
 net/mpls/af_mpls.c                       | 146 +++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+)
 create mode 100644 Documentation/networking/mpls-sysctl.txt

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
new file mode 100644
index 000000000000..639ddf0ece9b
--- /dev/null
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -0,0 +1,20 @@
+/proc/sys/net/mpls/* Variables:
+
+platform_labels - INTEGER
+	Number of entries in the platform label table.  It is not
+	possible to configure forwarding for label values equal to or
+	greater than the number of platform labels.
+
+	A dense utliziation of the entries in the platform label table
+	is possible and expected aas the platform labels are locally
+	allocated.
+
+	If the number of platform label table entries is set to 0 no
+	label will be recognized by the kernel and mpls forwarding
+	will be disabled.
+
+	Reducing this value will remove all label routing entries that
+	no longer fit in the table.
+
+	Possible values: 0 - 1048575
+	Default: 0
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index f90aaf8d4f89..d29203651c01 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -6,10 +6,12 @@
 #define __NETNS_MPLS_H__
 
 struct mpls_route;
+struct ctl_table_header;
 
 struct netns_mpls {
 	size_t platform_labels;
 	struct mpls_route __rcu * __rcu *platform_label;
+	struct ctl_table_header *ctl;
 };
 
 #endif /* __NETNS_MPLS_H__ */
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 924377736b2a..b097125dfa33 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1,6 +1,7 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/socket.h>
+#include <linux/sysctl.h>
 #include <linux/net.h>
 #include <linux/module.h>
 #include <linux/if_arp.h>
@@ -31,6 +32,9 @@ struct mpls_route { /* next hop label forwarding entry */
 	u8			rt_via[0];
 };
 
+static int zero = 0;
+static int label_limit = (1 << 20) - 1;
+
 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
 {
 	struct mpls_route *rt = NULL;
@@ -273,18 +277,160 @@ static struct notifier_block mpls_dev_notifier = {
 	.notifier_call = mpls_dev_notify,
 };
 
+static int resize_platform_label_table(struct net *net, size_t limit)
+{
+	size_t size = sizeof(struct mpls_route *) * limit;
+	size_t old_limit;
+	size_t cp_size;
+	struct mpls_route __rcu **labels = NULL, **old;
+	struct mpls_route *rt0 = NULL, *rt2 = NULL;
+	unsigned index;
+
+	if (size) {
+		labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+		if (!labels)
+			labels = vzalloc(size);
+
+		if (!labels)
+			goto nolabels;
+	}
+
+	/* In case the predefined labels need to be populated */
+	if (limit > LABEL_IPV4_EXPLICIT_NULL) {
+		struct net_device *lo = net->loopback_dev;
+		rt0 = mpls_rt_alloc(lo->addr_len);
+		if (!rt0)
+			goto nort0;
+		rt0->rt_dev = lo;
+		rt0->rt_protocol = RTPROT_KERNEL;
+		rt0->rt_via_family = AF_PACKET;
+		memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
+	}
+	if (limit > LABEL_IPV6_EXPLICIT_NULL) {
+		struct net_device *lo = net->loopback_dev;
+		rt2 = mpls_rt_alloc(lo->addr_len);
+		if (!rt2)
+			goto nort2;
+		rt2->rt_dev = lo;
+		rt2->rt_protocol = RTPROT_KERNEL;
+		rt2->rt_via_family = AF_PACKET;
+		memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
+	}
+
+	rtnl_lock();
+	/* Remember the original table */
+	old = net->mpls.platform_label;
+	old_limit = net->mpls.platform_labels;
+
+	/* Free any labels beyond the new table */
+	for (index = limit; index < old_limit; index++)
+		mpls_route_update(net, index, NULL, NULL, NULL);
+
+	/* Copy over the old labels */
+	cp_size = size;
+	if (old_limit < limit)
+		cp_size = old_limit * sizeof(struct mpls_route *);
+
+	memcpy(labels, old, cp_size);
+
+	/* If needed set the predefined labels */
+	if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) &&
+	    (limit > LABEL_IPV6_EXPLICIT_NULL)) {
+		labels[LABEL_IPV6_EXPLICIT_NULL] = rt2;
+		rt2 = NULL;
+	}
+
+	if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) &&
+	    (limit > LABEL_IPV4_EXPLICIT_NULL)) {
+		labels[LABEL_IPV4_EXPLICIT_NULL] = rt0;
+		rt0 = NULL;
+	}
+
+	/* Update the global pointers */
+	net->mpls.platform_labels = limit;
+	net->mpls.platform_label = labels;
+
+	rtnl_unlock();
+
+	mpls_rt_free(rt2);
+	mpls_rt_free(rt0);
+
+	if (old) {
+		synchronize_rcu();
+		kvfree(old);
+	}
+	return 0;
+
+nort2:
+	mpls_rt_free(rt0);
+nort0:
+	kvfree(labels);
+nolabels:
+	return -ENOMEM;
+}
+
+static int mpls_platform_labels(struct ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct net *net = table->data;
+	int platform_labels = net->mpls.platform_labels;
+	int ret;
+	struct ctl_table tmp = {
+		.procname	= table->procname,
+		.data		= &platform_labels,
+		.maxlen		= sizeof(int),
+		.mode		= table->mode,
+		.extra1		= &zero,
+		.extra2		= &label_limit,
+	};
+
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+	if (write && ret == 0)
+		ret = resize_platform_label_table(net, platform_labels);
+
+	return ret;
+}
+
+static struct ctl_table mpls_table[] = {
+	{
+		.procname	= "platform_labels",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= mpls_platform_labels,
+	},
+	{ }
+};
+
 static int mpls_net_init(struct net *net)
 {
+	struct ctl_table *table;
+
 	net->mpls.platform_labels = 0;
 	net->mpls.platform_label = NULL;
 
+	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
+	if (table == NULL)
+		return -ENOMEM;
+
+	table[0].data = net;
+	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
+	if (net->mpls.ctl == NULL)
+		return -ENOMEM;
+
 	return 0;
 }
 
 static void mpls_net_exit(struct net *net)
 {
+	struct ctl_table *table;
 	unsigned int index;
 
+	table = net->mpls.ctl->ctl_table_arg;
+	unregister_net_sysctl_table(net->mpls.ctl);
+	kfree(table);
+
 	/* An rcu grace period haselapsed since there was a device in
 	 * the network namespace (and thus the last in fqlight packet)
 	 * left this network namespace.  This is because
-- 
2.2.1

  parent reply	other threads:[~2015-03-04  1:15 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-25 17:09 [PATCH net-next 0/8] Basic MPLS support Eric W. Biederman
2015-02-25 17:13 ` [PATCH net-next 1/8] mpls: Refactor how the mpls module is built Eric W. Biederman
2015-02-26  2:05   ` Simon Horman
2015-02-26  2:15     ` Eric W. Biederman
2015-02-26  2:28       ` Simon Horman
2015-02-25 17:14 ` [PATCH net-next 2/8] mpls: Basic routing support Eric W. Biederman
2015-02-25 17:15 ` [PATCH net-next 3/8] mpls: Add a sysctl to control the size of the mpls label table Eric W. Biederman
2015-02-25 17:16 ` [PATCH net-next 4/8] mpls: Basic support for adding and removing routes Eric W. Biederman
2015-02-25 17:16 ` [PATCH net-next 5/8] mpls: Functions for reading and wrinting mpls labels over netlink Eric W. Biederman
2015-02-25 17:17 ` [PATCH net-next 6/8] mpls: Netlink commands to add, remove, and dump routes Eric W. Biederman
2015-02-25 17:18 ` [PATCH net-next 8/8] ipmpls: Basic device for injecting packets into an mpls tunnel Eric W. Biederman
2015-03-05  9:17   ` Vivek Venkatraman
2015-03-05 14:00     ` Eric W. Biederman
2015-03-05 16:25       ` Vivek Venkatraman
2015-03-05 19:52         ` Eric W. Biederman
2015-03-06  6:05           ` Vivek Venkatraman
2015-03-07 10:36           ` Robert Shearman
2015-03-07 21:12             ` Eric W. Biederman
2015-02-25 17:19 ` [PATCH net-next 7/8] mpls: Multicast route table change notifications Eric W. Biederman
2015-02-26  7:21   ` roopa
2015-02-26 14:03     ` Eric W. Biederman
2015-02-26 15:12       ` roopa
2015-03-05  1:56         ` Andy Gospodarek
2015-02-25 17:37 ` [PATCH iproute2] mpls: Add basic mpls support to iproute Eric W. Biederman
2015-02-26  6:58 ` [PATCH net-next 0/8] Basic MPLS support roopa
2015-02-27 21:21 ` David Miller
2015-02-28  0:58   ` Eric W. Biederman
2015-03-02  0:05     ` Shrijeet Mukherjee
2015-03-02  4:03     ` David Miller
2015-03-02  5:10       ` Eric W. Biederman
2015-03-02  5:53         ` David Miller
2015-03-02  5:59         ` [PATCH net-next 0/15] Neighbour table and ax25 cleanups Eric W. Biederman
2015-03-02  5:59           ` [PATCH net-next 01/15] ax25: In ax25_rebuild_header add missing kfree_skb Eric W. Biederman
2015-03-02  6:01           ` [PATCH net-next 02/15] rose: Set the destination address in rose_header Eric W. Biederman
2015-03-02  6:02           ` [PATCH net-next 03/15] rose: Transmit packets in rose_xmit not rose_rebuild_header Eric W. Biederman
2015-03-02  6:03           ` [PATCH net-next 04/15] ax25/kiss: Replace ax_header_ops with ax25_header_ops Eric W. Biederman
2015-03-02  6:03           ` [PATCH net-next 05/15] ax25/6pack: Replace sp_header_ops " Eric W. Biederman
2015-03-02  6:04           ` [PATCH net-next 06/15] ax25: Make ax25_header and ax25_rebuild_header static Eric W. Biederman
2015-03-02  6:05           ` [PATCH net-next 07/15] ax25: Refactor to use private neighbour operations Eric W. Biederman
2015-03-02  6:06           ` [PATCH net-next 08/15] arp: Remove special case to give AX25 it's open arp operations Eric W. Biederman
2015-03-02  6:07           ` [PATCH net-next 09/15] neigh: Move neigh_compat_output into ax25_ip.c Eric W. Biederman
2015-03-02  6:08           ` [PATCH net-next 10/15] ax25: Stop calling/abusing dev_rebuild_header Eric W. Biederman
2015-03-02  6:09           ` [PATCH net-next 11/15] ax25: Stop depending on arp_find Eric W. Biederman
2015-03-02  6:11           ` [PATCH net-next 12/15] net: Kill dev_rebuild_header Eric W. Biederman
2015-03-02  6:12           ` [PATCH net-next 13/15] arp: Kill arp_find Eric W. Biederman
2015-03-02  6:13           ` [PATCH net-next 14/15] neigh: Don't require dst in neigh_hh_init Eric W. Biederman
2015-03-02  6:14           ` [PATCH net-next 15/15] neigh: Don't require a dst in neigh_resolve_output Eric W. Biederman
2015-03-02 21:44           ` [PATCH net-next 0/15] Neighbour table and ax25 cleanups David Miller
2015-03-03 15:41             ` [PATCH net-next] ax25: Stop using magic neighbour cache operations Eric W. Biederman
2015-03-03 19:45               ` David Miller
2015-03-03 20:22                 ` Eric W. Biederman
2015-03-03 20:33                   ` David Miller
2015-03-03 23:09                     ` [PATCH net-next 0/2] Neighbour table prep for MPLS Eric W. Biederman
2015-03-03 23:10                       ` [PATCH net-next 1/2] neigh: Factor out ___neigh_lookup_noref Eric W. Biederman
2015-03-04 14:53                         ` Andy Gospodarek
2015-03-04 15:58                           ` Eric W. Biederman
2015-03-04 16:30                             ` Andy Gospodarek
2015-03-03 23:11                       ` [PATCH net-next 2/2] neigh: Add helper function neigh_xmit Eric W. Biederman
2015-03-04  1:06                       ` [PATCH net-next 0/7] Basic MPLS support take 2 Eric W. Biederman
2015-03-04  1:10                         ` [PATCH net-next 1/7] mpls: Refactor how the mpls module is built Eric W. Biederman
2015-03-04  1:10                         ` [PATCH net-next 2/7] mpls: Basic routing support Eric W. Biederman
2015-03-05 16:36                           ` Vivek Venkatraman
2015-03-05 18:42                             ` Eric W. Biederman
2015-03-04  1:11                         ` Eric W. Biederman [this message]
2015-03-05  9:45                           ` [PATCH net-next 3/7] mpls: Add a sysctl to control the size of the mpls label table Vivek Venkatraman
2015-03-05 13:22                             ` Eric W. Biederman
2015-03-05 14:38                               ` Eric W. Biederman
2015-03-05 16:49                                 ` Vivek Venkatraman
2015-03-04  1:12                         ` [PATCH net-next 4/7] mpls: Basic support for adding and removing routes Eric W. Biederman
2015-03-04  8:13                           ` roopa
2015-03-04 20:36                             ` Eric W. Biederman
2015-03-05  0:30                               ` roopa
2015-03-05  2:50                               ` Bill Fink
2015-03-05 11:54                                 ` Eric W. Biederman
2015-03-05 19:10                                   ` Bill Fink
2015-03-04  1:13                         ` [PATCH net-next 5/7] mpls: Functions for reading and wrinting mpls labels over netlink Eric W. Biederman
2015-03-04  1:13                         ` [PATCH net-next 6/7] mpls: Netlink commands to add, remove, and dump routes Eric W. Biederman
2015-03-04  1:14                         ` [PATCH net-next 7/7] mpls: Multicast route table change notifications Eric W. Biederman
2015-03-04  5:27                         ` [PATCH net-next 0/7] Basic MPLS support take 2 David Miller
2015-03-04  6:13                           ` Eric W. Biederman
2015-03-04  5:25                       ` [PATCH net-next 0/2] Neighbour table prep for MPLS David Miller
2015-03-04  5:53                         ` Eric W. Biederman
2015-03-04 14:56                           ` Andy Gospodarek
2015-03-04 21:04                           ` David Miller
2015-03-05 12:35                             ` Eric W. Biederman
2015-03-05 10:14                   ` [PATCH net-next] ax25: Stop using magic neighbour cache operations Steven Whitehouse
2015-03-06 20:44                     ` Eric W. Biederman
2015-03-14  0:33                       ` Steven Whitehouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87egp5tvlz.fsf_-_@x220.int.ebiederm.org \
    --to=ebiederm@xmission.com \
    --cc=davem@davemloft.net \
    --cc=horms@verge.net.au \
    --cc=netdev@vger.kernel.org \
    --cc=roopa@cumulusnetworks.com \
    --cc=santiago@crfreenet.org \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.