All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v5] user namespace: use union in {g,u}idmap struct
@ 2017-10-22 18:40 Christian Brauner
  2017-10-22 18:40 ` [PATCH 2/2 v5] user namespaces: bump idmap limits to 340 Christian Brauner
  0 siblings, 1 reply; 4+ messages in thread
From: Christian Brauner @ 2017-10-22 18:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: serge, stgraber, tycho, Christian Brauner

- Add a struct containing two pointer to extents and wrap both the static extent
  array and the struct into a union. This is done in preparation for bumping the
  {g,u}idmap limits for user namespaces.
- Add brackets around anonymous union when using designated initializers to
  initialize members in order to please gcc <= 4.4.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
Changelog 2017-10-22:
* no changes

Changelog 2017-10-19:
* kernel/user.c: Use brackets around anonymous union when using designated
  initializers to initialize members. This is done to please gcc <= 4.4.
---
 include/linux/user_namespace.h | 18 +++++++++++++-----
 kernel/user.c                  | 30 ++++++++++++++++++------------
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index c18e01252346..7c83d7f6289b 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -12,13 +12,21 @@
 
 #define UID_GID_MAP_MAX_EXTENTS 5
 
+struct uid_gid_extent {
+	u32 first;
+	u32 lower_first;
+	u32 count;
+};
+
 struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 	u32 nr_extents;
-	struct uid_gid_extent {
-		u32 first;
-		u32 lower_first;
-		u32 count;
-	} extent[UID_GID_MAP_MAX_EXTENTS];
+	union {
+		struct uid_gid_extent extent[UID_GID_MAP_MAX_EXTENTS];
+		struct {
+			struct uid_gid_extent *forward;
+			struct uid_gid_extent *reverse;
+		};
+	};
 };
 
 #define USERNS_SETGROUPS_ALLOWED 1UL
diff --git a/kernel/user.c b/kernel/user.c
index 00281add65b2..9a20acce460d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -26,26 +26,32 @@
 struct user_namespace init_user_ns = {
 	.uid_map = {
 		.nr_extents = 1,
-		.extent[0] = {
-			.first = 0,
-			.lower_first = 0,
-			.count = 4294967295U,
+		{
+			.extent[0] = {
+				.first = 0,
+				.lower_first = 0,
+				.count = 4294967295U,
+			},
 		},
 	},
 	.gid_map = {
 		.nr_extents = 1,
-		.extent[0] = {
-			.first = 0,
-			.lower_first = 0,
-			.count = 4294967295U,
+		{
+			.extent[0] = {
+				.first = 0,
+				.lower_first = 0,
+				.count = 4294967295U,
+			},
 		},
 	},
 	.projid_map = {
 		.nr_extents = 1,
-		.extent[0] = {
-			.first = 0,
-			.lower_first = 0,
-			.count = 4294967295U,
+		{
+			.extent[0] = {
+				.first = 0,
+				.lower_first = 0,
+				.count = 4294967295U,
+			},
 		},
 	},
 	.count = ATOMIC_INIT(3),
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2 v5] user namespaces: bump idmap limits to 340
  2017-10-22 18:40 [PATCH 1/2 v5] user namespace: use union in {g,u}idmap struct Christian Brauner
@ 2017-10-22 18:40 ` Christian Brauner
  2017-10-23  8:02   ` Tycho Andersen
  0 siblings, 1 reply; 4+ messages in thread
From: Christian Brauner @ 2017-10-22 18:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: serge, stgraber, tycho, Christian Brauner, Eric Biederman

There are quite some use cases where users run into the current limit for
{g,u}id mappings. Consider a user requesting us to map everything but 999, and
1001 for a given range of 1000000000 with a sub{g,u}id layout of:

some-user:100000:1000000000
some-user:999:1
some-user:1000:1
some-user:1001:1
some-user:1002:1

This translates to:

MAPPING-TYPE | CONTAINER |    HOST |     RANGE |
-------------|-----------|---------|-----------|
         uid |       999 |     999 |         1 |
         uid |      1001 |    1001 |         1 |
         uid |         0 | 1000000 |       999 |
         uid |      1000 | 1001000 |         1 |
         uid |      1002 | 1001002 | 999998998 |
------------------------------------------------
         gid |       999 |     999 |         1 |
         gid |      1001 |    1001 |         1 |
         gid |         0 | 1000000 |       999 |
         gid |      1000 | 1001000 |         1 |
         gid |      1002 | 1001002 | 999998998 |

which is already the current limit.

As discussed at LPC simply bumping the number of limits is not going to work
since this would mean that struct uid_gid_map won't fit into a single cache-line
anymore thereby regressing performance for the base-cases. The same problem
seems to arise when using a single pointer. So the idea is to use

struct uid_gid_extent {
	u32 first;
	u32 lower_first;
	u32 count;
};

struct uid_gid_map { /* 64 bytes -- 1 cache line */
	u32 nr_extents;
	union {
		struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS];
		struct {
			struct uid_gid_extent *forward;
			struct uid_gid_extent *reverse;
		};
	};
};

For the base cases we will only use the struct uid_gid_extent extent member. If
we go over UID_GID_MAP_MAX_BASE_EXTENTS mappings we perform a single 4k
kmalloc() which means we can have a maximum of 340 mappings
(340 * size(struct uid_gid_extent) = 4080). For the latter case we use two
pointers "forward" and "reverse". The forward pointer points to an array sorted
by "first" and the reverse pointer points to an array sorted by "lower_first".
We can then perform binary search on those arrays.

Performance Testing:
When Eric introduced the extent-based struct uid_gid_map approach he measured
the performanc impact of his idmap changes:

> My benchmark consisted of going to single user mode where nothing else was
> running. On an ext4 filesystem opening 1,000,000 files and looping through all
> of the files 1000 times and calling fstat on the individuals files. This was
> to ensure I was benchmarking stat times where the inodes were in the kernels
> cache, but the inode values were not in the processors cache. My results:

> v3.4-rc1:         ~= 156ns (unmodified v3.4-rc1 with user namespace support disabled)
> v3.4-rc1-userns-: ~= 155ns (v3.4-rc1 with my user namespace patches and user namespace support disabled)
> v3.4-rc1-userns+: ~= 164ns (v3.4-rc1 with my user namespace patches and user namespace support enabled)

I used an identical approach on my laptop. Here's a thorough description of what
I did. I built a 4.14.0-rc4 mainline kernel with my new idmap patches applied. I
booted into single user mode and used an ext4 filesystem to open/create
1,000,000 files. Then I looped through all of the files calling fstat() on each
of them 1000 times and calculated the mean fstat() time for a single file. (The
test program can be found below.)

Here are the results. For fun, I compared the first version of my patch which
scaled linearly with the new version of the patch:

|   # MAPPINGS |   PATCH-V1 | PATCH-NEW |
|--------------|------------|-----------|
|   0 mappings |     158 ns |   158 ns  |
|   1 mappings |     164 ns |   157 ns  |
|   2 mappings |     170 ns |   158 ns  |
|   3 mappings |     175 ns |   161 ns  |
|   5 mappings |     187 ns |   165 ns  |
|  10 mappings |     218 ns |   199 ns  |
|  50 mappings |     528 ns |   218 ns  |
| 100 mappings |     980 ns |   229 ns  |
| 200 mappings |    1880 ns |   239 ns  |
| 300 mappings |    2760 ns |   240 ns  |
| 340 mappings | not tested |   248 ns  |

Here's the test program I used. I asked Eric what he did and this is a more
"advanced" implementation of the idea. It's pretty straight-forward:

 #define __GNU_SOURCE
 #define __STDC_FORMAT_MACROS
 #include <errno.h>
 #include <dirent.h>
 #include <fcntl.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>

 int main(int argc, char *argv[])
 {
 	int ret;
 	size_t i, k;
 	int fd[1000000];
 	int times[1000];
 	char pathname[4096];
 	struct stat st;
 	struct timeval t1, t2;
 	uint64_t time_in_mcs;
 	uint64_t sum = 0;

 	if (argc != 2) {
 		fprintf(stderr, "Please specify a directory where to create "
 				"the test files\n");
 		exit(EXIT_FAILURE);
 	}

 	for (i = 0; i < sizeof(fd) / sizeof(fd[0]); i++) {
 		sprintf(pathname, "%s/idmap_test_%zu", argv[1], i);
 		fd[i]= open(pathname, O_RDWR | O_CREAT, S_IXUSR | S_IXGRP | S_IXOTH);
 		if (fd[i] < 0) {
 			ssize_t j;
 			for (j = i; j >= 0; j--)
 				close(fd[j]);
 			exit(EXIT_FAILURE);
 		}
 	}

 	for (k = 0; k < 1000; k++) {
 		ret = gettimeofday(&t1, NULL);
 		if (ret < 0)
 			goto close_all;

 		for (i = 0; i < sizeof(fd) / sizeof(fd[0]); i++) {
 			ret = fstat(fd[i], &st);
 			if (ret < 0)
 				goto close_all;
 		}

 		ret = gettimeofday(&t2, NULL);
 		if (ret < 0)
 			goto close_all;

 		time_in_mcs = (1000000 * t2.tv_sec + t2.tv_usec) -
 			      (1000000 * t1.tv_sec + t1.tv_usec);
 		printf("Total time in micro seconds:       %" PRIu64 "\n",
 		       time_in_mcs);
 		printf("Total time in nanoseconds:         %" PRIu64 "\n",
 		       time_in_mcs * 1000);
 		printf("Time per file in nanoseconds:      %" PRIu64 "\n",
 		       (time_in_mcs * 1000) / 1000000);
 		times[k] = (time_in_mcs * 1000) / 1000000;
 	}

 close_all:
 	for (i = 0; i < sizeof(fd) / sizeof(fd[0]); i++)
 		close(fd[i]);

 	if (ret < 0)
 		exit(EXIT_FAILURE);

 	for (k = 0; k < 1000; k++) {
 		sum += times[k];
 	}

 	printf("Mean time per file in nanoseconds: %" PRIu64 "\n", sum / 1000);

 	exit(EXIT_SUCCESS);;
 }

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
CC: Serge Hallyn <serge@hallyn.com>
CC: Eric Biederman <ebiederm@xmission.com>
---
Changelog 2017-10-22:
* kernel/user_namespace.c:sort_idmaps():
  Check for NULL after kmemdup() instead of IS_ERR().
  Before this codepath did:

  if (IS_ERR(map->reverse)) {
          map->reverse = NULL;
          return PTR_ERR(map->reverse);
 }

 But kmemdup() does not return IS_ERR() but rather NULL on failure. So let's
 change this to:

 if (!map->reverse)
          return -ENOMEM;

Changelog 2017-10-19:
* kernel/user_namespace.c: Remove unnecessary mutex when freeing extents.
* non-functional: Fix typos in commit message.
* non-functional: Add headers to test program in commit message to make tests I
  performed easily reproducible.
---
 include/linux/user_namespace.h |   7 +-
 kernel/user_namespace.c        | 353 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 327 insertions(+), 33 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7c83d7f6289b..1c1046a60fb4 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -10,7 +10,8 @@
 #include <linux/sysctl.h>
 #include <linux/err.h>
 
-#define UID_GID_MAP_MAX_EXTENTS 5
+#define UID_GID_MAP_MAX_BASE_EXTENTS 5
+#define UID_GID_MAP_MAX_EXTENTS 340
 
 struct uid_gid_extent {
 	u32 first;
@@ -18,10 +19,10 @@ struct uid_gid_extent {
 	u32 count;
 };
 
-struct uid_gid_map {	/* 64 bytes -- 1 cache line */
+struct uid_gid_map { /* 64 bytes -- 1 cache line */
 	u32 nr_extents;
 	union {
-		struct uid_gid_extent extent[UID_GID_MAP_MAX_EXTENTS];
+		struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS];
 		struct {
 			struct uid_gid_extent *forward;
 			struct uid_gid_extent *reverse;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c490f1e4313b..54f1d4a6dec9 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -23,6 +23,8 @@
 #include <linux/ctype.h>
 #include <linux/projid.h>
 #include <linux/fs_struct.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 static DEFINE_MUTEX(userns_state_mutex);
@@ -181,6 +183,18 @@ static void free_user_ns(struct work_struct *work)
 	do {
 		struct ucounts *ucounts = ns->ucounts;
 		parent = ns->parent;
+		if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+			kfree(ns->gid_map.forward);
+			kfree(ns->gid_map.reverse);
+		}
+		if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+			kfree(ns->uid_map.forward);
+			kfree(ns->uid_map.reverse);
+		}
+		if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+			kfree(ns->projid_map.forward);
+			kfree(ns->projid_map.reverse);
+		}
 		retire_userns_sysctls(ns);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 		key_put(ns->persistent_keyring_register);
@@ -198,7 +212,84 @@ void __put_user_ns(struct user_namespace *ns)
 }
 EXPORT_SYMBOL(__put_user_ns);
 
-static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
+/**
+ * idmap_key struct holds the information necessary to find an idmapping in a
+ * sorted idmap array. It is passed to cmp_map_id() as first argument.
+ */
+struct idmap_key {
+	bool map_up; /* true  -> id from kid; false -> kid from id */
+	u32 id; /* id to find */
+	u32 count; /* == 0 unless used with map_id_range_down() */
+};
+
+/**
+ * cmp_map_id - Function to be passed to bsearch() to find the requested
+ * idmapping. Expects struct idmap_key to be passed via @k.
+ */
+static int cmp_map_id(const void *k, const void *e)
+{
+	u32 first, last, id2;
+	const struct idmap_key *key = k;
+	const struct uid_gid_extent *el = e;
+
+	/* handle map_id_range_down() */
+	if (key->count)
+		id2 = key->id + key->count - 1;
+	else
+		id2 = key->id;
+
+	/* handle map_id_{down,up}() */
+	if (key->map_up)
+		first = el->lower_first;
+	else
+		first = el->first;
+
+	last = first + el->count - 1;
+
+	if (key->id >= first && key->id <= last &&
+	    (id2 >= first && id2 <= last))
+		return 0;
+
+	if (key->id < first || id2 < first)
+		return -1;
+
+	return 1;
+}
+
+/**
+ * map_id_range_down_max - Find idmap via binary search in ordered idmap array.
+ * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
+ */
+static u32 map_id_range_down_max(struct uid_gid_map *map, u32 id, u32 count)
+{
+	u32 extents;
+	struct uid_gid_extent *extent;
+	struct idmap_key key;
+
+	key.map_up = false;
+	key.count = count;
+	key.id = id;
+
+	extents = map->nr_extents;
+	smp_rmb();
+
+	extent = bsearch(&key, map->forward, extents,
+			 sizeof(struct uid_gid_extent), cmp_map_id);
+	/* Map the id or note failure */
+	if (extent)
+		id = (id - extent->first) + extent->lower_first;
+	else
+		id = (u32) -1;
+
+	return id;
+}
+
+/**
+ * map_id_range_down_base - Find idmap via binary search in static extent array.
+ * Can only be called if number of mappings is equal or less than
+ * UID_GID_MAP_MAX_BASE_EXTENTS.
+ */
+static u32 map_id_range_down_base(struct uid_gid_map *map, u32 id, u32 count)
 {
 	unsigned idx, extents;
 	u32 first, last, id2;
@@ -224,7 +315,23 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
 	return id;
 }
 
-static u32 map_id_down(struct uid_gid_map *map, u32 id)
+static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
+{
+	u32 extents = map->nr_extents;
+	smp_rmb();
+
+	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+		return map_id_range_down_base(map, id, count);
+
+	return map_id_range_down_max(map, id, count);
+}
+
+/**
+ * map_id_down_base - Find idmap via binary search in static extent array.
+ * Can only be called if number of mappings is equal or less than
+ * UID_GID_MAP_MAX_BASE_EXTENTS.
+ */
+static u32 map_id_down_base(struct uid_gid_map *map, u32 id)
 {
 	unsigned idx, extents;
 	u32 first, last;
@@ -247,7 +354,23 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id)
 	return id;
 }
 
-static u32 map_id_up(struct uid_gid_map *map, u32 id)
+static u32 map_id_down(struct uid_gid_map *map, u32 id)
+{
+	u32 extents = map->nr_extents;
+	smp_rmb();
+
+	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+		return map_id_down_base(map, id);
+
+	return map_id_range_down_max(map, id, 0);
+}
+
+/**
+ * map_id_up_base - Find idmap via binary search in static extent array.
+ * Can only be called if number of mappings is equal or less than
+ * UID_GID_MAP_MAX_BASE_EXTENTS.
+ */
+static u32 map_id_up_base(struct uid_gid_map *map, u32 id)
 {
 	unsigned idx, extents;
 	u32 first, last;
@@ -270,6 +393,45 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id)
 	return id;
 }
 
+/**
+ * map_id_up_max - Find idmap via binary search in ordered idmap array.
+ * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
+ */
+static u32 map_id_up_max(struct uid_gid_map *map, u32 id)
+{
+	u32 extents;
+	struct uid_gid_extent *extent;
+	struct idmap_key key;
+
+	key.map_up = true;
+	key.count = 0;
+	key.id = id;
+
+	extents = map->nr_extents;
+	smp_rmb();
+
+	extent = bsearch(&key, map->reverse, extents,
+			 sizeof(struct uid_gid_extent), cmp_map_id);
+	/* Map the id or note failure */
+	if (extent)
+		id = (id - extent->lower_first) + extent->first;
+	else
+		id = (u32) -1;
+
+	return id;
+}
+
+static u32 map_id_up(struct uid_gid_map *map, u32 id)
+{
+	u32 extents = map->nr_extents;
+	smp_rmb();
+
+	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+		return map_id_up_base(map, id);
+
+	return map_id_up_max(map, id);
+}
+
 /**
  *	make_kuid - Map a user-namespace uid pair into a kuid.
  *	@ns:  User namespace that the uid is in
@@ -540,13 +702,15 @@ static int projid_m_show(struct seq_file *seq, void *v)
 static void *m_start(struct seq_file *seq, loff_t *ppos,
 		     struct uid_gid_map *map)
 {
-	struct uid_gid_extent *extent = NULL;
 	loff_t pos = *ppos;
 
-	if (pos < map->nr_extents)
-		extent = &map->extent[pos];
+	if (pos >= map->nr_extents)
+		return NULL;
+
+	if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+		return &map->extent[pos];
 
-	return extent;
+	return &map->forward[pos];
 }
 
 static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
@@ -618,7 +782,10 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
 		u32 prev_upper_last, prev_lower_last;
 		struct uid_gid_extent *prev;
 
-		prev = &new_map->extent[idx];
+		if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+			prev = &new_map->extent[idx];
+		else
+			prev = &new_map->forward[idx];
 
 		prev_upper_first = prev->first;
 		prev_lower_first = prev->lower_first;
@@ -638,6 +805,102 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
 	return false;
 }
 
+/**
+ * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
+ * Takes care to allocate a 4K block of memory if the number of mappings exceeds
+ * UID_GID_MAP_MAX_BASE_EXTENTS.
+ */
+static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
+{
+	if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS) {
+		map->extent[map->nr_extents].first = extent->first;
+		map->extent[map->nr_extents].lower_first = extent->lower_first;
+		map->extent[map->nr_extents].count = extent->count;
+		return 0;
+	}
+
+	if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
+		struct uid_gid_extent *forward;
+
+		/* Allocate memory for 340 mappings. */
+		forward = kmalloc(sizeof(struct uid_gid_extent) *
+				 UID_GID_MAP_MAX_EXTENTS, GFP_KERNEL);
+		if (IS_ERR(forward))
+			return PTR_ERR(forward);
+
+		/* Copy over memory. Only set up memory for the forward pointer.
+		 * Defer the memory setup for the reverse pointer.
+		 */
+		memcpy(forward, map->extent,
+		       map->nr_extents * sizeof(map->extent[0]));
+
+		map->forward = forward;
+		map->reverse = NULL;
+	}
+
+	map->forward[map->nr_extents].first = extent->first;
+	map->forward[map->nr_extents].lower_first = extent->lower_first;
+	map->forward[map->nr_extents].count = extent->count;
+	return 0;
+}
+
+/* cmp function to sort() forward mappings */
+static int cmp_extents_forward(const void *a, const void *b)
+{
+	const struct uid_gid_extent *e1 = a;
+	const struct uid_gid_extent *e2 = b;
+
+	if (e1->first < e2->first)
+		return -1;
+
+	if (e1->first > e2->first)
+		return 1;
+
+	return 0;
+}
+
+/* cmp function to sort() reverse mappings */
+static int cmp_extents_reverse(const void *a, const void *b)
+{
+	const struct uid_gid_extent *e1 = a;
+	const struct uid_gid_extent *e2 = b;
+
+	if (e1->lower_first < e2->lower_first)
+		return -1;
+
+	if (e1->lower_first > e2->lower_first)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * sort_idmaps - Sorts an array of idmap entries.
+ * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
+ */
+static int sort_idmaps(struct uid_gid_map *map)
+{
+	if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+		return 0;
+
+	/* Sort forward array. */
+	sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent),
+	     cmp_extents_forward, NULL);
+
+	/* Only copy the memory from forward we actually need. */
+	map->reverse = kmemdup(map->forward,
+			       map->nr_extents * sizeof(struct uid_gid_extent),
+			       GFP_KERNEL);
+	if (!map->reverse)
+		return -ENOMEM;
+
+	/* Sort reverse array. */
+	sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent),
+	     cmp_extents_reverse, NULL);
+
+	return 0;
+}
+
 static ssize_t map_write(struct file *file, const char __user *buf,
 			 size_t count, loff_t *ppos,
 			 int cap_setid,
@@ -648,7 +911,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	struct user_namespace *ns = seq->private;
 	struct uid_gid_map new_map;
 	unsigned idx;
-	struct uid_gid_extent *extent = NULL;
+	struct uid_gid_extent extent;
 	char *kbuf = NULL, *pos, *next_line;
 	ssize_t ret = -EINVAL;
 
@@ -673,6 +936,8 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	 */
 	mutex_lock(&userns_state_mutex);
 
+	memset(&new_map, 0, sizeof(struct uid_gid_map));
+
 	ret = -EPERM;
 	/* Only allow one successful write to the map */
 	if (map->nr_extents != 0)
@@ -700,9 +965,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	/* Parse the user data */
 	ret = -EINVAL;
 	pos = kbuf;
-	new_map.nr_extents = 0;
 	for (; pos; pos = next_line) {
-		extent = &new_map.extent[new_map.nr_extents];
 
 		/* Find the end of line and ensure I don't look past it */
 		next_line = strchr(pos, '\n');
@@ -714,17 +977,17 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 		}
 
 		pos = skip_spaces(pos);
-		extent->first = simple_strtoul(pos, &pos, 10);
+		extent.first = simple_strtoul(pos, &pos, 10);
 		if (!isspace(*pos))
 			goto out;
 
 		pos = skip_spaces(pos);
-		extent->lower_first = simple_strtoul(pos, &pos, 10);
+		extent.lower_first = simple_strtoul(pos, &pos, 10);
 		if (!isspace(*pos))
 			goto out;
 
 		pos = skip_spaces(pos);
-		extent->count = simple_strtoul(pos, &pos, 10);
+		extent.count = simple_strtoul(pos, &pos, 10);
 		if (*pos && !isspace(*pos))
 			goto out;
 
@@ -734,29 +997,33 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 			goto out;
 
 		/* Verify we have been given valid starting values */
-		if ((extent->first == (u32) -1) ||
-		    (extent->lower_first == (u32) -1))
+		if ((extent.first == (u32) -1) ||
+		    (extent.lower_first == (u32) -1))
 			goto out;
 
 		/* Verify count is not zero and does not cause the
 		 * extent to wrap
 		 */
-		if ((extent->first + extent->count) <= extent->first)
+		if ((extent.first + extent.count) <= extent.first)
 			goto out;
-		if ((extent->lower_first + extent->count) <=
-		     extent->lower_first)
+		if ((extent.lower_first + extent.count) <=
+		     extent.lower_first)
 			goto out;
 
 		/* Do the ranges in extent overlap any previous extents? */
-		if (mappings_overlap(&new_map, extent))
+		if (mappings_overlap(&new_map, &extent))
 			goto out;
 
-		new_map.nr_extents++;
-
-		/* Fail if the file contains too many extents */
-		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
+		if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
 		    (next_line != NULL))
 			goto out;
+
+		ret = insert_extent(&new_map, &extent);
+		if (ret < 0)
+			goto out;
+		ret = -EINVAL;
+
+		new_map.nr_extents++;
 	}
 	/* Be very certaint the new map actually exists */
 	if (new_map.nr_extents == 0)
@@ -767,16 +1034,26 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
 		goto out;
 
+	ret = sort_idmaps(&new_map);
+	if (ret < 0)
+		goto out;
+
+	ret = -EPERM;
 	/* Map the lower ids from the parent user namespace to the
 	 * kernel global id space.
 	 */
 	for (idx = 0; idx < new_map.nr_extents; idx++) {
+		struct uid_gid_extent *e;
 		u32 lower_first;
-		extent = &new_map.extent[idx];
+
+		if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+			e = &new_map.extent[idx];
+		else
+			e = &new_map.forward[idx];
 
 		lower_first = map_id_range_down(parent_map,
-						extent->lower_first,
-						extent->count);
+						e->lower_first,
+						e->count);
 
 		/* Fail if we can not map the specified extent to
 		 * the kernel global id space.
@@ -784,18 +1061,34 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 		if (lower_first == (u32) -1)
 			goto out;
 
-		extent->lower_first = lower_first;
+		e->lower_first = lower_first;
 	}
 
 	/* Install the map */
-	memcpy(map->extent, new_map.extent,
-		new_map.nr_extents*sizeof(new_map.extent[0]));
+	if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
+		memcpy(map->extent, new_map.extent,
+		       new_map.nr_extents * sizeof(new_map.extent[0]));
+	} else {
+		map->forward = new_map.forward;
+		map->reverse = new_map.reverse;
+	}
 	smp_wmb();
 	map->nr_extents = new_map.nr_extents;
 
 	*ppos = count;
 	ret = count;
 out:
+	if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+		if (new_map.forward)
+			kfree(new_map.forward);
+		map->forward = NULL;
+
+		if (new_map.reverse)
+			kfree(new_map.reverse);
+		map->reverse = NULL;
+		map->nr_extents = 0;
+	}
+
 	mutex_unlock(&userns_state_mutex);
 	kfree(kbuf);
 	return ret;
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2 v5] user namespaces: bump idmap limits to 340
  2017-10-22 18:40 ` [PATCH 2/2 v5] user namespaces: bump idmap limits to 340 Christian Brauner
@ 2017-10-23  8:02   ` Tycho Andersen
  2017-10-23 12:37     ` Christian Brauner
  0 siblings, 1 reply; 4+ messages in thread
From: Tycho Andersen @ 2017-10-23  8:02 UTC (permalink / raw)
  To: Christian Brauner; +Cc: linux-kernel, serge, stgraber, Eric Biederman

On Sun, Oct 22, 2017 at 08:40:36PM +0200, Christian Brauner wrote:
> +/**
> + * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
> + * Takes care to allocate a 4K block of memory if the number of mappings exceeds
> + * UID_GID_MAP_MAX_BASE_EXTENTS.
> + */
> +static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
> +{
> +	if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		map->extent[map->nr_extents].first = extent->first;
> +		map->extent[map->nr_extents].lower_first = extent->lower_first;
> +		map->extent[map->nr_extents].count = extent->count;
> +		return 0;
> +	}
> +
> +	if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		struct uid_gid_extent *forward;
> +
> +		/* Allocate memory for 340 mappings. */
> +		forward = kmalloc(sizeof(struct uid_gid_extent) *
> +				 UID_GID_MAP_MAX_EXTENTS, GFP_KERNEL);
> +		if (IS_ERR(forward))

I think you have the same issue here as in v4, kmalloc doesn't return
-ENOMEM, it returns NULL on failure.

>  static ssize_t map_write(struct file *file, const char __user *buf,
>  			 size_t count, loff_t *ppos,
>  			 int cap_setid,
> @@ -648,7 +911,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
>  	struct user_namespace *ns = seq->private;
>  	struct uid_gid_map new_map;
>  	unsigned idx;
> -	struct uid_gid_extent *extent = NULL;
> +	struct uid_gid_extent extent;

struct uid_gid_extent = {}; maybe? Then you can drop the memset().

>  	char *kbuf = NULL, *pos, *next_line;
>  	ssize_t ret = -EINVAL;
>  
> @@ -673,6 +936,8 @@ static ssize_t map_write(struct file *file, const char __user *buf,
>  	 */
>  	mutex_lock(&userns_state_mutex);
>  
> +	memset(&new_map, 0, sizeof(struct uid_gid_map));
> +
>  	ret = -EPERM;
>  	/* Only allow one successful write to the map */
>  	if (map->nr_extents != 0)
> @@ -700,9 +965,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
>  	/* Parse the user data */
>  	ret = -EINVAL;
>  	pos = kbuf;
> -	new_map.nr_extents = 0;
>  	for (; pos; pos = next_line) {
> -		extent = &new_map.extent[new_map.nr_extents];
>  
>  		/* Find the end of line and ensure I don't look past it */
>  		next_line = strchr(pos, '\n');
> @@ -714,17 +977,17 @@ static ssize_t map_write(struct file *file, const char __user *buf,
>  		}
>  
>  		pos = skip_spaces(pos);
> -		extent->first = simple_strtoul(pos, &pos, 10);
> +		extent.first = simple_strtoul(pos, &pos, 10);
>  		if (!isspace(*pos))
>  			goto out;
>  
>  		pos = skip_spaces(pos);
> -		extent->lower_first = simple_strtoul(pos, &pos, 10);
> +		extent.lower_first = simple_strtoul(pos, &pos, 10);
>  		if (!isspace(*pos))
>  			goto out;
>  
>  		pos = skip_spaces(pos);
> -		extent->count = simple_strtoul(pos, &pos, 10);
> +		extent.count = simple_strtoul(pos, &pos, 10);
>  		if (*pos && !isspace(*pos))
>  			goto out;
>  
> @@ -734,29 +997,33 @@ static ssize_t map_write(struct file *file, const char __user *buf,
>  			goto out;
>  
>  		/* Verify we have been given valid starting values */
> -		if ((extent->first == (u32) -1) ||
> -		    (extent->lower_first == (u32) -1))
> +		if ((extent.first == (u32) -1) ||
> +		    (extent.lower_first == (u32) -1))
>  			goto out;
>  
>  		/* Verify count is not zero and does not cause the
>  		 * extent to wrap
>  		 */
> -		if ((extent->first + extent->count) <= extent->first)
> +		if ((extent.first + extent.count) <= extent.first)
>  			goto out;
> -		if ((extent->lower_first + extent->count) <=
> -		     extent->lower_first)
> +		if ((extent.lower_first + extent.count) <=
> +		     extent.lower_first)
>  			goto out;
>  
>  		/* Do the ranges in extent overlap any previous extents? */
> -		if (mappings_overlap(&new_map, extent))
> +		if (mappings_overlap(&new_map, &extent))
>  			goto out;
>  
> -		new_map.nr_extents++;
> -
> -		/* Fail if the file contains too many extents */
> -		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
> +		if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
>  		    (next_line != NULL))
>  			goto out;
> +
> +		ret = insert_extent(&new_map, &extent);
> +		if (ret < 0)
> +			goto out;
> +		ret = -EINVAL;
> +
> +		new_map.nr_extents++;
>  	}
>  	/* Be very certaint the new map actually exists */
>  	if (new_map.nr_extents == 0)
> @@ -767,16 +1034,26 @@ static ssize_t map_write(struct file *file, const char __user *buf,
>  	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
>  		goto out;
>  
> +	ret = sort_idmaps(&new_map);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = -EPERM;
>  	/* Map the lower ids from the parent user namespace to the
>  	 * kernel global id space.
>  	 */
>  	for (idx = 0; idx < new_map.nr_extents; idx++) {
> +		struct uid_gid_extent *e;
>  		u32 lower_first;
> -		extent = &new_map.extent[idx];
> +
> +		if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
> +			e = &new_map.extent[idx];
> +		else
> +			e = &new_map.forward[idx];
>  
>  		lower_first = map_id_range_down(parent_map,
> -						extent->lower_first,
> -						extent->count);
> +						e->lower_first,
> +						e->count);
>  
>  		/* Fail if we can not map the specified extent to
>  		 * the kernel global id space.
> @@ -784,18 +1061,34 @@ static ssize_t map_write(struct file *file, const char __user *buf,
>  		if (lower_first == (u32) -1)
>  			goto out;
>  
> -		extent->lower_first = lower_first;
> +		e->lower_first = lower_first;
>  	}
>  
>  	/* Install the map */
> -	memcpy(map->extent, new_map.extent,
> -		new_map.nr_extents*sizeof(new_map.extent[0]));
> +	if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		memcpy(map->extent, new_map.extent,
> +		       new_map.nr_extents * sizeof(new_map.extent[0]));
> +	} else {
> +		map->forward = new_map.forward;
> +		map->reverse = new_map.reverse;
> +	}
>  	smp_wmb();
>  	map->nr_extents = new_map.nr_extents;
>  
>  	*ppos = count;
>  	ret = count;
>  out:
> +	if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		if (new_map.forward)
> +			kfree(new_map.forward);

It's safe to pass null to kfree(), so I think you can just do,

kfree(new_map.forward);

and in the other places in this function.

Tycho

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2 v5] user namespaces: bump idmap limits to 340
  2017-10-23  8:02   ` Tycho Andersen
@ 2017-10-23 12:37     ` Christian Brauner
  0 siblings, 0 replies; 4+ messages in thread
From: Christian Brauner @ 2017-10-23 12:37 UTC (permalink / raw)
  To: Tycho Andersen
  Cc: Christian Brauner, linux-kernel, serge, stgraber, Eric Biederman

On Mon, Oct 23, 2017 at 10:02:23AM +0200, Tycho Andersen wrote:
> On Sun, Oct 22, 2017 at 08:40:36PM +0200, Christian Brauner wrote:
> > +/**
> > + * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
> > + * Takes care to allocate a 4K block of memory if the number of mappings exceeds
> > + * UID_GID_MAP_MAX_BASE_EXTENTS.
> > + */
> > +static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
> > +{
> > +	if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS) {
> > +		map->extent[map->nr_extents].first = extent->first;
> > +		map->extent[map->nr_extents].lower_first = extent->lower_first;
> > +		map->extent[map->nr_extents].count = extent->count;
> > +		return 0;
> > +	}
> > +
> > +	if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
> > +		struct uid_gid_extent *forward;
> > +
> > +		/* Allocate memory for 340 mappings. */
> > +		forward = kmalloc(sizeof(struct uid_gid_extent) *
> > +				 UID_GID_MAP_MAX_EXTENTS, GFP_KERNEL);
> > +		if (IS_ERR(forward))
> 
> I think you have the same issue here as in v4, kmalloc doesn't return
> -ENOMEM, it returns NULL on failure.

Ah, I somehow was convinced that only kmemdup() simply returned NULL. Thanks,
Tycho.

> 
> >  static ssize_t map_write(struct file *file, const char __user *buf,
> >  			 size_t count, loff_t *ppos,
> >  			 int cap_setid,
> > @@ -648,7 +911,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
> >  	struct user_namespace *ns = seq->private;
> >  	struct uid_gid_map new_map;
> >  	unsigned idx;
> > -	struct uid_gid_extent *extent = NULL;
> > +	struct uid_gid_extent extent;
> 
> struct uid_gid_extent = {}; maybe? Then you can drop the memset().

I don't think this is right. First, I don't care about struct uid_gid_extent to
be zeroed. What I care about is struct uid_gid_map new_map to be zeroed. This
needs to be done otherwise when the condition if (map->nr_extents != 0) is true
further down below you'd jump to unitialized memory in the cleanup code below.
Also, any modification of struct uid_gid_map_new new_map needs to be done while
holding the mutex lock. So I think keeping the memset() here is ok.

> 
> >  	char *kbuf = NULL, *pos, *next_line;
> >  	ssize_t ret = -EINVAL;
> >  
> > @@ -673,6 +936,8 @@ static ssize_t map_write(struct file *file, const char __user *buf,
> >  	 */
> >  	mutex_lock(&userns_state_mutex);
> >  
> > +	memset(&new_map, 0, sizeof(struct uid_gid_map));
> > +
> >  	ret = -EPERM;
> >  	/* Only allow one successful write to the map */
> >  	if (map->nr_extents != 0)
> > @@ -700,9 +965,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
> >  	/* Parse the user data */
> >  	ret = -EINVAL;
> >  	pos = kbuf;
> > -	new_map.nr_extents = 0;
> >  	for (; pos; pos = next_line) {
> > -		extent = &new_map.extent[new_map.nr_extents];
> >  
> >  		/* Find the end of line and ensure I don't look past it */
> >  		next_line = strchr(pos, '\n');
> > @@ -714,17 +977,17 @@ static ssize_t map_write(struct file *file, const char __user *buf,
> >  		}
> >  
> >  		pos = skip_spaces(pos);
> > -		extent->first = simple_strtoul(pos, &pos, 10);
> > +		extent.first = simple_strtoul(pos, &pos, 10);
> >  		if (!isspace(*pos))
> >  			goto out;
> >  
> >  		pos = skip_spaces(pos);
> > -		extent->lower_first = simple_strtoul(pos, &pos, 10);
> > +		extent.lower_first = simple_strtoul(pos, &pos, 10);
> >  		if (!isspace(*pos))
> >  			goto out;
> >  
> >  		pos = skip_spaces(pos);
> > -		extent->count = simple_strtoul(pos, &pos, 10);
> > +		extent.count = simple_strtoul(pos, &pos, 10);
> >  		if (*pos && !isspace(*pos))
> >  			goto out;
> >  
> > @@ -734,29 +997,33 @@ static ssize_t map_write(struct file *file, const char __user *buf,
> >  			goto out;
> >  
> >  		/* Verify we have been given valid starting values */
> > -		if ((extent->first == (u32) -1) ||
> > -		    (extent->lower_first == (u32) -1))
> > +		if ((extent.first == (u32) -1) ||
> > +		    (extent.lower_first == (u32) -1))
> >  			goto out;
> >  
> >  		/* Verify count is not zero and does not cause the
> >  		 * extent to wrap
> >  		 */
> > -		if ((extent->first + extent->count) <= extent->first)
> > +		if ((extent.first + extent.count) <= extent.first)
> >  			goto out;
> > -		if ((extent->lower_first + extent->count) <=
> > -		     extent->lower_first)
> > +		if ((extent.lower_first + extent.count) <=
> > +		     extent.lower_first)
> >  			goto out;
> >  
> >  		/* Do the ranges in extent overlap any previous extents? */
> > -		if (mappings_overlap(&new_map, extent))
> > +		if (mappings_overlap(&new_map, &extent))
> >  			goto out;
> >  
> > -		new_map.nr_extents++;
> > -
> > -		/* Fail if the file contains too many extents */
> > -		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
> > +		if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
> >  		    (next_line != NULL))
> >  			goto out;
> > +
> > +		ret = insert_extent(&new_map, &extent);
> > +		if (ret < 0)
> > +			goto out;
> > +		ret = -EINVAL;
> > +
> > +		new_map.nr_extents++;
> >  	}
> >  	/* Be very certaint the new map actually exists */
> >  	if (new_map.nr_extents == 0)
> > @@ -767,16 +1034,26 @@ static ssize_t map_write(struct file *file, const char __user *buf,
> >  	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
> >  		goto out;
> >  
> > +	ret = sort_idmaps(&new_map);
> > +	if (ret < 0)
> > +		goto out;
> > +
> > +	ret = -EPERM;
> >  	/* Map the lower ids from the parent user namespace to the
> >  	 * kernel global id space.
> >  	 */
> >  	for (idx = 0; idx < new_map.nr_extents; idx++) {
> > +		struct uid_gid_extent *e;
> >  		u32 lower_first;
> > -		extent = &new_map.extent[idx];
> > +
> > +		if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
> > +			e = &new_map.extent[idx];
> > +		else
> > +			e = &new_map.forward[idx];
> >  
> >  		lower_first = map_id_range_down(parent_map,
> > -						extent->lower_first,
> > -						extent->count);
> > +						e->lower_first,
> > +						e->count);
> >  
> >  		/* Fail if we can not map the specified extent to
> >  		 * the kernel global id space.
> > @@ -784,18 +1061,34 @@ static ssize_t map_write(struct file *file, const char __user *buf,
> >  		if (lower_first == (u32) -1)
> >  			goto out;
> >  
> > -		extent->lower_first = lower_first;
> > +		e->lower_first = lower_first;
> >  	}
> >  
> >  	/* Install the map */
> > -	memcpy(map->extent, new_map.extent,
> > -		new_map.nr_extents*sizeof(new_map.extent[0]));
> > +	if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
> > +		memcpy(map->extent, new_map.extent,
> > +		       new_map.nr_extents * sizeof(new_map.extent[0]));
> > +	} else {
> > +		map->forward = new_map.forward;
> > +		map->reverse = new_map.reverse;
> > +	}
> >  	smp_wmb();
> >  	map->nr_extents = new_map.nr_extents;
> >  
> >  	*ppos = count;
> >  	ret = count;
> >  out:
> > +	if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
> > +		if (new_map.forward)
> > +			kfree(new_map.forward);
> 
> It's safe to pass null to kfree(), so I think you can just do,
> 
> kfree(new_map.forward);
> 
> and in the other places in this function.

Yeah, that sounds right. Thanks!

> 
> Tycho

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-10-23 12:37 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-22 18:40 [PATCH 1/2 v5] user namespace: use union in {g,u}idmap struct Christian Brauner
2017-10-22 18:40 ` [PATCH 2/2 v5] user namespaces: bump idmap limits to 340 Christian Brauner
2017-10-23  8:02   ` Tycho Andersen
2017-10-23 12:37     ` Christian Brauner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.