All of lore.kernel.org
 help / color / mirror / Atom feed
From: Guillaume Knispel <guillaume.knispel@supersonicimagine.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Manfred Spraul <manfred@colorfullife.com>,
	Kees Cook <keescook@chromium.org>,
	Davidlohr Bueso <dave@stgolabs.net>
Cc: Alexey Dobriyan <adobriyan@gmail.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Serge Hallyn <serge@hallyn.com>, Andrey Vagin <avagin@openvz.org>,
	Guillaume Knispel <guillaume.knispel@supersonicimagine.com>,
	Marc Pardo <marc.pardo@supersonicimagine.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH] ipc: optimize semget/shmget/msgget for lots of keys
Date: Mon, 31 Jul 2017 10:42:37 +0200	[thread overview]
Message-ID: <20170731084237.GA123231@ubuntu> (raw)

ipc_findkey() scanned all objects to look for the wanted key. This is
slow when using a high number of keys, for example on an i5 laptop the
following loop took 17 s, with last semget calls taking ~1 ms each.

    for (int i = 0, k=0x424242; i < 31900; ++i)
        semget(k++, 1, IPC_CREAT | 0600);

This change adds an rhashtable of kern_ipc_perm objects in ipc_ids, so
that one lookup cease to be O(n). The above loop now takes ~10 ms. Each
lookup-only semget() call can take between ~120 ns and a few µs. Rarely,
some creations can take a few dozen of µs.

I also ran 'syscalls-ipc' and 'containers' of ltp 20170516, including
with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB,
CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK,
CONFIG_DEBUG_ATOMIC_SLEEP, and CONFIG_DEBUG_OBJECTS_RCU_HEAD, and did
not detect any regression (compared to 4.13-rc2, running Ubuntu 16.04.2)
[I did not manage to enable CONFIG_PROVE_RCU]

Signed-off-by: Guillaume Knispel <guillaume.knispel@supersonicimagine.com>
Reviewed-by: Marc Pardo <marc.pardo@supersonicimagine.com>
---
 include/linux/ipc.h           |  3 ++
 include/linux/ipc_namespace.h |  3 ++
 ipc/msg.c                     | 10 +++--
 ipc/namespace.c               | 20 +++++++--
 ipc/sem.c                     | 11 +++--
 ipc/shm.c                     | 14 +++---
 ipc/util.c                    | 99 ++++++++++++++++++++++++++++++++-----------
 ipc/util.h                    | 21 +++++----
 8 files changed, 130 insertions(+), 51 deletions(-)

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index fadd579..f480f22 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -3,6 +3,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/uidgid.h>
+#include <linux/rhashtable.h>
 #include <uapi/linux/ipc.h>
 
 #define IPCMNI 32768  /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
@@ -21,6 +22,8 @@ struct kern_ipc_perm {
 	unsigned long	seq;
 	void		*security;
 
+	struct rhash_head khtnode;
+
 	struct rcu_head rcu;
 	atomic_t refcount;
 } ____cacheline_aligned_in_smp __randomize_layout;
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 65327ee..aea3b81 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -7,15 +7,18 @@
 #include <linux/notifier.h>
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
+#include <linux/rhashtable.h>
 
 struct user_namespace;
 
 struct ipc_ids {
 	int in_use;
 	unsigned short seq;
+	bool tables_initialized;
 	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
 	int next_id;
+	struct rhashtable key_ht;
 };
 
 struct ipc_namespace {
diff --git a/ipc/msg.c b/ipc/msg.c
index 5b25e07..f123042 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1011,7 +1011,7 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
 }
 
 
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
 {
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
@@ -1019,7 +1019,7 @@ void msg_init_ns(struct ipc_namespace *ns)
 
 	atomic_set(&ns->msg_bytes, 0);
 	atomic_set(&ns->msg_hdrs, 0);
-	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+	return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
 }
 
 #ifdef CONFIG_IPC_NS
@@ -1027,6 +1027,7 @@ void msg_exit_ns(struct ipc_namespace *ns)
 {
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
+	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
 }
 #endif
 
@@ -1057,11 +1058,12 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
 }
 #endif
 
-void __init msg_init(void)
+int __init msg_init(void)
 {
-	msg_init_ns(&init_ipc_ns);
+	const int err = msg_init_ns(&init_ipc_ns);
 
 	ipc_init_proc_interface("sysvipc/msg",
 				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
 				IPC_MSG_IDS, sysvipc_msg_proc_show);
+	return err;
 }
diff --git a/ipc/namespace.c b/ipc/namespace.c
index b4d80f9..4130f48 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -54,16 +54,28 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	ns->user_ns = get_user_ns(user_ns);
 	ns->ucounts = ucounts;
 
-	err = mq_init_ns(ns);
+	err = sem_init_ns(ns);
 	if (err)
 		goto fail_put;
+	err = msg_init_ns(ns);
+	if (err)
+		goto fail_destroy_sem;
+	err = shm_init_ns(ns);
+	if (err)
+		goto fail_destroy_msg;
 
-	sem_init_ns(ns);
-	msg_init_ns(ns);
-	shm_init_ns(ns);
+	err = mq_init_ns(ns);
+	if (err)
+		goto fail_destroy_shm;
 
 	return ns;
 
+fail_destroy_shm:
+	shm_exit_ns(ns);
+fail_destroy_msg:
+	msg_exit_ns(ns);
+fail_destroy_sem:
+	sem_exit_ns(ns);
 fail_put:
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
diff --git a/ipc/sem.c b/ipc/sem.c
index 9e70cd7..b13ecfd 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -185,14 +185,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #define sc_semopm	sem_ctls[2]
 #define sc_semmni	sem_ctls[3]
 
-void sem_init_ns(struct ipc_namespace *ns)
+int sem_init_ns(struct ipc_namespace *ns)
 {
 	ns->sc_semmsl = SEMMSL;
 	ns->sc_semmns = SEMMNS;
 	ns->sc_semopm = SEMOPM;
 	ns->sc_semmni = SEMMNI;
 	ns->used_sems = 0;
-	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
+	return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
 }
 
 #ifdef CONFIG_IPC_NS
@@ -200,15 +200,18 @@ void sem_exit_ns(struct ipc_namespace *ns)
 {
 	free_ipcs(ns, &sem_ids(ns), freeary);
 	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
+	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
 }
 #endif
 
-void __init sem_init(void)
+int __init sem_init(void)
 {
-	sem_init_ns(&init_ipc_ns);
+	const int err = sem_init_ns(&init_ipc_ns);
+
 	ipc_init_proc_interface("sysvipc/sem",
 				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
 				IPC_SEM_IDS, sysvipc_sem_proc_show);
+	return err;
 }
 
 /**
diff --git a/ipc/shm.c b/ipc/shm.c
index 28a4448..5fa7576 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -72,14 +72,14 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
 
-void shm_init_ns(struct ipc_namespace *ns)
+int shm_init_ns(struct ipc_namespace *ns)
 {
 	ns->shm_ctlmax = SHMMAX;
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
 	ns->shm_rmid_forced = 0;
 	ns->shm_tot = 0;
-	ipc_init_ids(&shm_ids(ns));
+	return ipc_init_ids(&shm_ids(ns));
 }
 
 /*
@@ -95,7 +95,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	if (shp->shm_nattch) {
 		shp->shm_perm.mode |= SHM_DEST;
 		/* Do not find it any more */
-		shp->shm_perm.key = IPC_PRIVATE;
+		ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
 		shm_unlock(shp);
 	} else
 		shm_destroy(ns, shp);
@@ -106,16 +106,18 @@ void shm_exit_ns(struct ipc_namespace *ns)
 {
 	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
 	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
+	rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
 }
 #endif
 
 static int __init ipc_ns_init(void)
 {
-	shm_init_ns(&init_ipc_ns);
-	return 0;
+	const int err = shm_init_ns(&init_ipc_ns);
+	WARN(err, "ipc: sysV shm_init_ns failed: %d\n", err);
+	return err;
 }
 
-pure_initcall(ipc_ns_init);
+core_initcall(ipc_ns_init);
 
 void __init shm_init(void)
 {
diff --git a/ipc/util.c b/ipc/util.c
index 1a2cb02..621e9fc 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -83,27 +83,45 @@ struct ipc_proc_iface {
  */
 static int __init ipc_init(void)
 {
-	sem_init();
-	msg_init();
+	int err_sem, err_msg;
+
+	err_sem = sem_init();
+	WARN(err_sem, "ipc: sysV sem_init failed: %d\n", err_sem);
+	err_msg = msg_init();
+	WARN(err_msg, "ipc: sysV msg_init failed: %d\n", err_msg);
 	shm_init();
-	return 0;
+
+	return err_msg ? err_msg : err_sem;
 }
 device_initcall(ipc_init);
 
+static const struct rhashtable_params ipc_kht_params = {
+	.head_offset		= offsetof(struct kern_ipc_perm, khtnode),
+	.key_offset		= offsetof(struct kern_ipc_perm, key),
+	.key_len		= FIELD_SIZEOF(struct kern_ipc_perm, key),
+	.automatic_shrinking	= true,
+};
+
 /**
  * ipc_init_ids	- initialise ipc identifiers
  * @ids: ipc identifier set
  *
  * Set up the sequence range to use for the ipc identifier range (limited
- * below IPCMNI) then initialise the ids idr.
+ * below IPCMNI) then initialise the keys hashtable and ids idr.
  */
-void ipc_init_ids(struct ipc_ids *ids)
+int ipc_init_ids(struct ipc_ids *ids)
 {
+	int err;
 	ids->in_use = 0;
 	ids->seq = 0;
 	ids->next_id = -1;
 	init_rwsem(&ids->rwsem);
+	err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
+	if (err)
+		return err;
 	idr_init(&ids->ipcs_idr);
+	ids->tables_initialized = true;
+	return 0;
 }
 
 #ifdef CONFIG_PROC_FS
@@ -147,28 +165,20 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  * Returns the locked pointer to the ipc structure if found or NULL
  * otherwise. If key is found ipc points to the owning ipc structure
  *
- * Called with ipc_ids.rwsem held.
+ * Called with writer ipc_ids.rwsem held.
  */
 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-	struct kern_ipc_perm *ipc;
-	int next_id;
-	int total;
-
-	for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
-		ipc = idr_find(&ids->ipcs_idr, next_id);
-
-		if (ipc == NULL)
-			continue;
+	struct kern_ipc_perm *ipcp = NULL;
 
-		if (ipc->key != key) {
-			total++;
-			continue;
-		}
+	if (likely(ids->tables_initialized))
+		ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+					      ipc_kht_params);
 
+	if (ipcp) {
 		rcu_read_lock();
-		ipc_lock_object(ipc);
-		return ipc;
+		ipc_lock_object(ipcp);
+		return ipcp;
 	}
 
 	return NULL;
@@ -221,13 +231,13 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 {
 	kuid_t euid;
 	kgid_t egid;
-	int id;
+	int id, err;
 	int next_id = ids->next_id;
 
 	if (size > IPCMNI)
 		size = IPCMNI;
 
-	if (ids->in_use >= size)
+	if (!ids->tables_initialized || ids->in_use >= size)
 		return -ENOSPC;
 
 	idr_preload(GFP_KERNEL);
@@ -246,6 +256,15 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 		       (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0,
 		       GFP_NOWAIT);
 	idr_preload_end();
+
+	if (id >= 0 && new->key != IPC_PRIVATE) {
+		err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
+					     ipc_kht_params);
+		if (err < 0) {
+			idr_remove(&ids->ipcs_idr, id);
+			id = err;
+		}
+	}
 	if (id < 0) {
 		spin_unlock(&new->lock);
 		rcu_read_unlock();
@@ -377,6 +396,20 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
 	return err;
 }
 
+/**
+ * ipc_kht_remove - remove an ipc from the key hashtable
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the key to remove
+ *
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
+ */
+static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+{
+	if (ipcp->key != IPC_PRIVATE)
+		rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
+				       ipc_kht_params);
+}
 
 /**
  * ipc_rmid - remove an ipc identifier
@@ -391,10 +424,25 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 	int lid = ipcid_to_idx(ipcp->id);
 
 	idr_remove(&ids->ipcs_idr, lid);
+	ipc_kht_remove(ids, ipcp);
 	ids->in_use--;
 	ipcp->deleted = true;
 }
 
+/**
+ * ipc_set_key_private - switch the key of an existing ipc to IPC_PRIVATE
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the key to modify
+ *
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
+ */
+void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+{
+	ipc_kht_remove(ids, ipcp);
+	ipcp->key = IPC_PRIVATE;
+}
+
 int ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
 	return atomic_inc_not_zero(&ptr->refcount);
@@ -485,7 +533,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
 }
 
 /**
- * ipc_obtain_object
+ * ipc_obtain_object_idr
  * @ids: ipc identifier set
  * @id: ipc id to look for
  *
@@ -499,6 +547,9 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
 	struct kern_ipc_perm *out;
 	int lid = ipcid_to_idx(id);
 
+	if (unlikely(!ids->tables_initialized))
+		return ERR_PTR(-EINVAL);
+
 	out = idr_find(&ids->ipcs_idr, lid);
 	if (!out)
 		return ERR_PTR(-EINVAL);
diff --git a/ipc/util.h b/ipc/util.h
index c692010..80c9f51 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -15,8 +15,8 @@
 
 #define SEQ_MULTIPLIER	(IPCMNI)
 
-void sem_init(void);
-void msg_init(void);
+int sem_init(void);
+int msg_init(void);
 void shm_init(void);
 
 struct ipc_namespace;
@@ -30,17 +30,17 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
 #endif
 
 #ifdef CONFIG_SYSVIPC
-void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
-void shm_init_ns(struct ipc_namespace *ns);
+int sem_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
+int shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
 void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 #else
-static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
-static inline void shm_init_ns(struct ipc_namespace *ns) { }
+static inline int sem_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline int shm_init_ns(struct ipc_namespace *ns) { return 0; }
 
 static inline void sem_exit_ns(struct ipc_namespace *ns) { }
 static inline void msg_exit_ns(struct ipc_namespace *ns) { }
@@ -79,7 +79,7 @@ struct ipc_ops {
 struct seq_file;
 struct ipc_ids;
 
-void ipc_init_ids(struct ipc_ids *);
+int ipc_init_ids(struct ipc_ids *);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
 		int ids, int (*show)(struct seq_file *, void *));
@@ -104,6 +104,9 @@ int ipc_get_maxid(struct ipc_ids *);
 /* must be called with both locks acquired. */
 void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 
+/* must be called with both locks acquired. */
+void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *);
+
 /* must be called with ipcp locked */
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 
-- 
2.7.4

             reply	other threads:[~2017-07-31  8:42 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-31  8:42 Guillaume Knispel [this message]
2017-07-31 15:45 ` [PATCH] ipc: optimize semget/shmget/msgget for lots of keys Davidlohr Bueso
2017-08-01  1:17   ` Guillaume Knispel
2017-08-01 15:51     ` Davidlohr Bueso
2017-08-02 20:06 ` Davidlohr Bueso
2017-08-03 17:14   ` Guillaume Knispel
2017-08-07 17:33     ` Davidlohr Bueso
2017-08-07 18:21 ` Davidlohr Bueso
2017-08-09 16:15   ` Guillaume Knispel
2017-08-14  6:05 ` [lkp-robot] [ipc] cb6268f05d: reaim.jobs_per_min 865% improvement kernel test robot
2017-08-14  6:05   ` kernel test robot
2017-08-14 17:59   ` Kees Cook
2017-08-14 17:59     ` Kees Cook

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170731084237.GA123231@ubuntu \
    --to=guillaume.knispel@supersonicimagine.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=avagin@openvz.org \
    --cc=bigeasy@linutronix.de \
    --cc=dave@stgolabs.net \
    --cc=ebiederm@xmission.com \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=manfred@colorfullife.com \
    --cc=marc.pardo@supersonicimagine.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=serge@hallyn.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.