All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-06-22 15:25 ` Vasiliy Kulikov
  0 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-06-22 15:25 UTC (permalink / raw)
  To: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

This patch adds support for shm_rmid_forced sysctl.  If set to 1, all
shared memory objects in current ipc namespace will be automatically
forced to use IPC_RMID.  POSIX way of handling shmem allows to create
shm objects and call shmdt() leaving shm object associated with no
process, thus consuming memory not counted via rlimits.  With
shm_rmid_forced=1 the shared memory object is counted at least for one
process, so OOM killer may effectively kill the fat process holding
the shared memory.

It obviously breaks POSIX, some programs relying on the feature would
stop working.  So, set shm_rmid_forced=1 only if you're sure nobody uses
"orphaned" memory.  shm_rmid_forced=0 by default for compatability
reasons.

The feature was previously impemented in -ow as a configure option.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
---

 Documentation/sysctl/kernel.txt |   22 ++++++++++
 include/linux/ipc_namespace.h   |    3 +
 include/linux/shm.h             |    5 ++
 ipc/ipc_sysctl.c                |   33 +++++++++++++++
 ipc/shm.c                       |   87 +++++++++++++++++++++++++++++++++++++--
 kernel/exit.c                   |    1 +
 6 files changed, 147 insertions(+), 4 deletions(-)

---
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 5e7cb39..7a0ecfd 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -62,6 +62,7 @@ show up in /proc/sys/kernel:
 - shmall
 - shmmax                      [ sysv ipc ]
 - shmmni
+- shm_rmid_forced
 - stop-a                      [ SPARC only ]
 - sysrq                       ==> Documentation/sysrq.txt
 - tainted
@@ -475,6 +476,27 @@ kernel.  This value defaults to SHMMAX.
 
 ==============================================================
 
+shm_rmid_forced:
+
+Linux lets you set resource limits, including on how much memory one
+process can consume, via setrlimit(2).  Unfortunately, shared memory
+segments are allowed to exist without association with any process, and
+thus might not be counted against any resource limits.  If enabled,
+shared memory segments are automatically destroyed when their attach
+count becomes zero after a detach or a process termination.  It will
+also destroy segments that were created, but never attached to, on exit
+from the process.  The only use left for IPC_RMID is to immediately
+destroy an unattached segment.  Of course, this breaks the way things are
+defined, so some applications might stop working.  Note that this
+feature will do you no good unless you also configure your resource
+limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
+need this.
+
+Note that if you change this from 0 to 1, already created segments
+without users and with a dead originative process will be destroyed.
+
+==============================================================
+
 softlockup_thresh:
 
 This value can be used to lower the softlockup tolerance threshold.  The
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index a6d1655..1647795 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -44,6 +44,7 @@ struct ipc_namespace {
 	size_t		shm_ctlall;
 	int		shm_ctlmni;
 	int		shm_tot;
+	int		shm_rmid_forced;
 
 	struct notifier_block ipcns_nb;
 
@@ -72,6 +73,7 @@ extern int register_ipcns_notifier(struct ipc_namespace *);
 extern int cond_register_ipcns_notifier(struct ipc_namespace *);
 extern void unregister_ipcns_notifier(struct ipc_namespace *);
 extern int ipcns_notify(unsigned long);
+extern void shm_destroy_orphaned(struct ipc_namespace *ns);
 #else /* CONFIG_SYSVIPC */
 static inline int register_ipcns_notifier(struct ipc_namespace *ns)
 { return 0; }
@@ -79,6 +81,7 @@ static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns)
 { return 0; }
 static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { }
 static inline int ipcns_notify(unsigned long l) { return 0; }
+static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 #endif /* CONFIG_SYSVIPC */
 
 #ifdef CONFIG_POSIX_MQUEUE
diff --git a/include/linux/shm.h b/include/linux/shm.h
index eca6235..b030a4e 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -106,6 +106,7 @@ struct shmid_kernel /* private to the kernel */
 #ifdef CONFIG_SYSVIPC
 long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr);
 extern int is_file_shm_hugepages(struct file *file);
+extern void exit_shm(struct task_struct *task);
 #else
 static inline long do_shmat(int shmid, char __user *shmaddr,
 				int shmflg, unsigned long *addr)
@@ -116,6 +117,10 @@ static inline int is_file_shm_hugepages(struct file *file)
 {
 	return 0;
 }
+static inline void exit_shm(struct task_struct *task)
+{
+	return 0;
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 56410fa..1d36879 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -37,6 +37,28 @@ static int proc_ipc_dointvec(ctl_table *table, int write,
 	return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
 }
 
+static int proc_ipc_dointvec_minmax(ctl_table *table, int write,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table ipc_table;
+	memcpy(&ipc_table, table, sizeof(ipc_table));
+	ipc_table.data = get_ipc(table);
+
+	return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+}
+
+static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+	int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (ns->shm_rmid_forced)
+		shm_destroy_orphaned(ns);
+	return err;
+}
+
 static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -125,6 +147,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 #else
 #define proc_ipc_doulongvec_minmax NULL
 #define proc_ipc_dointvec	   NULL
+#define proc_ipc_dointvec_minmax   NULL
+#define proc_ipc_dointvec_minmax_orphans   NULL
 #define proc_ipc_callback_dointvec NULL
 #define proc_ipcauto_dointvec_minmax NULL
 #endif
@@ -155,6 +179,15 @@ static struct ctl_table ipc_kern_table[] = {
 		.proc_handler	= proc_ipc_dointvec,
 	},
 	{
+		.procname	= "shm_rmid_forced",
+		.data		= &init_ipc_ns.shm_rmid_forced,
+		.maxlen		= sizeof(init_ipc_ns.shm_rmid_forced),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax_orphans,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{
 		.procname	= "msgmax",
 		.data		= &init_ipc_ns.msg_ctlmax,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmax),
diff --git a/ipc/shm.c b/ipc/shm.c
index ab3385a..91b3026 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
 	ns->shm_ctlmax = SHMMAX;
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
+	ns->shm_rmid_forced = 0;
 	ns->shm_tot = 0;
 	ipc_init_ids(&shm_ids(ns));
 }
@@ -186,6 +187,13 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	ipc_rcu_putref(shp);
 }
 
+static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+{
+	return (shp->shm_nattch == 0) &&
+	       (ns->shm_rmid_forced ||
+		(shp->shm_perm.mode & SHM_DEST));
+}
+
 /*
  * remove the attach descriptor vma.
  * free memory for segment if it is marked destroyed.
@@ -206,11 +214,83 @@ static void shm_close(struct vm_area_struct *vma)
 	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
-	if(shp->shm_nattch == 0 &&
-	   shp->shm_perm.mode & SHM_DEST)
+	if (shm_may_destroy(ns, shp))
+		shm_destroy(ns, shp);
+	else
+		shm_unlock(shp);
+	up_write(&shm_ids(ns).rw_mutex);
+}
+
+static int shm_try_destroy_current(int id, void *p, void *data)
+{
+	struct ipc_namespace *ns = data;
+	struct shmid_kernel *shp = shm_lock(ns, id);
+
+	if (IS_ERR(shp))
+		return 0;
+
+	if (shp->shm_cprid != task_tgid_vnr(current)) {
+		shm_unlock(shp);
+		return 0;
+	}
+
+	if (shm_may_destroy(ns, shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
+	return 0;
+}
+
+static int shm_try_destroy_orphaned(int id, void *p, void *data)
+{
+	struct ipc_namespace *ns = data;
+	struct shmid_kernel *shp = shm_lock(ns, id);
+	struct task_struct *task;
+
+	if (IS_ERR(shp))
+		return 0;
+
+	/*
+	 * We want to destroy segments without users and with already
+	 * exit'ed originating process.
+	 *
+	 * XXX: the originating process may exist in another pid namespace.
+	 */
+	task = find_task_by_vpid(shp->shm_cprid);
+	if (task != NULL) {
+		shm_unlock(shp);
+		return 0;
+	}
+
+	if (shm_may_destroy(ns, shp))
+		shm_destroy(ns, shp);
+	else
+		shm_unlock(shp);
+	return 0;
+}
+
+void shm_destroy_orphaned(struct ipc_namespace *ns)
+{
+	down_write(&shm_ids(ns).rw_mutex);
+	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
+	up_write(&shm_ids(ns).rw_mutex);
+}
+
+
+void exit_shm(struct task_struct *task)
+{
+	struct nsproxy *nsp = task->nsproxy;
+	struct ipc_namespace *ns;
+
+	if (!nsp)
+		return;
+	ns = nsp->ipc_ns;
+	if (!ns || !ns->shm_rmid_forced)
+		return;
+
+	/* Destroy all already created segments, but not mapped yet */
+	down_write(&shm_ids(ns).rw_mutex);
+	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
 	up_write(&shm_ids(ns).rw_mutex);
 }
 
@@ -950,8 +1030,7 @@ out_nattch:
 	shp = shm_lock(ns, shmid);
 	BUG_ON(IS_ERR(shp));
 	shp->shm_nattch--;
-	if(shp->shm_nattch == 0 &&
-	   shp->shm_perm.mode & SHM_DEST)
+	if (shm_may_destroy(ns, shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
diff --git a/kernel/exit.c b/kernel/exit.c
index 20a4064..816c610 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -991,6 +991,7 @@ NORET_TYPE void do_exit(long code)
 	trace_sched_process_exit(tsk);
 
 	exit_sem(tsk);
+	exit_shm(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
 	check_stack_usage();
---

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [kernel-hardening] [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-06-22 15:25 ` Vasiliy Kulikov
  0 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-06-22 15:25 UTC (permalink / raw)
  To: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

This patch adds support for shm_rmid_forced sysctl.  If set to 1, all
shared memory objects in current ipc namespace will be automatically
forced to use IPC_RMID.  POSIX way of handling shmem allows to create
shm objects and call shmdt() leaving shm object associated with no
process, thus consuming memory not counted via rlimits.  With
shm_rmid_forced=1 the shared memory object is counted at least for one
process, so OOM killer may effectively kill the fat process holding
the shared memory.

It obviously breaks POSIX, some programs relying on the feature would
stop working.  So, set shm_rmid_forced=1 only if you're sure nobody uses
"orphaned" memory.  shm_rmid_forced=0 by default for compatability
reasons.

The feature was previously impemented in -ow as a configure option.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
---

 Documentation/sysctl/kernel.txt |   22 ++++++++++
 include/linux/ipc_namespace.h   |    3 +
 include/linux/shm.h             |    5 ++
 ipc/ipc_sysctl.c                |   33 +++++++++++++++
 ipc/shm.c                       |   87 +++++++++++++++++++++++++++++++++++++--
 kernel/exit.c                   |    1 +
 6 files changed, 147 insertions(+), 4 deletions(-)

---
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 5e7cb39..7a0ecfd 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -62,6 +62,7 @@ show up in /proc/sys/kernel:
 - shmall
 - shmmax                      [ sysv ipc ]
 - shmmni
+- shm_rmid_forced
 - stop-a                      [ SPARC only ]
 - sysrq                       ==> Documentation/sysrq.txt
 - tainted
@@ -475,6 +476,27 @@ kernel.  This value defaults to SHMMAX.
 
 ==============================================================
 
+shm_rmid_forced:
+
+Linux lets you set resource limits, including on how much memory one
+process can consume, via setrlimit(2).  Unfortunately, shared memory
+segments are allowed to exist without association with any process, and
+thus might not be counted against any resource limits.  If enabled,
+shared memory segments are automatically destroyed when their attach
+count becomes zero after a detach or a process termination.  It will
+also destroy segments that were created, but never attached to, on exit
+from the process.  The only use left for IPC_RMID is to immediately
+destroy an unattached segment.  Of course, this breaks the way things are
+defined, so some applications might stop working.  Note that this
+feature will do you no good unless you also configure your resource
+limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
+need this.
+
+Note that if you change this from 0 to 1, already created segments
+without users and with a dead originative process will be destroyed.
+
+==============================================================
+
 softlockup_thresh:
 
 This value can be used to lower the softlockup tolerance threshold.  The
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index a6d1655..1647795 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -44,6 +44,7 @@ struct ipc_namespace {
 	size_t		shm_ctlall;
 	int		shm_ctlmni;
 	int		shm_tot;
+	int		shm_rmid_forced;
 
 	struct notifier_block ipcns_nb;
 
@@ -72,6 +73,7 @@ extern int register_ipcns_notifier(struct ipc_namespace *);
 extern int cond_register_ipcns_notifier(struct ipc_namespace *);
 extern void unregister_ipcns_notifier(struct ipc_namespace *);
 extern int ipcns_notify(unsigned long);
+extern void shm_destroy_orphaned(struct ipc_namespace *ns);
 #else /* CONFIG_SYSVIPC */
 static inline int register_ipcns_notifier(struct ipc_namespace *ns)
 { return 0; }
@@ -79,6 +81,7 @@ static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns)
 { return 0; }
 static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { }
 static inline int ipcns_notify(unsigned long l) { return 0; }
+static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 #endif /* CONFIG_SYSVIPC */
 
 #ifdef CONFIG_POSIX_MQUEUE
diff --git a/include/linux/shm.h b/include/linux/shm.h
index eca6235..b030a4e 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -106,6 +106,7 @@ struct shmid_kernel /* private to the kernel */
 #ifdef CONFIG_SYSVIPC
 long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr);
 extern int is_file_shm_hugepages(struct file *file);
+extern void exit_shm(struct task_struct *task);
 #else
 static inline long do_shmat(int shmid, char __user *shmaddr,
 				int shmflg, unsigned long *addr)
@@ -116,6 +117,10 @@ static inline int is_file_shm_hugepages(struct file *file)
 {
 	return 0;
 }
+static inline void exit_shm(struct task_struct *task)
+{
+	return 0;
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 56410fa..1d36879 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -37,6 +37,28 @@ static int proc_ipc_dointvec(ctl_table *table, int write,
 	return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
 }
 
+static int proc_ipc_dointvec_minmax(ctl_table *table, int write,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table ipc_table;
+	memcpy(&ipc_table, table, sizeof(ipc_table));
+	ipc_table.data = get_ipc(table);
+
+	return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+}
+
+static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+	int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (ns->shm_rmid_forced)
+		shm_destroy_orphaned(ns);
+	return err;
+}
+
 static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -125,6 +147,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 #else
 #define proc_ipc_doulongvec_minmax NULL
 #define proc_ipc_dointvec	   NULL
+#define proc_ipc_dointvec_minmax   NULL
+#define proc_ipc_dointvec_minmax_orphans   NULL
 #define proc_ipc_callback_dointvec NULL
 #define proc_ipcauto_dointvec_minmax NULL
 #endif
@@ -155,6 +179,15 @@ static struct ctl_table ipc_kern_table[] = {
 		.proc_handler	= proc_ipc_dointvec,
 	},
 	{
+		.procname	= "shm_rmid_forced",
+		.data		= &init_ipc_ns.shm_rmid_forced,
+		.maxlen		= sizeof(init_ipc_ns.shm_rmid_forced),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax_orphans,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{
 		.procname	= "msgmax",
 		.data		= &init_ipc_ns.msg_ctlmax,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmax),
diff --git a/ipc/shm.c b/ipc/shm.c
index ab3385a..91b3026 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
 	ns->shm_ctlmax = SHMMAX;
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
+	ns->shm_rmid_forced = 0;
 	ns->shm_tot = 0;
 	ipc_init_ids(&shm_ids(ns));
 }
@@ -186,6 +187,13 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	ipc_rcu_putref(shp);
 }
 
+static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+{
+	return (shp->shm_nattch == 0) &&
+	       (ns->shm_rmid_forced ||
+		(shp->shm_perm.mode & SHM_DEST));
+}
+
 /*
  * remove the attach descriptor vma.
  * free memory for segment if it is marked destroyed.
@@ -206,11 +214,83 @@ static void shm_close(struct vm_area_struct *vma)
 	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
-	if(shp->shm_nattch == 0 &&
-	   shp->shm_perm.mode & SHM_DEST)
+	if (shm_may_destroy(ns, shp))
+		shm_destroy(ns, shp);
+	else
+		shm_unlock(shp);
+	up_write(&shm_ids(ns).rw_mutex);
+}
+
+static int shm_try_destroy_current(int id, void *p, void *data)
+{
+	struct ipc_namespace *ns = data;
+	struct shmid_kernel *shp = shm_lock(ns, id);
+
+	if (IS_ERR(shp))
+		return 0;
+
+	if (shp->shm_cprid != task_tgid_vnr(current)) {
+		shm_unlock(shp);
+		return 0;
+	}
+
+	if (shm_may_destroy(ns, shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
+	return 0;
+}
+
+static int shm_try_destroy_orphaned(int id, void *p, void *data)
+{
+	struct ipc_namespace *ns = data;
+	struct shmid_kernel *shp = shm_lock(ns, id);
+	struct task_struct *task;
+
+	if (IS_ERR(shp))
+		return 0;
+
+	/*
+	 * We want to destroy segments without users and with already
+	 * exit'ed originating process.
+	 *
+	 * XXX: the originating process may exist in another pid namespace.
+	 */
+	task = find_task_by_vpid(shp->shm_cprid);
+	if (task != NULL) {
+		shm_unlock(shp);
+		return 0;
+	}
+
+	if (shm_may_destroy(ns, shp))
+		shm_destroy(ns, shp);
+	else
+		shm_unlock(shp);
+	return 0;
+}
+
+void shm_destroy_orphaned(struct ipc_namespace *ns)
+{
+	down_write(&shm_ids(ns).rw_mutex);
+	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
+	up_write(&shm_ids(ns).rw_mutex);
+}
+
+
+void exit_shm(struct task_struct *task)
+{
+	struct nsproxy *nsp = task->nsproxy;
+	struct ipc_namespace *ns;
+
+	if (!nsp)
+		return;
+	ns = nsp->ipc_ns;
+	if (!ns || !ns->shm_rmid_forced)
+		return;
+
+	/* Destroy all already created segments, but not mapped yet */
+	down_write(&shm_ids(ns).rw_mutex);
+	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
 	up_write(&shm_ids(ns).rw_mutex);
 }
 
@@ -950,8 +1030,7 @@ out_nattch:
 	shp = shm_lock(ns, shmid);
 	BUG_ON(IS_ERR(shp));
 	shp->shm_nattch--;
-	if(shp->shm_nattch == 0 &&
-	   shp->shm_perm.mode & SHM_DEST)
+	if (shm_may_destroy(ns, shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
diff --git a/kernel/exit.c b/kernel/exit.c
index 20a4064..816c610 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -991,6 +991,7 @@ NORET_TYPE void do_exit(long code)
 	trace_sched_process_exit(tsk);
 
 	exit_sem(tsk);
+	exit_shm(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
 	check_stack_usage();
---

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-06-22 15:25 ` [kernel-hardening] " Vasiliy Kulikov
@ 2011-06-22 16:03   ` Randy Dunlap
  -1 siblings, 0 replies; 34+ messages in thread
From: Randy Dunlap @ 2011-06-22 16:03 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Wed, 22 Jun 2011 19:25:14 +0400 Vasiliy Kulikov wrote:

> Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
> ---
> 
>  Documentation/sysctl/kernel.txt |   22 ++++++++++
>  include/linux/ipc_namespace.h   |    3 +
>  include/linux/shm.h             |    5 ++
>  ipc/ipc_sysctl.c                |   33 +++++++++++++++
>  ipc/shm.c                       |   87 +++++++++++++++++++++++++++++++++++++--
>  kernel/exit.c                   |    1 +
>  6 files changed, 147 insertions(+), 4 deletions(-)
> 
> ---
> diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
> index 5e7cb39..7a0ecfd 100644
> --- a/Documentation/sysctl/kernel.txt
> +++ b/Documentation/sysctl/kernel.txt
> @@ -62,6 +62,7 @@ show up in /proc/sys/kernel:
>  - shmall
>  - shmmax                      [ sysv ipc ]
>  - shmmni
> +- shm_rmid_forced
>  - stop-a                      [ SPARC only ]
>  - sysrq                       ==> Documentation/sysrq.txt
>  - tainted
> @@ -475,6 +476,27 @@ kernel.  This value defaults to SHMMAX.
>  
>  ==============================================================
>  
> +shm_rmid_forced:
> +
> +Linux lets you set resource limits, including on how much memory one

                                       including how much memory one

> +process can consume, via setrlimit(2).  Unfortunately, shared memory
> +segments are allowed to exist without association with any process, and
> +thus might not be counted against any resource limits.  If enabled,
> +shared memory segments are automatically destroyed when their attach
> +count becomes zero after a detach or a process termination.  It will
> +also destroy segments that were created, but never attached to, on exit
> +from the process.  The only use left for IPC_RMID is to immediately
> +destroy an unattached segment.  Of course, this breaks the way things are
> +defined, so some applications might stop working.  Note that this
> +feature will do you no good unless you also configure your resource
> +limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
> +need this.
> +
> +Note that if you change this from 0 to 1, already created segments
> +without users and with a dead originative process will be destroyed.

                                 originating (?)

> +
> +==============================================================
> +
>  softlockup_thresh:
>  
>  This value can be used to lower the softlockup tolerance threshold.  The



---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-06-22 16:03   ` Randy Dunlap
  0 siblings, 0 replies; 34+ messages in thread
From: Randy Dunlap @ 2011-06-22 16:03 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Wed, 22 Jun 2011 19:25:14 +0400 Vasiliy Kulikov wrote:

> Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
> ---
> 
>  Documentation/sysctl/kernel.txt |   22 ++++++++++
>  include/linux/ipc_namespace.h   |    3 +
>  include/linux/shm.h             |    5 ++
>  ipc/ipc_sysctl.c                |   33 +++++++++++++++
>  ipc/shm.c                       |   87 +++++++++++++++++++++++++++++++++++++--
>  kernel/exit.c                   |    1 +
>  6 files changed, 147 insertions(+), 4 deletions(-)
> 
> ---
> diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
> index 5e7cb39..7a0ecfd 100644
> --- a/Documentation/sysctl/kernel.txt
> +++ b/Documentation/sysctl/kernel.txt
> @@ -62,6 +62,7 @@ show up in /proc/sys/kernel:
>  - shmall
>  - shmmax                      [ sysv ipc ]
>  - shmmni
> +- shm_rmid_forced
>  - stop-a                      [ SPARC only ]
>  - sysrq                       ==> Documentation/sysrq.txt
>  - tainted
> @@ -475,6 +476,27 @@ kernel.  This value defaults to SHMMAX.
>  
>  ==============================================================
>  
> +shm_rmid_forced:
> +
> +Linux lets you set resource limits, including on how much memory one

                                       including how much memory one

> +process can consume, via setrlimit(2).  Unfortunately, shared memory
> +segments are allowed to exist without association with any process, and
> +thus might not be counted against any resource limits.  If enabled,
> +shared memory segments are automatically destroyed when their attach
> +count becomes zero after a detach or a process termination.  It will
> +also destroy segments that were created, but never attached to, on exit
> +from the process.  The only use left for IPC_RMID is to immediately
> +destroy an unattached segment.  Of course, this breaks the way things are
> +defined, so some applications might stop working.  Note that this
> +feature will do you no good unless you also configure your resource
> +limits (in particular, RLIMIT_AS and RLIMIT_NPROC).  Most systems don't
> +need this.
> +
> +Note that if you change this from 0 to 1, already created segments
> +without users and with a dead originative process will be destroyed.

                                 originating (?)

> +
> +==============================================================
> +
>  softlockup_thresh:
>  
>  This value can be used to lower the softlockup tolerance threshold.  The



---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-06-22 15:25 ` [kernel-hardening] " Vasiliy Kulikov
@ 2011-06-29 22:14   ` Andrew Morton
  -1 siblings, 0 replies; 34+ messages in thread
From: Andrew Morton @ 2011-06-29 22:14 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Wed, 22 Jun 2011 19:25:14 +0400
Vasiliy Kulikov <segoon@openwall.com> wrote:

> This patch adds support for shm_rmid_forced sysctl.  If set to 1, all
> shared memory objects in current ipc namespace will be automatically
> forced to use IPC_RMID.  POSIX way of handling shmem allows to create
> shm objects and call shmdt() leaving shm object associated with no
> process, thus consuming memory not counted via rlimits.  With
> shm_rmid_forced=1 the shared memory object is counted at least for one
> process, so OOM killer may effectively kill the fat process holding
> the shared memory.
> 
> It obviously breaks POSIX, some programs relying on the feature would
> stop working.  So, set shm_rmid_forced=1 only if you're sure nobody uses
> "orphaned" memory.  shm_rmid_forced=0 by default for compatability
> reasons.
> 
> The feature was previously impemented in -ow as a configure option.
> 

What a horrid patch.  But given the POSIX (mis?)feature I don't see a
better way, and the feature seems desirable.  Sigh.

What sort of users would want to turn this on, and why?

> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -44,6 +44,7 @@ struct ipc_namespace {
>  	size_t		shm_ctlall;
>  	int		shm_ctlmni;
>  	int		shm_tot;
> +	int		shm_rmid_forced;
>  
>  	struct notifier_block ipcns_nb;

Please send a patch which adds a nice comment to this field.

Perhaps shm_rmid_forced should have had bool type.

> --- a/ipc/shm.c
> +++ b/ipc/shm.c
> @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
>  	ns->shm_ctlmax = SHMMAX;
>  	ns->shm_ctlall = SHMALL;
>  	ns->shm_ctlmni = SHMMNI;
> +	ns->shm_rmid_forced = 0;
>  	ns->shm_tot = 0;
>  	ipc_init_ids(&shm_ids(ns));
>  }

The problem is that nobody will test your feature.  So for testing
purposes, let's enable the feature by default.  I assume this:

--- a/ipc/shm.c~ipc-introduce-shm_rmid_forced-sysctl-ipc-introduce-shm_rmid_forced-sysctl-testing
+++ a/ipc/shm.c
@@ -74,7 +74,7 @@ void shm_init_ns(struct ipc_namespace *n
 	ns->shm_ctlmax = SHMMAX;
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
-	ns->shm_rmid_forced = 0;
+	ns->shm_rmid_forced = 1;
 	ns->shm_tot = 0;
 	ipc_init_ids(&shm_ids(ns));
 }

will do that?

> +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
> +{
> +	return (shp->shm_nattch == 0) &&
> +	       (ns->shm_rmid_forced ||
> +		(shp->shm_perm.mode & SHM_DEST));
> +}

Just because the existing code is crappily documented doesn't mean that
we have to copy that ;)



^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-06-29 22:14   ` Andrew Morton
  0 siblings, 0 replies; 34+ messages in thread
From: Andrew Morton @ 2011-06-29 22:14 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Wed, 22 Jun 2011 19:25:14 +0400
Vasiliy Kulikov <segoon@openwall.com> wrote:

> This patch adds support for shm_rmid_forced sysctl.  If set to 1, all
> shared memory objects in current ipc namespace will be automatically
> forced to use IPC_RMID.  POSIX way of handling shmem allows to create
> shm objects and call shmdt() leaving shm object associated with no
> process, thus consuming memory not counted via rlimits.  With
> shm_rmid_forced=1 the shared memory object is counted at least for one
> process, so OOM killer may effectively kill the fat process holding
> the shared memory.
> 
> It obviously breaks POSIX, some programs relying on the feature would
> stop working.  So, set shm_rmid_forced=1 only if you're sure nobody uses
> "orphaned" memory.  shm_rmid_forced=0 by default for compatability
> reasons.
> 
> The feature was previously impemented in -ow as a configure option.
> 

What a horrid patch.  But given the POSIX (mis?)feature I don't see a
better way, and the feature seems desirable.  Sigh.

What sort of users would want to turn this on, and why?

> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -44,6 +44,7 @@ struct ipc_namespace {
>  	size_t		shm_ctlall;
>  	int		shm_ctlmni;
>  	int		shm_tot;
> +	int		shm_rmid_forced;
>  
>  	struct notifier_block ipcns_nb;

Please send a patch which adds a nice comment to this field.

Perhaps shm_rmid_forced should have had bool type.

> --- a/ipc/shm.c
> +++ b/ipc/shm.c
> @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
>  	ns->shm_ctlmax = SHMMAX;
>  	ns->shm_ctlall = SHMALL;
>  	ns->shm_ctlmni = SHMMNI;
> +	ns->shm_rmid_forced = 0;
>  	ns->shm_tot = 0;
>  	ipc_init_ids(&shm_ids(ns));
>  }

The problem is that nobody will test your feature.  So for testing
purposes, let's enable the feature by default.  I assume this:

--- a/ipc/shm.c~ipc-introduce-shm_rmid_forced-sysctl-ipc-introduce-shm_rmid_forced-sysctl-testing
+++ a/ipc/shm.c
@@ -74,7 +74,7 @@ void shm_init_ns(struct ipc_namespace *n
 	ns->shm_ctlmax = SHMMAX;
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
-	ns->shm_rmid_forced = 0;
+	ns->shm_rmid_forced = 1;
 	ns->shm_tot = 0;
 	ipc_init_ids(&shm_ids(ns));
 }

will do that?

> +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
> +{
> +	return (shp->shm_nattch == 0) &&
> +	       (ns->shm_rmid_forced ||
> +		(shp->shm_perm.mode & SHM_DEST));
> +}

Just because the existing code is crappily documented doesn't mean that
we have to copy that ;)

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-06-29 22:14   ` [kernel-hardening] " Andrew Morton
@ 2011-06-30  9:21     ` Vasiliy Kulikov
  -1 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-06-30  9:21 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Wed, Jun 29, 2011 at 15:14 -0700, Andrew Morton wrote:
> What a horrid patch.  But given the POSIX (mis?)feature I don't see a
> better way, and the feature seems desirable.  Sigh.
> 
> What sort of users would want to turn this on, and why?

A shared environment, e.g. a shared web hosting.  With multiple ipc
namespaces (on a VPS) each namespace can steal a few megabytes of memory
without any per user accounting.  Even if all user's processes are
killed, the orphaned shm is still kept in RAM.  It doesn't lead to OOM,
but it prevents from creating new segments and just annoys.


> > --- a/include/linux/ipc_namespace.h
> > +++ b/include/linux/ipc_namespace.h
> > @@ -44,6 +44,7 @@ struct ipc_namespace {
> >  	size_t		shm_ctlall;
> >  	int		shm_ctlmni;
> >  	int		shm_tot;
> > +	int		shm_rmid_forced;
> >  
> >  	struct notifier_block ipcns_nb;
> 
> Please send a patch which adds a nice comment to this field.

OK.

> Perhaps shm_rmid_forced should have had bool type.

OK.  The sysctl handler should parse it OK if feed it with sizeof(bool).

> > --- a/ipc/shm.c
> > +++ b/ipc/shm.c
> > @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
> >  	ns->shm_ctlmax = SHMMAX;
> >  	ns->shm_ctlall = SHMALL;
> >  	ns->shm_ctlmni = SHMMNI;
> > +	ns->shm_rmid_forced = 0;
> >  	ns->shm_tot = 0;
> >  	ipc_init_ids(&shm_ids(ns));
> >  }
> 
> The problem is that nobody will test your feature.  So for testing
> purposes, let's enable the feature by default.  I assume this:
> 
> --- a/ipc/shm.c~ipc-introduce-shm_rmid_forced-sysctl-ipc-introduce-shm_rmid_forced-sysctl-testing
> +++ a/ipc/shm.c
> @@ -74,7 +74,7 @@ void shm_init_ns(struct ipc_namespace *n
>  	ns->shm_ctlmax = SHMMAX;
>  	ns->shm_ctlall = SHMALL;
>  	ns->shm_ctlmni = SHMMNI;
> -	ns->shm_rmid_forced = 0;
> +	ns->shm_rmid_forced = 1;
>  	ns->shm_tot = 0;
>  	ipc_init_ids(&shm_ids(ns));
>  }
> 
> will do that?

Sure if it helps testing :)

> > +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
> > +{
> > +	return (shp->shm_nattch == 0) &&
> > +	       (ns->shm_rmid_forced ||
> > +		(shp->shm_perm.mode & SHM_DEST));
> > +}
> 
> Just because the existing code is crappily documented doesn't mean that
> we have to copy that ;)

OK ;)


Thank you!

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-06-30  9:21     ` Vasiliy Kulikov
  0 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-06-30  9:21 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Wed, Jun 29, 2011 at 15:14 -0700, Andrew Morton wrote:
> What a horrid patch.  But given the POSIX (mis?)feature I don't see a
> better way, and the feature seems desirable.  Sigh.
> 
> What sort of users would want to turn this on, and why?

A shared environment, e.g. a shared web hosting.  With multiple ipc
namespaces (on a VPS) each namespace can steal a few megabytes of memory
without any per user accounting.  Even if all user's processes are
killed, the orphaned shm is still kept in RAM.  It doesn't lead to OOM,
but it prevents from creating new segments and just annoys.


> > --- a/include/linux/ipc_namespace.h
> > +++ b/include/linux/ipc_namespace.h
> > @@ -44,6 +44,7 @@ struct ipc_namespace {
> >  	size_t		shm_ctlall;
> >  	int		shm_ctlmni;
> >  	int		shm_tot;
> > +	int		shm_rmid_forced;
> >  
> >  	struct notifier_block ipcns_nb;
> 
> Please send a patch which adds a nice comment to this field.

OK.

> Perhaps shm_rmid_forced should have had bool type.

OK.  The sysctl handler should parse it OK if feed it with sizeof(bool).

> > --- a/ipc/shm.c
> > +++ b/ipc/shm.c
> > @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
> >  	ns->shm_ctlmax = SHMMAX;
> >  	ns->shm_ctlall = SHMALL;
> >  	ns->shm_ctlmni = SHMMNI;
> > +	ns->shm_rmid_forced = 0;
> >  	ns->shm_tot = 0;
> >  	ipc_init_ids(&shm_ids(ns));
> >  }
> 
> The problem is that nobody will test your feature.  So for testing
> purposes, let's enable the feature by default.  I assume this:
> 
> --- a/ipc/shm.c~ipc-introduce-shm_rmid_forced-sysctl-ipc-introduce-shm_rmid_forced-sysctl-testing
> +++ a/ipc/shm.c
> @@ -74,7 +74,7 @@ void shm_init_ns(struct ipc_namespace *n
>  	ns->shm_ctlmax = SHMMAX;
>  	ns->shm_ctlall = SHMALL;
>  	ns->shm_ctlmni = SHMMNI;
> -	ns->shm_rmid_forced = 0;
> +	ns->shm_rmid_forced = 1;
>  	ns->shm_tot = 0;
>  	ipc_init_ids(&shm_ids(ns));
>  }
> 
> will do that?

Sure if it helps testing :)

> > +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
> > +{
> > +	return (shp->shm_nattch == 0) &&
> > +	       (ns->shm_rmid_forced ||
> > +		(shp->shm_perm.mode & SHM_DEST));
> > +}
> 
> Just because the existing code is crappily documented doesn't mean that
> we have to copy that ;)

OK ;)


Thank you!

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-06-30  9:21     ` [kernel-hardening] " Vasiliy Kulikov
@ 2011-06-30 13:08       ` Vasiliy Kulikov
  -1 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-06-30 13:08 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Thu, Jun 30, 2011 at 13:21 +0400, Vasiliy Kulikov wrote:
> > Perhaps shm_rmid_forced should have had bool type.
> 
> OK.  The sysctl handler should parse it OK if feed it with sizeof(bool).

I was wrong, maxlen must not be less than sizeof(int), it makes sense
for arrays only.  So, it is kept being int.

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-06-30 13:08       ` Vasiliy Kulikov
  0 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-06-30 13:08 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	Ingo Molnar, linux-doc, linux-kernel, linux-security-module

On Thu, Jun 30, 2011 at 13:21 +0400, Vasiliy Kulikov wrote:
> > Perhaps shm_rmid_forced should have had bool type.
> 
> OK.  The sysctl handler should parse it OK if feed it with sizeof(bool).

I was wrong, maxlen must not be less than sizeof(int), it makes sense
for arrays only.  So, it is kept being int.

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-06-29 22:14   ` [kernel-hardening] " Andrew Morton
@ 2011-07-01 11:25     ` Ingo Molnar
  -1 siblings, 0 replies; 34+ messages in thread
From: Ingo Molnar @ 2011-07-01 11:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Vasiliy Kulikov, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module


* Andrew Morton <akpm@linux-foundation.org> wrote:

> > --- a/ipc/shm.c
> > +++ b/ipc/shm.c
> > @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
> >  	ns->shm_ctlmax = SHMMAX;
> >  	ns->shm_ctlall = SHMALL;
> >  	ns->shm_ctlmni = SHMMNI;
> > +	ns->shm_rmid_forced = 0;
> >  	ns->shm_tot = 0;
> >  	ipc_init_ids(&shm_ids(ns));
> >  }
> 
> The problem is that nobody will test your feature.  So for testing
> purposes, let's enable the feature by default.  I assume this:

I'd also strongly argue to keep this as a default. OOM-kills are not 
part of POSIX and violate POSIX in a number of ways already.

Furthermore, if testing shows that this is not actually breaking 
anything in a serious way we could also in theory simplify the patch 
and just make this the default behavior with no runtime ability to 
switch it off.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-01 11:25     ` Ingo Molnar
  0 siblings, 0 replies; 34+ messages in thread
From: Ingo Molnar @ 2011-07-01 11:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Vasiliy Kulikov, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module


* Andrew Morton <akpm@linux-foundation.org> wrote:

> > --- a/ipc/shm.c
> > +++ b/ipc/shm.c
> > @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns)
> >  	ns->shm_ctlmax = SHMMAX;
> >  	ns->shm_ctlall = SHMALL;
> >  	ns->shm_ctlmni = SHMMNI;
> > +	ns->shm_rmid_forced = 0;
> >  	ns->shm_tot = 0;
> >  	ipc_init_ids(&shm_ids(ns));
> >  }
> 
> The problem is that nobody will test your feature.  So for testing
> purposes, let's enable the feature by default.  I assume this:

I'd also strongly argue to keep this as a default. OOM-kills are not 
part of POSIX and violate POSIX in a number of ways already.

Furthermore, if testing shows that this is not actually breaking 
anything in a serious way we could also in theory simplify the patch 
and just make this the default behavior with no runtime ability to 
switch it off.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-01 11:25     ` [kernel-hardening] " Ingo Molnar
@ 2011-07-01 11:35       ` Vasiliy Kulikov
  -1 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-07-01 11:35 UTC (permalink / raw)
  To: Ingo Molnar, solar
  Cc: Andrew Morton, kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	linux-doc, linux-kernel, linux-security-module

On Fri, Jul 01, 2011 at 13:25 +0200, Ingo Molnar wrote:
> Furthermore, if testing shows that this is not actually breaking 
> anything in a serious way we could also in theory simplify the patch 
> and just make this the default behavior with no runtime ability to 
> switch it off.

I'm afraid it's impossible.  From -ow readme:

"Of course, this breaks the way things are defined, so some applications
might stop working. In particular, expect most commercial databases to
break. Apache and PostgreSQL are known to work, though. :-)"

http://www.openwall.com/linux/README.shtml

But as it was written in days of Linux 2.4.x, the situation could have
changed.  A desktop system seems to work.


Thanks,

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-01 11:35       ` Vasiliy Kulikov
  0 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-07-01 11:35 UTC (permalink / raw)
  To: Ingo Molnar, solar
  Cc: Andrew Morton, kernel-hardening, Randy Dunlap, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Oleg Nesterov, Tejun Heo,
	linux-doc, linux-kernel, linux-security-module

On Fri, Jul 01, 2011 at 13:25 +0200, Ingo Molnar wrote:
> Furthermore, if testing shows that this is not actually breaking 
> anything in a serious way we could also in theory simplify the patch 
> and just make this the default behavior with no runtime ability to 
> switch it off.

I'm afraid it's impossible.  From -ow readme:

"Of course, this breaks the way things are defined, so some applications
might stop working. In particular, expect most commercial databases to
break. Apache and PostgreSQL are known to work, though. :-)"

http://www.openwall.com/linux/README.shtml

But as it was written in days of Linux 2.4.x, the situation could have
changed.  A desktop system seems to work.


Thanks,

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-01 11:35       ` [kernel-hardening] " Vasiliy Kulikov
@ 2011-07-01 12:04         ` Ingo Molnar
  -1 siblings, 0 replies; 34+ messages in thread
From: Ingo Molnar @ 2011-07-01 12:04 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: solar, Andrew Morton, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module


* Vasiliy Kulikov <segoon@openwall.com> wrote:

> On Fri, Jul 01, 2011 at 13:25 +0200, Ingo Molnar wrote:
> > Furthermore, if testing shows that this is not actually breaking 
> > anything in a serious way we could also in theory simplify the patch 
> > and just make this the default behavior with no runtime ability to 
> > switch it off.
> 
> I'm afraid it's impossible.  From -ow readme:
> 
> "Of course, this breaks the way things are defined, so some 
> applications might stop working. In particular, expect most 
> commercial databases to break. Apache and PostgreSQL are known to 
> work, though. :-)"
> 
> http://www.openwall.com/linux/README.shtml
> 
> But as it was written in days of Linux 2.4.x, the situation could 
> have changed.  A desktop system seems to work.

As we really prefer working systems over non-working ones (and lots 
of unattached shm segments can clearly result in a non-working 
system) we can only accept the "this will break stuff" argument if 
it's *demonstrated* to break stuff and if the failure scenario is 
carefully described in the commit.

It would take a serious breakage to override a "system locks up 
swapping itself to death" failure scenario.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-01 12:04         ` Ingo Molnar
  0 siblings, 0 replies; 34+ messages in thread
From: Ingo Molnar @ 2011-07-01 12:04 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: solar, Andrew Morton, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module


* Vasiliy Kulikov <segoon@openwall.com> wrote:

> On Fri, Jul 01, 2011 at 13:25 +0200, Ingo Molnar wrote:
> > Furthermore, if testing shows that this is not actually breaking 
> > anything in a serious way we could also in theory simplify the patch 
> > and just make this the default behavior with no runtime ability to 
> > switch it off.
> 
> I'm afraid it's impossible.  From -ow readme:
> 
> "Of course, this breaks the way things are defined, so some 
> applications might stop working. In particular, expect most 
> commercial databases to break. Apache and PostgreSQL are known to 
> work, though. :-)"
> 
> http://www.openwall.com/linux/README.shtml
> 
> But as it was written in days of Linux 2.4.x, the situation could 
> have changed.  A desktop system seems to work.

As we really prefer working systems over non-working ones (and lots 
of unattached shm segments can clearly result in a non-working 
system) we can only accept the "this will break stuff" argument if 
it's *demonstrated* to break stuff and if the failure scenario is 
carefully described in the commit.

It would take a serious breakage to override a "system locks up 
swapping itself to death" failure scenario.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-01 12:04         ` [kernel-hardening] " Ingo Molnar
@ 2011-07-01 14:18           ` Alan Cox
  -1 siblings, 0 replies; 34+ messages in thread
From: Alan Cox @ 2011-07-01 14:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Vasiliy Kulikov, solar, Andrew Morton, kernel-hardening,
	Randy Dunlap, Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module

> As we really prefer working systems over non-working ones (and lots 
> of unattached shm segments can clearly result in a non-working 
> system) we can only accept the "this will break stuff" argument if 
> it's *demonstrated* to break stuff and if the failure scenario is 
> carefully described in the commit.
> 
> It would take a serious breakage to override a "system locks up 
> swapping itself to death" failure scenario.

Ths shared memory interface is defined to be persistent for good reason
and all sorts of apps rely upon that so no you can't just ignore that. As
a configurable alternative it makes sense (indeed many SYS5 admins used
to run shared memory segment sweepers to clean up long idle ones)

However if it's locking the machine up and not being properly handled by
resource management then

a) your resource management is broken so fix that instead
b) if your resource management is busted or you are not properly
tracking resource commits then the user is going to be able to achieve the
same result by other means (eg a unix domain socket bomb)

If you've got no overcommit set you shouldn't be able to swap to death,
it may be the sysv shared memory objects need to be accounted for
specifically somewhere but that would be the right thing to fix and the
mechanisms to do it exist.



^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-01 14:18           ` Alan Cox
  0 siblings, 0 replies; 34+ messages in thread
From: Alan Cox @ 2011-07-01 14:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Vasiliy Kulikov, solar, Andrew Morton, kernel-hardening,
	Randy Dunlap, Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module

> As we really prefer working systems over non-working ones (and lots 
> of unattached shm segments can clearly result in a non-working 
> system) we can only accept the "this will break stuff" argument if 
> it's *demonstrated* to break stuff and if the failure scenario is 
> carefully described in the commit.
> 
> It would take a serious breakage to override a "system locks up 
> swapping itself to death" failure scenario.

Ths shared memory interface is defined to be persistent for good reason
and all sorts of apps rely upon that so no you can't just ignore that. As
a configurable alternative it makes sense (indeed many SYS5 admins used
to run shared memory segment sweepers to clean up long idle ones)

However if it's locking the machine up and not being properly handled by
resource management then

a) your resource management is broken so fix that instead
b) if your resource management is busted or you are not properly
tracking resource commits then the user is going to be able to achieve the
same result by other means (eg a unix domain socket bomb)

If you've got no overcommit set you shouldn't be able to swap to death,
it may be the sysv shared memory objects need to be accounted for
specifically somewhere but that would be the right thing to fix and the
mechanisms to do it exist.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-01 11:35       ` [kernel-hardening] " Vasiliy Kulikov
@ 2011-07-02 16:50         ` Solar Designer
  -1 siblings, 0 replies; 34+ messages in thread
From: Solar Designer @ 2011-07-02 16:50 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: Ingo Molnar, Andrew Morton, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module

On Fri, Jul 01, 2011 at 03:35:33PM +0400, Vasiliy Kulikov wrote:
> From -ow readme:
> 
> "Of course, this breaks the way things are defined, so some applications
> might stop working. In particular, expect most commercial databases to
> break. Apache and PostgreSQL are known to work, though. :-)"
> 
> http://www.openwall.com/linux/README.shtml
> 
> But as it was written in days of Linux 2.4.x, the situation could have
> changed.  A desktop system seems to work.

I wrote the above in 2.0.x days (circa 1998) and it was based on FUD
rather than on any real evidence of any breakage.  Apache and PostgreSQL
were a couple of known users of shared memory segments, and these did
not break.  I was not aware of any programs that presumably did break.
Of course, those probably do exist, but I don't recall anyone ever
reporting any to me (as maintainer of -ow patches), although the warning
quoted above might have played a role in such non-reporting.

Alexander

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-02 16:50         ` Solar Designer
  0 siblings, 0 replies; 34+ messages in thread
From: Solar Designer @ 2011-07-02 16:50 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: Ingo Molnar, Andrew Morton, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module

On Fri, Jul 01, 2011 at 03:35:33PM +0400, Vasiliy Kulikov wrote:
> From -ow readme:
> 
> "Of course, this breaks the way things are defined, so some applications
> might stop working. In particular, expect most commercial databases to
> break. Apache and PostgreSQL are known to work, though. :-)"
> 
> http://www.openwall.com/linux/README.shtml
> 
> But as it was written in days of Linux 2.4.x, the situation could have
> changed.  A desktop system seems to work.

I wrote the above in 2.0.x days (circa 1998) and it was based on FUD
rather than on any real evidence of any breakage.  Apache and PostgreSQL
were a couple of known users of shared memory segments, and these did
not break.  I was not aware of any programs that presumably did break.
Of course, those probably do exist, but I don't recall anyone ever
reporting any to me (as maintainer of -ow patches), although the warning
quoted above might have played a role in such non-reporting.

Alexander

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-06-29 22:14   ` [kernel-hardening] " Andrew Morton
@ 2011-07-02 17:31     ` Solar Designer
  -1 siblings, 0 replies; 34+ messages in thread
From: Solar Designer @ 2011-07-02 17:31 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Vasiliy Kulikov, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, Ingo Molnar, linux-doc, linux-kernel,
	linux-security-module

On Wed, Jun 29, 2011 at 03:14:36PM -0700, Andrew Morton wrote:
> What a horrid patch.  But given the POSIX (mis?)feature I don't see a
> better way, and the feature seems desirable.  Sigh.
> 
> What sort of users would want to turn this on, and why?

Originally, I introduced it into Linux 2.0.x-ow to allow for resource
limits to be enforced on shared servers, such as with shared web
hosting.  A user is supposed to be limited by RLIMIT_AS * RLIMIT_NPROC.
(This is awfully inflexible, lacking a separate per-user memory limit,
but at least it's something.)  However, with shared memory segments a
user could bypass that limit, because those segments don't have to be
tied to a process.  So the patch changed that, requiring that any shm
segment be tied to a process, or be destroyed.

Alexander

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-02 17:31     ` Solar Designer
  0 siblings, 0 replies; 34+ messages in thread
From: Solar Designer @ 2011-07-02 17:31 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Vasiliy Kulikov, kernel-hardening, Randy Dunlap,
	Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, Ingo Molnar, linux-doc, linux-kernel,
	linux-security-module

On Wed, Jun 29, 2011 at 03:14:36PM -0700, Andrew Morton wrote:
> What a horrid patch.  But given the POSIX (mis?)feature I don't see a
> better way, and the feature seems desirable.  Sigh.
> 
> What sort of users would want to turn this on, and why?

Originally, I introduced it into Linux 2.0.x-ow to allow for resource
limits to be enforced on shared servers, such as with shared web
hosting.  A user is supposed to be limited by RLIMIT_AS * RLIMIT_NPROC.
(This is awfully inflexible, lacking a separate per-user memory limit,
but at least it's something.)  However, with shared memory segments a
user could bypass that limit, because those segments don't have to be
tied to a process.  So the patch changed that, requiring that any shm
segment be tied to a process, or be destroyed.

Alexander

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-01 14:18           ` [kernel-hardening] " Alan Cox
@ 2011-07-03 19:38             ` Ingo Molnar
  -1 siblings, 0 replies; 34+ messages in thread
From: Ingo Molnar @ 2011-07-03 19:38 UTC (permalink / raw)
  To: Alan Cox
  Cc: Vasiliy Kulikov, solar, Andrew Morton, kernel-hardening,
	Randy Dunlap, Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module


* Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

> > As we really prefer working systems over non-working ones (and lots 
> > of unattached shm segments can clearly result in a non-working 
> > system) we can only accept the "this will break stuff" argument if 
> > it's *demonstrated* to break stuff and if the failure scenario is 
> > carefully described in the commit.
> > 
> > It would take a serious breakage to override a "system locks up 
> > swapping itself to death" failure scenario.
> 
> Ths shared memory interface is defined to be persistent for good 
> reason and all sorts of apps rely upon that so no you can't just 
> ignore that. As a configurable alternative it makes sense (indeed 
> many SYS5 admins used to run shared memory segment sweepers to 
> clean up long idle ones)
> 
> However if it's locking the machine up and not being properly 
> handled by resource management then
> 
> a) your resource management is broken so fix that instead
> b) if your resource management is busted or you are not properly
> tracking resource commits then the user is going to be able to achieve the
> same result by other means (eg a unix domain socket bomb)
> 
> If you've got no overcommit set you shouldn't be able to swap to 
> death, it may be the sysv shared memory objects need to be 
> accounted for specifically somewhere but that would be the right 
> thing to fix and the mechanisms to do it exist.

But the majority of systems have overcommit enabled - that is our 
default.

This is a simple extension of the OOM killer being able to ... kill 
things on OOM, ok? 'to kill' implies 'to break'.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-03 19:38             ` Ingo Molnar
  0 siblings, 0 replies; 34+ messages in thread
From: Ingo Molnar @ 2011-07-03 19:38 UTC (permalink / raw)
  To: Alan Cox
  Cc: Vasiliy Kulikov, solar, Andrew Morton, kernel-hardening,
	Randy Dunlap, Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module


* Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

> > As we really prefer working systems over non-working ones (and lots 
> > of unattached shm segments can clearly result in a non-working 
> > system) we can only accept the "this will break stuff" argument if 
> > it's *demonstrated* to break stuff and if the failure scenario is 
> > carefully described in the commit.
> > 
> > It would take a serious breakage to override a "system locks up 
> > swapping itself to death" failure scenario.
> 
> Ths shared memory interface is defined to be persistent for good 
> reason and all sorts of apps rely upon that so no you can't just 
> ignore that. As a configurable alternative it makes sense (indeed 
> many SYS5 admins used to run shared memory segment sweepers to 
> clean up long idle ones)
> 
> However if it's locking the machine up and not being properly 
> handled by resource management then
> 
> a) your resource management is broken so fix that instead
> b) if your resource management is busted or you are not properly
> tracking resource commits then the user is going to be able to achieve the
> same result by other means (eg a unix domain socket bomb)
> 
> If you've got no overcommit set you shouldn't be able to swap to 
> death, it may be the sysv shared memory objects need to be 
> accounted for specifically somewhere but that would be the right 
> thing to fix and the mechanisms to do it exist.

But the majority of systems have overcommit enabled - that is our 
default.

This is a simple extension of the OOM killer being able to ... kill 
things on OOM, ok? 'to kill' implies 'to break'.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-03 19:38             ` [kernel-hardening] " Ingo Molnar
@ 2011-07-03 21:25               ` Alan Cox
  -1 siblings, 0 replies; 34+ messages in thread
From: Alan Cox @ 2011-07-03 21:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Vasiliy Kulikov, solar, Andrew Morton, kernel-hardening,
	Randy Dunlap, Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module

> This is a simple extension of the OOM killer being able to ... kill 
> things on OOM, ok? 'to kill' implies 'to break'.

If you do it on the OOM killer then yes that aspect makes sense. The real
problem is that Linux has shipped a broken default for the past ten years.

The number of times I have to explain to industrial and business
customers that Linux doesn't suck but the defaults are stupid is
astounding, and they then wonder why either the authors or their vendor is
a complete and utter moron.

But yes from an OOM perspective killing an unattached SHM segment makes
as much sense as killing anything else.

Alan

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-03 21:25               ` Alan Cox
  0 siblings, 0 replies; 34+ messages in thread
From: Alan Cox @ 2011-07-03 21:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Vasiliy Kulikov, solar, Andrew Morton, kernel-hardening,
	Randy Dunlap, Eric W. Biederman, Serge E. Hallyn, Daniel Lezcano,
	Oleg Nesterov, Tejun Heo, linux-doc, linux-kernel,
	linux-security-module

> This is a simple extension of the OOM killer being able to ... kill 
> things on OOM, ok? 'to kill' implies 'to break'.

If you do it on the OOM killer then yes that aspect makes sense. The real
problem is that Linux has shipped a broken default for the past ten years.

The number of times I have to explain to industrial and business
customers that Linux doesn't suck but the defaults are stupid is
astounding, and they then wonder why either the authors or their vendor is
a complete and utter moron.

But yes from an OOM perspective killing an unattached SHM segment makes
as much sense as killing anything else.

Alan

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-06-22 15:25 ` [kernel-hardening] " Vasiliy Kulikov
@ 2011-07-04 15:08   ` Oleg Nesterov
  -1 siblings, 0 replies; 34+ messages in thread
From: Oleg Nesterov @ 2011-07-04 15:08 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On 06/22, Vasiliy Kulikov wrote:
>
> +void exit_shm(struct task_struct *task)
> +{
> +	struct nsproxy *nsp = task->nsproxy;
> +	struct ipc_namespace *ns;
> +
> +	if (!nsp)
> +		return;
> +	ns = nsp->ipc_ns;
> +	if (!ns || !ns->shm_rmid_forced)

This looks confusing, imho. How it is possible that ->nsproxy or
->ipc_ns is NULL?

> +		return;
> +
> +	/* Destroy all already created segments, but not mapped yet */
> +	down_write(&shm_ids(ns).rw_mutex);
> +	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
>  	up_write(&shm_ids(ns).rw_mutex);

Again, I do not pretend I understand ipc/, but it seems we can check
ns->ipc_ids[].in_use != 0 before the slow path, no?

Oleg.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-04 15:08   ` Oleg Nesterov
  0 siblings, 0 replies; 34+ messages in thread
From: Oleg Nesterov @ 2011-07-04 15:08 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On 06/22, Vasiliy Kulikov wrote:
>
> +void exit_shm(struct task_struct *task)
> +{
> +	struct nsproxy *nsp = task->nsproxy;
> +	struct ipc_namespace *ns;
> +
> +	if (!nsp)
> +		return;
> +	ns = nsp->ipc_ns;
> +	if (!ns || !ns->shm_rmid_forced)

This looks confusing, imho. How it is possible that ->nsproxy or
->ipc_ns is NULL?

> +		return;
> +
> +	/* Destroy all already created segments, but not mapped yet */
> +	down_write(&shm_ids(ns).rw_mutex);
> +	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
>  	up_write(&shm_ids(ns).rw_mutex);

Again, I do not pretend I understand ipc/, but it seems we can check
ns->ipc_ids[].in_use != 0 before the slow path, no?

Oleg.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-04 15:08   ` [kernel-hardening] " Oleg Nesterov
@ 2011-07-04 15:36     ` Vasiliy Kulikov
  -1 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-07-04 15:36 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On Mon, Jul 04, 2011 at 17:08 +0200, Oleg Nesterov wrote:
> On 06/22, Vasiliy Kulikov wrote:
> >
> > +void exit_shm(struct task_struct *task)
> > +{
> > +	struct nsproxy *nsp = task->nsproxy;
> > +	struct ipc_namespace *ns;
> > +
> > +	if (!nsp)
> > +		return;
> > +	ns = nsp->ipc_ns;
> > +	if (!ns || !ns->shm_rmid_forced)
> 
> This looks confusing, imho. How it is possible that ->nsproxy or
> ->ipc_ns is NULL?

I spotted the same checking logic in other places.  I don't know whether
it is redundant, I guess it can happen when the namespace is dying.
Probably it cannot happed inside of task do_exit(), only for extern
observers.


> > +		return;
> > +
> > +	/* Destroy all already created segments, but not mapped yet */
> > +	down_write(&shm_ids(ns).rw_mutex);
> > +	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
> >  	up_write(&shm_ids(ns).rw_mutex);
> 
> Again, I do not pretend I understand ipc/, but it seems we can check
> ns->ipc_ids[].in_use != 0 before the slow path, no?

Looks like you're right.  Given it is do_exit(), the boost is
significant.

I'll send the patch for this thing and the locking part.


Thank you!

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-04 15:36     ` Vasiliy Kulikov
  0 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-07-04 15:36 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On Mon, Jul 04, 2011 at 17:08 +0200, Oleg Nesterov wrote:
> On 06/22, Vasiliy Kulikov wrote:
> >
> > +void exit_shm(struct task_struct *task)
> > +{
> > +	struct nsproxy *nsp = task->nsproxy;
> > +	struct ipc_namespace *ns;
> > +
> > +	if (!nsp)
> > +		return;
> > +	ns = nsp->ipc_ns;
> > +	if (!ns || !ns->shm_rmid_forced)
> 
> This looks confusing, imho. How it is possible that ->nsproxy or
> ->ipc_ns is NULL?

I spotted the same checking logic in other places.  I don't know whether
it is redundant, I guess it can happen when the namespace is dying.
Probably it cannot happed inside of task do_exit(), only for extern
observers.


> > +		return;
> > +
> > +	/* Destroy all already created segments, but not mapped yet */
> > +	down_write(&shm_ids(ns).rw_mutex);
> > +	idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
> >  	up_write(&shm_ids(ns).rw_mutex);
> 
> Again, I do not pretend I understand ipc/, but it seems we can check
> ns->ipc_ids[].in_use != 0 before the slow path, no?

Looks like you're right.  Given it is do_exit(), the boost is
significant.

I'll send the patch for this thing and the locking part.


Thank you!

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-04 15:36     ` [kernel-hardening] " Vasiliy Kulikov
@ 2011-07-04 15:44       ` Oleg Nesterov
  -1 siblings, 0 replies; 34+ messages in thread
From: Oleg Nesterov @ 2011-07-04 15:44 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On 07/04, Vasiliy Kulikov wrote:
>
> On Mon, Jul 04, 2011 at 17:08 +0200, Oleg Nesterov wrote:
> > On 06/22, Vasiliy Kulikov wrote:
> > >
> > > +void exit_shm(struct task_struct *task)
> > > +{
> > > +	struct nsproxy *nsp = task->nsproxy;
> > > +	struct ipc_namespace *ns;
> > > +
> > > +	if (!nsp)
> > > +		return;
> > > +	ns = nsp->ipc_ns;
> > > +	if (!ns || !ns->shm_rmid_forced)
> >
> > This looks confusing, imho. How it is possible that ->nsproxy or
> > ->ipc_ns is NULL?
>
> I spotted the same checking logic in other places.  I don't know whether
> it is redundant, I guess it can happen when the namespace is dying.
> Probably it cannot happed inside of task do_exit(), only for extern
> observers.

No, afaics it can't happen in do_exit() until we call exit_notify().
Otherwise, for example, any dying child will OOPS in do_notify_parent().
Or please look at exit_sem()->sem_lock_check(tsk->nsproxy->ipc_ns).

Oleg.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-04 15:44       ` Oleg Nesterov
  0 siblings, 0 replies; 34+ messages in thread
From: Oleg Nesterov @ 2011-07-04 15:44 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On 07/04, Vasiliy Kulikov wrote:
>
> On Mon, Jul 04, 2011 at 17:08 +0200, Oleg Nesterov wrote:
> > On 06/22, Vasiliy Kulikov wrote:
> > >
> > > +void exit_shm(struct task_struct *task)
> > > +{
> > > +	struct nsproxy *nsp = task->nsproxy;
> > > +	struct ipc_namespace *ns;
> > > +
> > > +	if (!nsp)
> > > +		return;
> > > +	ns = nsp->ipc_ns;
> > > +	if (!ns || !ns->shm_rmid_forced)
> >
> > This looks confusing, imho. How it is possible that ->nsproxy or
> > ->ipc_ns is NULL?
>
> I spotted the same checking logic in other places.  I don't know whether
> it is redundant, I guess it can happen when the namespace is dying.
> Probably it cannot happed inside of task do_exit(), only for extern
> observers.

No, afaics it can't happen in do_exit() until we call exit_notify().
Otherwise, for example, any dying child will OOPS in do_notify_parent().
Or please look at exit_sem()->sem_lock_check(tsk->nsproxy->ipc_ns).

Oleg.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [RFC] ipc: introduce shm_rmid_forced sysctl
  2011-07-04 15:44       ` [kernel-hardening] " Oleg Nesterov
@ 2011-07-04 16:06         ` Vasiliy Kulikov
  -1 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-07-04 16:06 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On Mon, Jul 04, 2011 at 17:44 +0200, Oleg Nesterov wrote:
> On 07/04, Vasiliy Kulikov wrote:
> >
> > On Mon, Jul 04, 2011 at 17:08 +0200, Oleg Nesterov wrote:
> > > On 06/22, Vasiliy Kulikov wrote:
> > > >
> > > > +void exit_shm(struct task_struct *task)
> > > > +{
> > > > +	struct nsproxy *nsp = task->nsproxy;
> > > > +	struct ipc_namespace *ns;
> > > > +
> > > > +	if (!nsp)
> > > > +		return;
> > > > +	ns = nsp->ipc_ns;
> > > > +	if (!ns || !ns->shm_rmid_forced)
> > >
> > > This looks confusing, imho. How it is possible that ->nsproxy or
> > > ->ipc_ns is NULL?
> >
> > I spotted the same checking logic in other places.  I don't know whether
> > it is redundant, I guess it can happen when the namespace is dying.
> > Probably it cannot happed inside of task do_exit(), only for extern
> > observers.
> 
> No, afaics it can't happen in do_exit() until we call exit_notify().
> Otherwise, for example, any dying child will OOPS in do_notify_parent().
> Or please look at exit_sem()->sem_lock_check(tsk->nsproxy->ipc_ns).

Looks you're still right :)

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [kernel-hardening] Re: [RFC] ipc: introduce shm_rmid_forced sysctl
@ 2011-07-04 16:06         ` Vasiliy Kulikov
  0 siblings, 0 replies; 34+ messages in thread
From: Vasiliy Kulikov @ 2011-07-04 16:06 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: kernel-hardening, Randy Dunlap, Andrew Morton, Eric W. Biederman,
	Serge E. Hallyn, Daniel Lezcano, Tejun Heo, Ingo Molnar,
	linux-doc, linux-kernel, linux-security-module

On Mon, Jul 04, 2011 at 17:44 +0200, Oleg Nesterov wrote:
> On 07/04, Vasiliy Kulikov wrote:
> >
> > On Mon, Jul 04, 2011 at 17:08 +0200, Oleg Nesterov wrote:
> > > On 06/22, Vasiliy Kulikov wrote:
> > > >
> > > > +void exit_shm(struct task_struct *task)
> > > > +{
> > > > +	struct nsproxy *nsp = task->nsproxy;
> > > > +	struct ipc_namespace *ns;
> > > > +
> > > > +	if (!nsp)
> > > > +		return;
> > > > +	ns = nsp->ipc_ns;
> > > > +	if (!ns || !ns->shm_rmid_forced)
> > >
> > > This looks confusing, imho. How it is possible that ->nsproxy or
> > > ->ipc_ns is NULL?
> >
> > I spotted the same checking logic in other places.  I don't know whether
> > it is redundant, I guess it can happen when the namespace is dying.
> > Probably it cannot happed inside of task do_exit(), only for extern
> > observers.
> 
> No, afaics it can't happen in do_exit() until we call exit_notify().
> Otherwise, for example, any dying child will OOPS in do_notify_parent().
> Or please look at exit_sem()->sem_lock_check(tsk->nsproxy->ipc_ns).

Looks you're still right :)

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2011-07-04 16:06 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-22 15:25 [RFC] ipc: introduce shm_rmid_forced sysctl Vasiliy Kulikov
2011-06-22 15:25 ` [kernel-hardening] " Vasiliy Kulikov
2011-06-22 16:03 ` Randy Dunlap
2011-06-22 16:03   ` [kernel-hardening] " Randy Dunlap
2011-06-29 22:14 ` Andrew Morton
2011-06-29 22:14   ` [kernel-hardening] " Andrew Morton
2011-06-30  9:21   ` Vasiliy Kulikov
2011-06-30  9:21     ` [kernel-hardening] " Vasiliy Kulikov
2011-06-30 13:08     ` Vasiliy Kulikov
2011-06-30 13:08       ` [kernel-hardening] " Vasiliy Kulikov
2011-07-01 11:25   ` Ingo Molnar
2011-07-01 11:25     ` [kernel-hardening] " Ingo Molnar
2011-07-01 11:35     ` Vasiliy Kulikov
2011-07-01 11:35       ` [kernel-hardening] " Vasiliy Kulikov
2011-07-01 12:04       ` Ingo Molnar
2011-07-01 12:04         ` [kernel-hardening] " Ingo Molnar
2011-07-01 14:18         ` Alan Cox
2011-07-01 14:18           ` [kernel-hardening] " Alan Cox
2011-07-03 19:38           ` Ingo Molnar
2011-07-03 19:38             ` [kernel-hardening] " Ingo Molnar
2011-07-03 21:25             ` Alan Cox
2011-07-03 21:25               ` [kernel-hardening] " Alan Cox
2011-07-02 16:50       ` Solar Designer
2011-07-02 16:50         ` [kernel-hardening] " Solar Designer
2011-07-02 17:31   ` Solar Designer
2011-07-02 17:31     ` [kernel-hardening] " Solar Designer
2011-07-04 15:08 ` Oleg Nesterov
2011-07-04 15:08   ` [kernel-hardening] " Oleg Nesterov
2011-07-04 15:36   ` Vasiliy Kulikov
2011-07-04 15:36     ` [kernel-hardening] " Vasiliy Kulikov
2011-07-04 15:44     ` Oleg Nesterov
2011-07-04 15:44       ` [kernel-hardening] " Oleg Nesterov
2011-07-04 16:06       ` Vasiliy Kulikov
2011-07-04 16:06         ` [kernel-hardening] " Vasiliy Kulikov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.