linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] syscalls, x86: Add __NR_kcmp syscall
@ 2012-01-17 14:27 Cyrill Gorcunov
  2012-01-17 14:38 ` Alexey Dobriyan
  0 siblings, 1 reply; 27+ messages in thread
From: Cyrill Gorcunov @ 2012-01-17 14:27 UTC (permalink / raw)
  To: LKML
  Cc: Eric W. Biederman, Pavel Emelyanov, Andrey Vagin, Ingo Molnar,
	H. Peter Anvin, Thomas Gleixner, Glauber Costa, Andi Kleen,
	Tejun Heo, Matt Helsley, Pekka Enberg, Eric Dumazet,
	Vasiliy Kulikov, Andrew Morton, Alexey Dobriyan,
	Valdis.Kletnieks

Hi all,

while the general-object-id patch series was being reviewed and commented on
(see the thread at https://lkml.org/lkml/2012/1/11/220), it seems we came
to the conclusion that a syscall which compares kernel objects
internally and returns the result to user space is far more
secure than exporting (even strongly encrypted) kernel pointers in the form
of some general IDs.

So here is a very early draft version of how I think it can be done.
Please review and comment. So far I've tested it only on the x86-64 platform.
It also seems I forgot to export the header to user space, but that can be
fixed later once things have settled.

P.S. I hope I didn't miss anyone in CC who was involved in the previous discussion ;)

Thanks,
	Cyrill
---
syscalls, x86: Add __NR_kcmp syscall

While doing checkpoint-restore in userspace one needs to determine
whether various kernel objects (like mm_struct-s or files_struct-s) are shared
between tasks, and then restore this state.

The 2nd step can be solved by using the appropriate CLONE_ flags and the unshare
syscall, while there is currently no way of solving the 1st one.

One of the ways of checking whether two tasks share e.g. an mm_struct is to
provide some mm_struct ID of a task in its proc file, but showing such
info is considered to be not that good for security reasons.

Thus, after some debate, we came to the conclusion that a so-called
'comparison' syscall might be the best candidate. So here it is --
__NR_kcmp.

It takes up to 5 arguments - the pids of the two tasks (whose
characteristics should be compared), the comparison type and
(in case of comparison of files) two file descriptors.

At the moment only x86 is supported.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: "Eric W. Biederman" <ebiederm@xmission.com>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Andrey Vagin <avagin@openvz.org>
CC: Ingo Molnar <mingo@elte.hu>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Glauber Costa <glommer@parallels.com>
CC: Andi Kleen <andi@firstfloor.org>
CC: Tejun Heo <tj@kernel.org>
CC: Matt Helsley <matthltc@us.ibm.com>
CC: Pekka Enberg <penberg@kernel.org>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Vasiliy Kulikov <segoon@openwall.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Valdis.Kletnieks@vt.edu
---
 arch/x86/include/asm/kcmp.h        |   21 ++++++
 arch/x86/include/asm/syscalls.h    |    4 +
 arch/x86/include/asm/unistd_32.h   |    1 
 arch/x86/include/asm/unistd_64.h   |    2 
 arch/x86/kernel/Makefile           |    1 
 arch/x86/kernel/kcmp.c             |  127 +++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/syscall_table_32.S |    1 
 7 files changed, 157 insertions(+)

Index: linux-2.6.git/arch/x86/include/asm/kcmp.h
===================================================================
--- /dev/null
+++ linux-2.6.git/arch/x86/include/asm/kcmp.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_KCMP_H
+#define _LINUX_KCMP_H
+
+/* Comparison type */
+enum {
+	KCMP_FILE,	/* struct file behind descriptors idx1 / idx2 */
+	KCMP_VM,	/* task->mm */
+	KCMP_FILES,	/* task->files */
+	KCMP_FS,	/* task->fs */
+	KCMP_SIGHAND,	/* task->sighand */
+	KCMP_IO,	/* task->io_context */
+	KCMP_SYSVSEM,	/* task->sysvsem.undo_list */
+
+	KCMP_TYPES,	/* number of comparison types; not a valid type itself */
+};
+
+#define KCMP_EQ		0
+#define KCMP_LT		1
+#define KCMP_GT		2
+
+#endif /* _LINUX_KCMP_H */
Index: linux-2.6.git/arch/x86/include/asm/syscalls.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/syscalls.h
+++ linux-2.6.git/arch/x86/include/asm/syscalls.h
@@ -42,6 +42,10 @@ long sys_sigaltstack(const stack_t __use
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
 asmlinkage int sys_get_thread_area(struct user_desc __user *);
 
+/* kernel/kcmp.c */
+asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
+			 unsigned long idx1, unsigned long idx2);
+
 /* X86_32 only */
 #ifdef CONFIG_X86_32
 
Index: linux-2.6.git/arch/x86/include/asm/unistd_32.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/unistd_32.h
+++ linux-2.6.git/arch/x86/include/asm/unistd_32.h
@@ -354,6 +354,7 @@
 #define __NR_setns		346
 #define __NR_process_vm_readv	347
 #define __NR_process_vm_writev	348
+#define __NR_kcmp		349
 
 #ifdef __KERNEL__
 
Index: linux-2.6.git/arch/x86/include/asm/unistd_64.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/unistd_64.h
+++ linux-2.6.git/arch/x86/include/asm/unistd_64.h
@@ -686,6 +686,8 @@ __SYSCALL(__NR_getcpu, sys_getcpu)
 __SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev			311
 __SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
+#define __NR_kcmp				312
+__SYSCALL(__NR_kcmp, sys_kcmp)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
Index: linux-2.6.git/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/Makefile
+++ linux-2.6.git/arch/x86/kernel/Makefile
@@ -33,6 +33,7 @@ obj-y			+= alternative.o i8253.o pci-nom
 obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= kcmp.o
 
 obj-y				+= trampoline.o trampoline_$(BITS).o
 obj-y				+= process.o
Index: linux-2.6.git/arch/x86/kernel/kcmp.c
===================================================================
--- /dev/null
+++ linux-2.6.git/arch/x86/kernel/kcmp.c
@@ -0,0 +1,127 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+
+#include <linux/syscalls.h>
+#include <asm/unistd.h>
+#include <asm/kcmp.h>
+
+#define KCMP_PTR(ptr1, ptr2)						\
+({	/* three-way compare by address; no subtraction, so no overflow */ \
+	long ___a = (long)(ptr1), ___b = (long)(ptr2);			\
+	(___a == ___b ? KCMP_EQ : (___a < ___b ? KCMP_LT : KCMP_GT));	\
+})
+
+#define KCMP_TASK_PTR(task1, task2, member)	/* KCMP_PTR on one member of two tasks */ \
+	KCMP_PTR((task1)->member, (task2)->member)
+
+/* Caller must pin @task. Returns NULL when @idx is not an open descriptor. */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+	struct file *file = NULL;
+
+	/* task_lock() keeps task->files stable; it is NULL once the task exits. */
+	task_lock(task);
+	rcu_read_lock();
+
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+
+	rcu_read_unlock();
+	task_unlock(task);
+	return file;
+}
+
+/*
+ * sys_kcmp - compare a kernel object of two tasks by address.
+ * @pid1, @pid2: pids of the tasks, resolved with find_task_by_vpid()
+ * @type: one of KCMP_*; @idx1, @idx2: file descriptors (KCMP_FILE only)
+ *
+ * Returns KCMP_EQ, KCMP_LT or KCMP_GT on success, otherwise -ESRCH,
+ * -EACCES, -ENOENT or -EINVAL.
+ */
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+		unsigned long, idx1, unsigned long, idx2)
+{
+	struct task_struct *task1, *task2;
+	int ret = 0;
+
+	/* Look up and pin both tasks inside a single RCU read section. */
+	rcu_read_lock();
+	task1 = find_task_by_vpid(pid1);
+	task2 = find_task_by_vpid(pid2);
+	if (!task1 || !task2) {
+		/* No reference is held yet, so there is nothing to put. */
+		rcu_read_unlock();
+		return -ESRCH;
+	}
+
+	get_task_struct(task1);
+	get_task_struct(task2);
+	rcu_read_unlock();
+
+	if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+	    !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+		ret = -EACCES;
+		goto err;
+	}
+
+	/*
+	 * Note for all cases but the KCMP_FILE we don't take any locks
+	 * and do a plain pointer comparison for the sake of speed.
+	 */
+	switch (type) {
+	case KCMP_FILE: {
+		struct file *filp1, *filp2;
+
+		filp1 = get_file_raw_ptr(task1, idx1);
+		filp2 = get_file_raw_ptr(task2, idx2);
+
+		/* The pointers are only compared here, never dereferenced. */
+		if (filp1 && filp2)
+			ret = KCMP_PTR(filp1, filp2);
+		else
+			ret = -ENOENT;
+		break;
+	}
+	case KCMP_VM:
+		ret = KCMP_TASK_PTR(task1, task2, mm);
+		break;
+	case KCMP_FILES:
+		ret = KCMP_TASK_PTR(task1, task2, files);
+		break;
+	case KCMP_FS:
+		ret = KCMP_TASK_PTR(task1, task2, fs);
+		break;
+	case KCMP_SIGHAND:
+		ret = KCMP_TASK_PTR(task1, task2, sighand);
+		break;
+	case KCMP_IO:
+		ret = KCMP_TASK_PTR(task1, task2, io_context);
+		break;
+	case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+		ret = KCMP_TASK_PTR(task1, task2, sysvsem.undo_list);
+#else
+		ret = -ENOENT;
+#endif
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+err:
+	/* Drop the references taken above; success and failure both land here. */
+	put_task_struct(task1);
+	put_task_struct(task2);
+
+	return ret;
+}
Index: linux-2.6.git/arch/x86/kernel/syscall_table_32.S
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/syscall_table_32.S
+++ linux-2.6.git/arch/x86/kernel/syscall_table_32.S
@@ -348,3 +348,4 @@ ENTRY(sys_call_table)
 	.long sys_setns
 	.long sys_process_vm_readv
 	.long sys_process_vm_writev
+	.long sys_kcmp

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2012-01-20 18:22 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-17 14:27 [RFC] syscalls, x86: Add __NR_kcmp syscall Cyrill Gorcunov
2012-01-17 14:38 ` Alexey Dobriyan
2012-01-17 14:44   ` Cyrill Gorcunov
2012-01-17 18:47     ` H. Peter Anvin
2012-01-17 21:15       ` Cyrill Gorcunov
2012-01-17 21:40         ` Eric W. Biederman
2012-01-18  5:07           ` Pavel Emelyanov
2012-01-17 21:35       ` Eric W. Biederman
2012-01-18  8:01         ` Cyrill Gorcunov
2012-01-18  9:12           ` KOSAKI Motohiro
2012-01-18  9:19             ` Pavel Emelyanov
2012-01-18  9:23               ` KOSAKI Motohiro
2012-01-18 11:57                 ` Cyrill Gorcunov
2012-01-18 16:46                   ` KOSAKI Motohiro
2012-01-18 17:20                     ` Cyrill Gorcunov
2012-01-18 22:05         ` david
2012-01-18 22:49           ` Cyrill Gorcunov
2012-01-18 23:29             ` Eric W. Biederman
2012-01-19  6:55               ` Cyrill Gorcunov
2012-01-20  3:16                 ` Eric W. Biederman
2012-01-20  8:40                   ` Cyrill Gorcunov
2012-01-20  9:02                     ` Cyrill Gorcunov
2012-01-20 14:51                       ` H. Peter Anvin
2012-01-20 16:29                         ` Cyrill Gorcunov
2012-01-20 16:57                           ` H. Peter Anvin
2012-01-20 18:19                             ` Cyrill Gorcunov
2012-01-20 18:22                               ` Cyrill Gorcunov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).