All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Safonov <dsafonov@virtuozzo.com>
To: <linux-kernel@vger.kernel.org>
Cc: <luto@amacapital.net>, <tglx@linutronix.de>, <mingo@redhat.com>,
	<hpa@zytor.com>, <x86@kernel.org>, <shuahkh@osg.samsung.com>,
	<bp@alien8.de>, <akpm@linux-foundation.org>,
	<linux-kselftest@vger.kernel.org>, <gorcunov@openvz.org>,
	<xemul@virtuozzo.com>, <khorenko@virtuozzo.com>,
	<0x7f454c46@gmail.com>, Dmitry Safonov <dsafonov@virtuozzo.com>
Subject: [PATCH 1/2] x86/arch_prctl: add ARCH_SET_{COMPAT,NATIVE} to change compatible mode
Date: Wed, 6 Apr 2016 19:29:29 +0300	[thread overview]
Message-ID: <1459960170-4454-2-git-send-email-dsafonov@virtuozzo.com> (raw)
In-Reply-To: <1459960170-4454-1-git-send-email-dsafonov@virtuozzo.com>

Now each process that runs natively on x86_64 may execute 32-bit code
by proper setting it's CS selector: either from LDT or reuse Linux's
USER32_CS. The vice-versa is also valid: running 64-bit code in
compatible task is also possible by choosing USER_CS.
So we may switch between 32 and 64 bit code execution in any process.
Linux will choose the right syscall numbers in entries for those
processes. But it still will consider them native/compat by the
personality, that elf loader set on launch. This affects i.e., ptrace
syscall on those tasks: PTRACE_GETREGSET will return 64/32-bit regset
according to process's mode (that's how strace detect task's
personality from 4.8 version).

This patch adds arch_prctl calls for x86 that make possible to tell
Linux kernel in which mode the application is running currently.
Mainly, this is needed for CRIU: restoring compatible & native
applications both from 64-bit restorer. By that reason I wrapped all
the code in CONFIG_CHECKPOINT_RESTORE.
This patch solves also a problem for running 64-bit code in 32-bit elf
(and reverse), that you have only 32-bit elf vdso for fast syscalls.
When switching between native <-> compat mode by arch_prctl, it will
remap needed vdso binary blob for target mode.

Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Konstantin Khorenko <khorenko@virtuozzo.com>
CC: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
 arch/x86/entry/vdso/vma.c         | 76 ++++++++++++++++++++++++++--------
 arch/x86/include/asm/vdso.h       |  5 +++
 arch/x86/include/uapi/asm/prctl.h |  6 +++
 arch/x86/kernel/process_64.c      | 87 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 157 insertions(+), 17 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..9a1561da0bad 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -156,22 +156,21 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 	return VM_FAULT_SIGBUS;
 }
 
-static int map_vdso(const struct vdso_image *image, bool calculate_addr)
+static int do_map_vdso(const struct vdso_image *image, bool calculate_addr,
+		unsigned long addr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	unsigned long addr, text_start;
+	unsigned long text_start;
 	int ret = 0;
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
 	};
 
-	if (calculate_addr) {
+	if (calculate_addr && !addr) {
 		addr = vdso_addr(current->mm->start_stack,
 				 image->size - image->sym_vvar_start);
-	} else {
-		addr = 0;
 	}
 
 	down_write(&mm->mmap_sem);
@@ -209,11 +208,11 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       VM_PFNMAP,
 				       &vvar_mapping);
 
-	if (IS_ERR(vma)) {
+	if (IS_ERR(vma))
 		ret = PTR_ERR(vma);
-		goto up_fail;
-	}
 
+	if (ret)
+		do_munmap(mm, addr, image->size - image->sym_vvar_start);
 up_fail:
 	if (ret)
 		current->mm->context.vdso = NULL;
@@ -223,24 +222,28 @@ up_fail:
 }
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
-static int load_vdso32(void)
+static int load_vdso32(unsigned long addr)
 {
 	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
 		return 0;
 
-	return map_vdso(&vdso_image_32, false);
+	return do_map_vdso(&vdso_image_32, false, addr);
 }
 #endif
 
 #ifdef CONFIG_X86_64
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int load_vdso64(unsigned long addr)
 {
 	if (!vdso64_enabled)
 		return 0;
 
-	return map_vdso(&vdso_image_64, true);
+	return do_map_vdso(&vdso_image_64, true, addr);
 }
 
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	return load_vdso64(0);
+}
 #ifdef CONFIG_COMPAT
 int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
 				       int uses_interp)
@@ -250,20 +253,59 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
 		if (!vdso64_enabled)
 			return 0;
 
-		return map_vdso(&vdso_image_x32, true);
+		return do_map_vdso(&vdso_image_x32, true, 0);
 	}
 #endif
 #ifdef CONFIG_IA32_EMULATION
-	return load_vdso32();
+	return load_vdso32(0);
 #else
 	return 0;
 #endif
 }
-#endif
-#else
+#endif /* CONFIG_COMPAT */
+
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+unsigned long unmap_vdso(void)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr = (unsigned long)current->mm->context.vdso;
+
+	if (!addr)
+		return 0;
+
+	/* vvar pages */
+	vma = find_vma(current->mm, addr - 1);
+	if (vma)
+		vm_munmap(vma->vm_start, vma->vm_end - vma->vm_start);
+
+	/* vdso pages */
+	vma = find_vma(current->mm, addr);
+	if (vma)
+		vm_munmap(vma->vm_start, vma->vm_end - vma->vm_start);
+
+	current->mm->context.vdso = NULL;
+
+	return addr;
+}
+/*
+ * Maps needed vdso type: vdso_image_32/vdso_image_64
+ * @compatible - true for compatible, false for native vdso image
+ * @addr - specify addr for vdso mapping (0 for random/searching)
+ * NOTE: be sure to set/clear thread-specific flags before
+ * calling this function.
+ */
+int map_vdso(bool compatible, unsigned long addr)
+{
+	if (compatible)
+		return load_vdso32(addr);
+	else
+		return load_vdso64(addr);
+}
+#endif /* CONFIG_IA32_EMULATION && CONFIG_CHECKPOINT_RESTORE */
+#else /* !CONFIG_X86_64 */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	return load_vdso32();
+	return load_vdso32(0);
 }
 #endif
 
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 43dc55be524e..3ead7cc48a68 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -39,6 +39,11 @@ extern const struct vdso_image vdso_image_x32;
 extern const struct vdso_image vdso_image_32;
 #endif
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+extern int map_vdso(bool to_compat, unsigned long addr);
+extern unsigned long unmap_vdso(void);
+#endif
+
 extern void __init init_vdso_image(const struct vdso_image *image);
 
 #endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032fae09..455844f06485 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+#define ARCH_SET_COMPAT		0x2001
+#define ARCH_SET_NATIVE		0x2002
+#define ARCH_GET_PERSONALITY	0x2003
+#endif
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31ac23a..e50660d59530 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,6 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/vdso.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -505,6 +506,83 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+/*
+ * Check if there are still some vmas (except vdso) for current,
+ * which placed above compatible TASK_SIZE.
+ * Check also code, data, stack, args and env placements.
+ * Returns true if all mappings are compatible.
+ */
+static bool task_mappings_compatible(void)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long top_addr = IA32_PAGE_OFFSET;
+	struct vm_area_struct *vma = find_vma(mm, top_addr);
+
+	if (mm->end_code	> top_addr ||
+	    mm->end_data	> top_addr ||
+	    mm->start_stack	> top_addr ||
+	    mm->brk		> top_addr ||
+	    mm->arg_end		> top_addr ||
+	    mm->env_end		> top_addr)
+		return false;
+
+	while (vma) {
+		if ((vma->vm_start != (unsigned long)mm->context.vdso) &&
+		    (vma->vm_end != (unsigned long)mm->context.vdso))
+			return false;
+
+		top_addr = vma->vm_end;
+		vma = find_vma(mm, top_addr);
+	}
+
+	return true;
+}
+
+static int do_set_personality(bool compat, unsigned long addr)
+{
+	int ret;
+	unsigned long old_vdso_base;
+	unsigned long old_mmap_base = current->mm->mmap_base;
+
+	if (test_thread_flag(TIF_IA32) == compat) /* nothing to do */
+		return 0;
+
+	if (compat && !task_mappings_compatible())
+		return -EFAULT;
+
+	/*
+	 * We can't just remap vdso to needed location:
+	 * vdso compatible and native images differs
+	 */
+	old_vdso_base = unmap_vdso();
+
+	if (compat)
+		set_personality_ia32(false);
+	else
+		set_personality_64bit();
+
+	/*
+	 * Update mmap_base & get_unmapped_area helper, side effect:
+	 * one may change get_unmapped_area or mmap_base with personality()
+	 * or switching to and fro compatible mode
+	 */
+	arch_pick_mmap_layout(current->mm);
+
+	ret = map_vdso(compat, addr);
+	if (ret) {
+		current->mm->mmap_base = old_mmap_base;
+		if (compat)
+			set_personality_64bit();
+		else
+			set_personality_ia32(false);
+		WARN_ON(map_vdso(!compat, old_vdso_base));
+	}
+
+	return ret;
+}
+#endif
+
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 {
 	int ret = 0;
@@ -592,6 +670,15 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		break;
 	}
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+	case ARCH_SET_COMPAT:
+		return do_set_personality(true, addr);
+	case ARCH_SET_NATIVE:
+		return do_set_personality(false, addr);
+	case ARCH_GET_PERSONALITY:
+		return test_thread_flag(TIF_IA32);
+#endif
+
 	default:
 		ret = -EINVAL;
 		break;
-- 
2.7.4

  reply	other threads:[~2016-04-06 16:45 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-06 16:29 [PATCH 0/2] x86: add arch_prctl to switch between native/compat modes Dmitry Safonov
2016-04-06 16:29 ` Dmitry Safonov [this message]
2016-04-06 18:04   ` [PATCH 1/2] x86/arch_prctl: add ARCH_SET_{COMPAT,NATIVE} to change compatible mode Andy Lutomirski
2016-04-06 18:49     ` Andy Lutomirski
2016-04-07 12:11     ` Dmitry Safonov
2016-04-07 12:21       ` Cyrill Gorcunov
2016-04-07 12:35         ` Dmitry Safonov
2016-04-07 14:39       ` Andy Lutomirski
2016-04-07 15:18         ` Dmitry Safonov
2016-04-08 13:50         ` Dmitry Safonov
2016-04-08 15:56           ` Andy Lutomirski
2016-04-08 16:18             ` Dmitry Safonov
2016-04-08 20:44               ` Andy Lutomirski
2016-04-09  8:06                 ` Dmitry Safonov
2016-04-13 16:55                 ` Dmitry Safonov
2016-04-14 18:27                   ` Andy Lutomirski
2016-04-20 11:04                     ` Peter Zijlstra
2016-04-20 15:40                       ` Andy Lutomirski
2016-04-20 19:05                         ` Peter Zijlstra
2016-04-21 19:39                           ` Andy Lutomirski
2016-04-21 20:12                             ` Peter Zijlstra
2016-04-21 23:27                               ` Andy Lutomirski
2016-04-21 23:46                                 ` Andy Lutomirski
2016-04-25 15:16                                 ` Peter Zijlstra
2016-04-25 16:50                                   ` Andy Lutomirski
2016-04-06 16:29 ` [PATCH 2/2] x86/tools/testing: add test for ARCH_SET_COMPAT Dmitry Safonov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1459960170-4454-2-git-send-email-dsafonov@virtuozzo.com \
    --to=dsafonov@virtuozzo.com \
    --cc=0x7f454c46@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=khorenko@virtuozzo.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mingo@redhat.com \
    --cc=shuahkh@osg.samsung.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.