linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86/vdso: add mremap hook to vm_special_mapping
@ 2016-04-11 15:22 Dmitry Safonov
  2016-04-11 15:41 ` kbuild test robot
                   ` (5 more replies)
  0 siblings, 6 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-11 15:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
 arch/x86/entry/vdso/vma.c | 33 ++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..08ac59907cde 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,26 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *new_vma)
+{
+	struct pt_regs *regs = current_pt_regs();
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+	/* Fixing userspace landing - look at do_fast_syscall_32 */
+	if (regs->ip == (unsigned long)current->mm->context.vdso +
+			vdso_image_32.sym_int80_landing_pad
+#ifdef CONFIG_IA32_EMULATION
+		&& current_thread_info()->status & TS_COMPAT
+#endif
+	   )
+		regs->ip = new_vma->vm_start +
+			vdso_image_32.sym_int80_landing_pad;
+#endif
+	new_vma->vm_mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +179,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +218,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCH] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-11 15:22 [PATCH] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
@ 2016-04-11 15:41 ` kbuild test robot
  2016-04-11 15:54   ` Dmitry Safonov
  2016-04-14 16:32 ` [PATCHv2] " Dmitry Safonov
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 56+ messages in thread
From: kbuild test robot @ 2016-04-11 15:41 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: kbuild-all, linux-kernel, luto, tglx, mingo, hpa, x86, akpm,
	linux-mm, 0x7f454c46, Dmitry Safonov

[-- Attachment #1: Type: text/plain, Size: 1980 bytes --]

Hi Dmitry,

[auto build test WARNING on v4.6-rc3]
[also build test WARNING on next-20160411]
[cannot apply to tip/x86/vdso luto/next]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Dmitry-Safonov/x86-vdso-add-mremap-hook-to-vm_special_mapping/20160411-232653
config: x86_64-randconfig-x000-201615 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   arch/x86/entry/vdso/vma.c: In function 'vdso_mremap':
>> arch/x86/entry/vdso/vma.c:105:18: warning: unused variable 'regs' [-Wunused-variable]
     struct pt_regs *regs = current_pt_regs();
                     ^

vim +/regs +105 arch/x86/entry/vdso/vma.c

    89	static int vdso_fault(const struct vm_special_mapping *sm,
    90			      struct vm_area_struct *vma, struct vm_fault *vmf)
    91	{
    92		const struct vdso_image *image = vma->vm_mm->context.vdso_image;
    93	
    94		if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
    95			return VM_FAULT_SIGBUS;
    96	
    97		vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
    98		get_page(vmf->page);
    99		return 0;
   100	}
   101	
   102	static int vdso_mremap(const struct vm_special_mapping *sm,
   103			      struct vm_area_struct *new_vma)
   104	{
 > 105		struct pt_regs *regs = current_pt_regs();
   106	
   107	#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
   108		/* Fixing userspace landing - look at do_fast_syscall_32 */
   109		if (regs->ip == (unsigned long)current->mm->context.vdso +
   110				vdso_image_32.sym_int80_landing_pad
   111	#ifdef CONFIG_IA32_EMULATION
   112			&& current_thread_info()->status & TS_COMPAT
   113	#endif

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 26908 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-11 15:41 ` kbuild test robot
@ 2016-04-11 15:54   ` Dmitry Safonov
  0 siblings, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-11 15:54 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, linux-kernel, luto, tglx, mingo, hpa, x86, akpm,
	linux-mm, 0x7f454c46

On 04/11/2016 06:41 PM, kbuild test robot wrote:
> Hi Dmitry,
>
> [auto build test WARNING on v4.6-rc3]
> [also build test WARNING on next-20160411]
> [cannot apply to tip/x86/vdso luto/next]
> [if your patch is applied to the wrong git tree, please drop us a note to help improving the system]
>
> url:    https://github.com/0day-ci/linux/commits/Dmitry-Safonov/x86-vdso-add-mremap-hook-to-vm_special_mapping/20160411-232653
> config: x86_64-randconfig-x000-201615 (attached as .config)
> reproduce:
>          # save the attached .config to linux build tree
>          make ARCH=x86_64
>
> All warnings (new ones prefixed by >>):
>
>     arch/x86/entry/vdso/vma.c: In function 'vdso_mremap':
>>> arch/x86/entry/vdso/vma.c:105:18: warning: unused variable 'regs' [-Wunused-variable]
>       struct pt_regs *regs = current_pt_regs();
>                       ^
Thanks, it should go with this:
--->8---
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 08ac59907cde..7e261e2554c8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -102,7 +102,7 @@ static int vdso_fault(const struct 
vm_special_mapping *sm,
  static int vdso_mremap(const struct vm_special_mapping *sm,
                struct vm_area_struct *new_vma)
  {
-    struct pt_regs *regs = current_pt_regs();
+    struct pt_regs __maybe_unused *regs = current_pt_regs();

  #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
      /* Fixing userspace landing - look at do_fast_syscall_32 */

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCHv2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-11 15:22 [PATCH] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  2016-04-11 15:41 ` kbuild test robot
@ 2016-04-14 16:32 ` Dmitry Safonov
  2016-04-14 22:58   ` Andy Lutomirski
  2016-04-15 13:20 ` [PATCHv3 1/2] " Dmitry Safonov
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-14 16:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.
This will also fix context.vdso pointer for 64-bit, which does not
affect the user of vdso after mremap by now, but this may change.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in 32-bit glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v2: added __maybe_unused for pt_regs in vdso_mremap

 arch/x86/entry/vdso/vma.c | 33 ++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..7e261e2554c8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,26 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *new_vma)
+{
+	struct pt_regs __maybe_unused *regs = current_pt_regs();
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+	/* Fixing userspace landing - look at do_fast_syscall_32 */
+	if (regs->ip == (unsigned long)current->mm->context.vdso +
+			vdso_image_32.sym_int80_landing_pad
+#ifdef CONFIG_IA32_EMULATION
+		&& current_thread_info()->status & TS_COMPAT
+#endif
+	   )
+		regs->ip = new_vma->vm_start +
+			vdso_image_32.sym_int80_landing_pad;
+#endif
+	new_vma->vm_mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +179,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +218,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCHv2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-14 16:32 ` [PATCHv2] " Dmitry Safonov
@ 2016-04-14 22:58   ` Andy Lutomirski
  2016-04-15  8:46     ` Dmitry Safonov
  2016-04-15  9:18     ` Ingo Molnar
  0 siblings, 2 replies; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-14 22:58 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On Thu, Apr 14, 2016 at 9:32 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> Add possibility for userspace 32-bit applications to move
> vdso mapping. Previously, when userspace app called
> mremap for vdso, in return path it would land on previous
> address of vdso page, resulting in segmentation violation.
> Now it lands fine and returns to userspace with remapped vdso.
> This will also fix context.vdso pointer for 64-bit, which does not
> affect the user of vdso after mremap by now, but this may change.
>
> Renamed and moved text_mapping structure declaration inside
> map_vdso, as it used only there and now it complement
> vvar_mapping variable.
>
> There is still problem for remapping vdso in 32-bit glibc applications:
> linker relocates addresses for syscalls on vdso page, so
> you need to relink with the new addresses. Or the next syscall
> through glibc may fail:
>   Program received signal SIGSEGV, Segmentation fault.
>   #0  0xf7fd9b80 in __kernel_vsyscall ()
>   #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
> ---
> v2: added __maybe_unused for pt_regs in vdso_mremap
>
>  arch/x86/entry/vdso/vma.c | 33 ++++++++++++++++++++++++++++-----
>  include/linux/mm_types.h  |  3 +++
>  mm/mmap.c                 | 10 ++++++++++
>  3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
> index 10f704584922..7e261e2554c8 100644
> --- a/arch/x86/entry/vdso/vma.c
> +++ b/arch/x86/entry/vdso/vma.c
> @@ -12,6 +12,7 @@
>  #include <linux/random.h>
>  #include <linux/elf.h>
>  #include <linux/cpu.h>
> +#include <linux/ptrace.h>
>  #include <asm/pvclock.h>
>  #include <asm/vgtod.h>
>  #include <asm/proto.h>
> @@ -98,10 +99,26 @@ static int vdso_fault(const struct vm_special_mapping *sm,
>         return 0;
>  }
>
> -static const struct vm_special_mapping text_mapping = {
> -       .name = "[vdso]",
> -       .fault = vdso_fault,
> -};
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +                     struct vm_area_struct *new_vma)
> +{
> +       struct pt_regs __maybe_unused *regs = current_pt_regs();
> +
> +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
> +       /* Fixing userspace landing - look at do_fast_syscall_32 */
> +       if (regs->ip == (unsigned long)current->mm->context.vdso +
> +                       vdso_image_32.sym_int80_landing_pad
> +#ifdef CONFIG_IA32_EMULATION
> +               && current_thread_info()->status & TS_COMPAT
> +#endif

Instead of ifdef, use the (grossly misnamed) is_ia32_task() helper for
this, please.

> +          )
> +               regs->ip = new_vma->vm_start +
> +                       vdso_image_32.sym_int80_landing_pad;
> +#endif
> +       new_vma->vm_mm->context.vdso = (void __user *)new_vma->vm_start;

Can you arrange for the mremap call to fail if the old mapping gets
split?  This might be as simple as confirming that the new mapping's
length is what we expect it to be and, if it isn't, returning -EINVAL.

If anyone things that might break some existing application (which is
quite unlikely), then we could allow mremap to succeed but skip the
part where we change context.vdso and rip.

> +
> +       return 0;
> +}
>
>  static int vvar_fault(const struct vm_special_mapping *sm,
>                       struct vm_area_struct *vma, struct vm_fault *vmf)
> @@ -162,6 +179,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>         struct vm_area_struct *vma;
>         unsigned long addr, text_start;
>         int ret = 0;
> +
> +       static const struct vm_special_mapping vdso_mapping = {
> +               .name = "[vdso]",
> +               .fault = vdso_fault,
> +               .mremap = vdso_mremap,
> +       };

Why did you add this instead of modifying text_mapping?

--Andy

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-14 22:58   ` Andy Lutomirski
@ 2016-04-15  8:46     ` Dmitry Safonov
  2016-04-15  9:18     ` Ingo Molnar
  1 sibling, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15  8:46 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On 04/15/2016 01:58 AM, Andy Lutomirski wrote:
> On Thu, Apr 14, 2016 at 9:32 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
>> Add possibility for userspace 32-bit applications to move
>> vdso mapping. Previously, when userspace app called
>> mremap for vdso, in return path it would land on previous
>> address of vdso page, resulting in segmentation violation.
>> Now it lands fine and returns to userspace with remapped vdso.
>> This will also fix context.vdso pointer for 64-bit, which does not
>> affect the user of vdso after mremap by now, but this may change.
>>
>> Renamed and moved text_mapping structure declaration inside
>> map_vdso, as it used only there and now it complement
>> vvar_mapping variable.
>>
>> There is still problem for remapping vdso in 32-bit glibc applications:
>> linker relocates addresses for syscalls on vdso page, so
>> you need to relink with the new addresses. Or the next syscall
>> through glibc may fail:
>>    Program received signal SIGSEGV, Segmentation fault.
>>    #0  0xf7fd9b80 in __kernel_vsyscall ()
>>    #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
>>
>> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
>> ---
>> v2: added __maybe_unused for pt_regs in vdso_mremap
>>
>>   arch/x86/entry/vdso/vma.c | 33 ++++++++++++++++++++++++++++-----
>>   include/linux/mm_types.h  |  3 +++
>>   mm/mmap.c                 | 10 ++++++++++
>>   3 files changed, 41 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
>> index 10f704584922..7e261e2554c8 100644
>> --- a/arch/x86/entry/vdso/vma.c
>> +++ b/arch/x86/entry/vdso/vma.c
>> @@ -12,6 +12,7 @@
>>   #include <linux/random.h>
>>   #include <linux/elf.h>
>>   #include <linux/cpu.h>
>> +#include <linux/ptrace.h>
>>   #include <asm/pvclock.h>
>>   #include <asm/vgtod.h>
>>   #include <asm/proto.h>
>> @@ -98,10 +99,26 @@ static int vdso_fault(const struct vm_special_mapping *sm,
>>          return 0;
>>   }
>>
>> -static const struct vm_special_mapping text_mapping = {
>> -       .name = "[vdso]",
>> -       .fault = vdso_fault,
>> -};
>> +static int vdso_mremap(const struct vm_special_mapping *sm,
>> +                     struct vm_area_struct *new_vma)
>> +{
>> +       struct pt_regs __maybe_unused *regs = current_pt_regs();
>> +
>> +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
>> +       /* Fixing userspace landing - look at do_fast_syscall_32 */
>> +       if (regs->ip == (unsigned long)current->mm->context.vdso +
>> +                       vdso_image_32.sym_int80_landing_pad
>> +#ifdef CONFIG_IA32_EMULATION
>> +               && current_thread_info()->status & TS_COMPAT
>> +#endif
> Instead of ifdef, use the (grossly misnamed) is_ia32_task() helper for
> this, please.
Thanks, will do
>
>> +          )
>> +               regs->ip = new_vma->vm_start +
>> +                       vdso_image_32.sym_int80_landing_pad;
>> +#endif
>> +       new_vma->vm_mm->context.vdso = (void __user *)new_vma->vm_start;
> Can you arrange for the mremap call to fail if the old mapping gets
> split?  This might be as simple as confirming that the new mapping's
> length is what we expect it to be and, if it isn't, returning -EINVAL.
Sure.
>
> If anyone things that might break some existing application (which is
> quite unlikely), then we could allow mremap to succeed but skip the
> part where we change context.vdso and rip.
>
>> +
>> +       return 0;
>> +}
>>
>>   static int vvar_fault(const struct vm_special_mapping *sm,
>>                        struct vm_area_struct *vma, struct vm_fault *vmf)
>> @@ -162,6 +179,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>>          struct vm_area_struct *vma;
>>          unsigned long addr, text_start;
>>          int ret = 0;
>> +
>> +       static const struct vm_special_mapping vdso_mapping = {
>> +               .name = "[vdso]",
>> +               .fault = vdso_fault,
>> +               .mremap = vdso_mremap,
>> +       };
> Why did you add this instead of modifying text_mapping?
I moved text_mapping inside map_vdso function, as it's used
only there. Then I thought that vdso_mapping is better
naming for it as it complement vvar_mapping (goes right after).
If it's necessary, I will preserve naming.
>
> --Andy


-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-14 22:58   ` Andy Lutomirski
  2016-04-15  8:46     ` Dmitry Safonov
@ 2016-04-15  9:18     ` Ingo Molnar
  2016-04-15  9:51       ` Dmitry Safonov
  1 sibling, 1 reply; 56+ messages in thread
From: Ingo Molnar @ 2016-04-15  9:18 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Dmitry Safonov, linux-kernel, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, X86 ML, Andrew Morton, linux-mm, Dmitry Safonov


* Andy Lutomirski <luto@amacapital.net> wrote:

> > +       if (regs->ip == (unsigned long)current->mm->context.vdso +
> > +                       vdso_image_32.sym_int80_landing_pad
> > +#ifdef CONFIG_IA32_EMULATION
> > +               && current_thread_info()->status & TS_COMPAT
> > +#endif
> 
> Instead of ifdef, use the (grossly misnamed) is_ia32_task() helper for
> this, please.

Please also let's do the rename.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-15  9:18     ` Ingo Molnar
@ 2016-04-15  9:51       ` Dmitry Safonov
  2016-04-15 12:08         ` Dmitry Safonov
  0 siblings, 1 reply; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15  9:51 UTC (permalink / raw)
  To: Ingo Molnar, Andy Lutomirski
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On 04/15/2016 12:18 PM, Ingo Molnar wrote:
> * Andy Lutomirski <luto@amacapital.net> wrote:
>> Instead of ifdef, use the (grossly misnamed) is_ia32_task() helper for
>> this, please.
> Please also let's do the rename.
Does `is_32bit_syscall` sounds right, or shall it be `is_32bit_task`?
I think, `is_compat_task` will be bad-named for X86_32 host.

-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-15  9:51       ` Dmitry Safonov
@ 2016-04-15 12:08         ` Dmitry Safonov
  0 siblings, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15 12:08 UTC (permalink / raw)
  To: Ingo Molnar, Andy Lutomirski
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On 04/15/2016 12:51 PM, Dmitry Safonov wrote:
> On 04/15/2016 12:18 PM, Ingo Molnar wrote:
>> * Andy Lutomirski <luto@amacapital.net> wrote:
>>> Instead of ifdef, use the (grossly misnamed) is_ia32_task() helper for
>>> this, please.
>> Please also let's do the rename.
> Does `is_32bit_syscall` sounds right, or shall it be `is_32bit_task`?
> I think, `is_compat_task` will be bad-named for X86_32 host.
>
Or maybe, better:
is_x32_task => in_x32_syscall
is_ia32_task => in_ia32_syscall
as existing in_compat_syscall().

Looks good?

-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCHv3 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-11 15:22 [PATCH] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  2016-04-11 15:41 ` kbuild test robot
  2016-04-14 16:32 ` [PATCHv2] " Dmitry Safonov
@ 2016-04-15 13:20 ` Dmitry Safonov
  2016-04-15 13:20   ` [PATCHv3 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
  2016-04-15 13:53   ` [PATCHv3 1/2] x86/vdso: add mremap hook to vm_special_mapping kbuild test robot
  2016-04-15 14:12 ` [PATCHv4 " Dmitry Safonov
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15 13:20 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.
This will also fix context.vdso pointer for 64-bit, which does not
affect the user of vdso after mremap by now, but this may change.

As suggested by Andy, return EINVAL for mremap that splits vdso image.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
    used is_ia32_task instead of ifdefs 
v2: added __maybe_unused for pt_regs in vdso_mremap

 arch/x86/entry/vdso/vma.c | 36 +++++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..8510b1b55b21 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,29 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *new_vma)
+{
+	struct pt_regs __maybe_unused *regs = current_pt_regs();
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	if (image->size != new_size)
+		return -EINVAL;
+
+	if (is_ia32_task()) {
+		unsigned long vdso_land = vdso_image_32.sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+	new_vma->vm_mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +221,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCHv3 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall
  2016-04-15 13:20 ` [PATCHv3 1/2] " Dmitry Safonov
@ 2016-04-15 13:20   ` Dmitry Safonov
  2016-04-15 16:52     ` Andy Lutomirski
  2016-04-15 13:53   ` [PATCHv3 1/2] x86/vdso: add mremap hook to vm_special_mapping kbuild test robot
  1 sibling, 1 reply; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15 13:20 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Impact: clearify meaning

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v3: initial patch

 arch/x86/entry/common.c            | 2 +-
 arch/x86/entry/vdso/vma.c          | 2 +-
 arch/x86/include/asm/compat.h      | 4 ++--
 arch/x86/include/asm/thread_info.h | 2 +-
 arch/x86/kernel/process_64.c       | 2 +-
 arch/x86/kernel/ptrace.c           | 2 +-
 arch/x86/kernel/signal.c           | 2 +-
 arch/x86/kernel/uprobes.c          | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index e79d93d44ecd..ec138e538c44 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
 
 long syscall_trace_enter(struct pt_regs *regs)
 {
-	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
 
 	if (phase1_result == 0)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 8510b1b55b21..0b861fc274b6 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -109,7 +109,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	if (image->size != new_size)
 		return -EINVAL;
 
-	if (is_ia32_task()) {
+	if (in_ia32_syscall()) {
 		unsigned long vdso_land = vdso_image_32.sym_int80_landing_pad;
 		unsigned long old_land_addr = vdso_land +
 			(unsigned long)current->mm->context.vdso;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index ebb102e1bbc7..5a3b2c119ed0 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 	return (void __user *)round_down(sp - len, 16);
 }
 
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
 	if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
 
 static inline bool in_compat_syscall(void)
 {
-	return is_ia32_task() || is_x32_task();
+	return in_ia32_syscall() || in_x32_syscall();
 }
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ffae84df8a93..30c133ac05cd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void)
 	return true;
 }
 
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
 {
 #ifdef CONFIG_X86_32
 	return true;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31ac23a..4a62ec457b56 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -210,7 +210,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-		if (is_ia32_task())
+		if (in_ia32_syscall())
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)tls, 0);
 		else
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9cbb884..0f4d2a5df2dc 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1266,7 +1266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
 #ifdef CONFIG_X86_X32_ABI
-	if (!is_ia32_task())
+	if (!in_ia32_syscall())
 		return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
 #ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7d6fd2..aa31265aa61d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -762,7 +762,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_64
-	if (is_ia32_task())
+	if (in_ia32_syscall())
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6eaec8f..98b4dc87628b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
 
 static inline int sizeof_long(void)
 {
-	return is_ia32_task() ? 4 : 8;
+	return in_ia32_syscall() ? 4 : 8;
 }
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCHv3 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-15 13:20 ` [PATCHv3 1/2] " Dmitry Safonov
  2016-04-15 13:20   ` [PATCHv3 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
@ 2016-04-15 13:53   ` kbuild test robot
  1 sibling, 0 replies; 56+ messages in thread
From: kbuild test robot @ 2016-04-15 13:53 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: kbuild-all, linux-kernel, luto, tglx, mingo, hpa, x86, akpm,
	linux-mm, 0x7f454c46, Dmitry Safonov

[-- Attachment #1: Type: text/plain, Size: 1518 bytes --]

Hi Dmitry,

[auto build test ERROR on v4.6-rc3]
[also build test ERROR on next-20160415]
[cannot apply to tip/x86/core tip/x86/vdso]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Dmitry-Safonov/x86-vdso-add-mremap-hook-to-vm_special_mapping/20160415-212414
config: x86_64-randconfig-x011-201615 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   arch/x86/entry/vdso/vma.c: In function 'vdso_mremap':
>> arch/x86/entry/vdso/vma.c:113:29: error: 'vdso_image_32' undeclared (first use in this function)
      unsigned long vdso_land = vdso_image_32.sym_int80_landing_pad;
                                ^
   arch/x86/entry/vdso/vma.c:113:29: note: each undeclared identifier is reported only once for each function it appears in

vim +/vdso_image_32 +113 arch/x86/entry/vdso/vma.c

   107		const struct vdso_image *image = current->mm->context.vdso_image;
   108	
   109		if (image->size != new_size)
   110			return -EINVAL;
   111	
   112		if (is_ia32_task()) {
 > 113			unsigned long vdso_land = vdso_image_32.sym_int80_landing_pad;
   114			unsigned long old_land_addr = vdso_land +
   115				(unsigned long)current->mm->context.vdso;
   116	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 32453 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-11 15:22 [PATCH] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
                   ` (2 preceding siblings ...)
  2016-04-15 13:20 ` [PATCHv3 1/2] " Dmitry Safonov
@ 2016-04-15 14:12 ` Dmitry Safonov
  2016-04-15 14:12   ` [PATCHv4 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
  2016-04-15 16:58   ` [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping Andy Lutomirski
  2016-04-18 13:43 ` [PATCHv5 1/3] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
  2016-04-25 11:37 ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  5 siblings, 2 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15 14:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.
This will also fix context.vdso pointer for 64-bit, which does not
affect the user of vdso after mremap by now, but this may change.

As suggested by Andy, return EINVAL for mremap that splits vdso image.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
    used is_ia32_task instead of ifdefs 
v2: added __maybe_unused for pt_regs in vdso_mremap

 arch/x86/entry/vdso/vma.c | 36 +++++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..d26517f3f88f 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,29 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	if (image->size != new_size)
+		return -EINVAL;
+
+	if (is_ia32_task()) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long vdso_land = image->sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+	new_vma->vm_mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +221,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCHv4 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall
  2016-04-15 14:12 ` [PATCHv4 " Dmitry Safonov
@ 2016-04-15 14:12   ` Dmitry Safonov
  2016-04-15 16:58   ` [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping Andy Lutomirski
  1 sibling, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15 14:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Impact: clearify meaning

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v3: initial patch

 arch/x86/entry/common.c            | 2 +-
 arch/x86/entry/vdso/vma.c          | 2 +-
 arch/x86/include/asm/compat.h      | 4 ++--
 arch/x86/include/asm/thread_info.h | 2 +-
 arch/x86/kernel/process_64.c       | 2 +-
 arch/x86/kernel/ptrace.c           | 2 +-
 arch/x86/kernel/signal.c           | 2 +-
 arch/x86/kernel/uprobes.c          | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index e79d93d44ecd..ec138e538c44 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
 
 long syscall_trace_enter(struct pt_regs *regs)
 {
-	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
 
 	if (phase1_result == 0)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index d26517f3f88f..2c2cc6bc77fa 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -108,7 +108,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	if (image->size != new_size)
 		return -EINVAL;
 
-	if (is_ia32_task()) {
+	if (in_ia32_syscall()) {
 		struct pt_regs *regs = current_pt_regs();
 		unsigned long vdso_land = image->sym_int80_landing_pad;
 		unsigned long old_land_addr = vdso_land +
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index ebb102e1bbc7..5a3b2c119ed0 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 	return (void __user *)round_down(sp - len, 16);
 }
 
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
 	if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
 
 static inline bool in_compat_syscall(void)
 {
-	return is_ia32_task() || is_x32_task();
+	return in_ia32_syscall() || in_x32_syscall();
 }
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ffae84df8a93..30c133ac05cd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void)
 	return true;
 }
 
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
 {
 #ifdef CONFIG_X86_32
 	return true;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31ac23a..4a62ec457b56 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -210,7 +210,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-		if (is_ia32_task())
+		if (in_ia32_syscall())
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)tls, 0);
 		else
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9cbb884..0f4d2a5df2dc 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1266,7 +1266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
 #ifdef CONFIG_X86_X32_ABI
-	if (!is_ia32_task())
+	if (!in_ia32_syscall())
 		return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
 #ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7d6fd2..aa31265aa61d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -762,7 +762,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_64
-	if (is_ia32_task())
+	if (in_ia32_syscall())
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6eaec8f..98b4dc87628b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
 
 static inline int sizeof_long(void)
 {
-	return is_ia32_task() ? 4 : 8;
+	return in_ia32_syscall() ? 4 : 8;
 }
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCHv3 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall
  2016-04-15 13:20   ` [PATCHv3 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
@ 2016-04-15 16:52     ` Andy Lutomirski
  2016-04-15 16:55       ` Dmitry Safonov
  0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-15 16:52 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On Fri, Apr 15, 2016 at 6:20 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> Impact: clearify meaning
>
> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> Suggested-by: Ingo Molnar <mingo@kernel.org>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>

Acked-by: Andy Lutomirski <luto@kernel.org>

But if you resubmit, please consider making this patch 1 so Ingo can
apply it directly.

--Andy

> ---
> v3: initial patch
>
>  arch/x86/entry/common.c            | 2 +-
>  arch/x86/entry/vdso/vma.c          | 2 +-
>  arch/x86/include/asm/compat.h      | 4 ++--
>  arch/x86/include/asm/thread_info.h | 2 +-
>  arch/x86/kernel/process_64.c       | 2 +-
>  arch/x86/kernel/ptrace.c           | 2 +-
>  arch/x86/kernel/signal.c           | 2 +-
>  arch/x86/kernel/uprobes.c          | 2 +-
>  8 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index e79d93d44ecd..ec138e538c44 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
>
>  long syscall_trace_enter(struct pt_regs *regs)
>  {
> -       u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
> +       u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
>         unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
>
>         if (phase1_result == 0)
> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
> index 8510b1b55b21..0b861fc274b6 100644
> --- a/arch/x86/entry/vdso/vma.c
> +++ b/arch/x86/entry/vdso/vma.c
> @@ -109,7 +109,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
>         if (image->size != new_size)
>                 return -EINVAL;
>
> -       if (is_ia32_task()) {
> +       if (in_ia32_syscall()) {
>                 unsigned long vdso_land = vdso_image_32.sym_int80_landing_pad;
>                 unsigned long old_land_addr = vdso_land +
>                         (unsigned long)current->mm->context.vdso;
> diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
> index ebb102e1bbc7..5a3b2c119ed0 100644
> --- a/arch/x86/include/asm/compat.h
> +++ b/arch/x86/include/asm/compat.h
> @@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
>         return (void __user *)round_down(sp - len, 16);
>  }
>
> -static inline bool is_x32_task(void)
> +static inline bool in_x32_syscall(void)
>  {
>  #ifdef CONFIG_X86_X32_ABI
>         if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
> @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
>
>  static inline bool in_compat_syscall(void)
>  {
> -       return is_ia32_task() || is_x32_task();
> +       return in_ia32_syscall() || in_x32_syscall();
>  }
>  #define in_compat_syscall in_compat_syscall    /* override the generic impl */
>
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index ffae84df8a93..30c133ac05cd 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void)
>         return true;
>  }
>
> -static inline bool is_ia32_task(void)
> +static inline bool in_ia32_syscall(void)
>  {
>  #ifdef CONFIG_X86_32
>         return true;
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 6cbab31ac23a..4a62ec457b56 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -210,7 +210,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
>          */
>         if (clone_flags & CLONE_SETTLS) {
>  #ifdef CONFIG_IA32_EMULATION
> -               if (is_ia32_task())
> +               if (in_ia32_syscall())
>                         err = do_set_thread_area(p, -1,
>                                 (struct user_desc __user *)tls, 0);
>                 else
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index 32e9d9cbb884..0f4d2a5df2dc 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -1266,7 +1266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
>                         compat_ulong_t caddr, compat_ulong_t cdata)
>  {
>  #ifdef CONFIG_X86_X32_ABI
> -       if (!is_ia32_task())
> +       if (!in_ia32_syscall())
>                 return x32_arch_ptrace(child, request, caddr, cdata);
>  #endif
>  #ifdef CONFIG_IA32_EMULATION
> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
> index 548ddf7d6fd2..aa31265aa61d 100644
> --- a/arch/x86/kernel/signal.c
> +++ b/arch/x86/kernel/signal.c
> @@ -762,7 +762,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
>  static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
>  {
>  #ifdef CONFIG_X86_64
> -       if (is_ia32_task())
> +       if (in_ia32_syscall())
>                 return __NR_ia32_restart_syscall;
>  #endif
>  #ifdef CONFIG_X86_X32_ABI
> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
> index bf4db6eaec8f..98b4dc87628b 100644
> --- a/arch/x86/kernel/uprobes.c
> +++ b/arch/x86/kernel/uprobes.c
> @@ -516,7 +516,7 @@ struct uprobe_xol_ops {
>
>  static inline int sizeof_long(void)
>  {
> -       return is_ia32_task() ? 4 : 8;
> +       return in_ia32_syscall() ? 4 : 8;
>  }
>
>  static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
> --
> 2.8.0
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv3 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall
  2016-04-15 16:52     ` Andy Lutomirski
@ 2016-04-15 16:55       ` Dmitry Safonov
  0 siblings, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-15 16:55 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On 04/15/2016 07:52 PM, Andy Lutomirski wrote:
> Acked-by: Andy Lutomirski <luto@kernel.org>
>
> But if you resubmit, please consider making this patch 1 so Ingo can
> apply it directly.
I resubmitted it already :-[
https://lkml.org/lkml/2016/4/15/431

If there will be v5 version, I'll submit this first.
Thanks!

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-15 14:12 ` [PATCHv4 " Dmitry Safonov
  2016-04-15 14:12   ` [PATCHv4 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
@ 2016-04-15 16:58   ` Andy Lutomirski
  2016-04-18 11:18     ` Dmitry Safonov
  1 sibling, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-15 16:58 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On Fri, Apr 15, 2016 at 7:12 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> Add possibility for userspace 32-bit applications to move
> vdso mapping. Previously, when userspace app called
> mremap for vdso, in return path it would land on previous
> address of vdso page, resulting in segmentation violation.
> Now it lands fine and returns to userspace with remapped vdso.
> This will also fix context.vdso pointer for 64-bit, which does not
> affect the user of vdso after mremap by now, but this may change.
>
> As suggested by Andy, return EINVAL for mremap that splits vdso image.
>
> Renamed and moved text_mapping structure declaration inside
> map_vdso, as it used only there and now it complement
> vvar_mapping variable.
>
> There is still problem for remapping vdso in glibc applications:
> linker relocates addresses for syscalls on vdso page, so
> you need to relink with the new addresses. Or the next syscall
> through glibc may fail:
>   Program received signal SIGSEGV, Segmentation fault.
>   #0  0xf7fd9b80 in __kernel_vsyscall ()
>   #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
> ---
> v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
> v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
>     used is_ia32_task instead of ifdefs
> v2: added __maybe_unused for pt_regs in vdso_mremap
>
>  arch/x86/entry/vdso/vma.c | 36 +++++++++++++++++++++++++++++++-----
>  include/linux/mm_types.h  |  3 +++
>  mm/mmap.c                 | 10 ++++++++++
>  3 files changed, 44 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
> index 10f704584922..d26517f3f88f 100644
> --- a/arch/x86/entry/vdso/vma.c
> +++ b/arch/x86/entry/vdso/vma.c
> @@ -12,6 +12,7 @@
>  #include <linux/random.h>
>  #include <linux/elf.h>
>  #include <linux/cpu.h>
> +#include <linux/ptrace.h>
>  #include <asm/pvclock.h>
>  #include <asm/vgtod.h>
>  #include <asm/proto.h>
> @@ -98,10 +99,29 @@ static int vdso_fault(const struct vm_special_mapping *sm,
>         return 0;
>  }
>
> -static const struct vm_special_mapping text_mapping = {
> -       .name = "[vdso]",
> -       .fault = vdso_fault,
> -};

Ah, this is what I missed last time.  Looks good.

> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +                     struct vm_area_struct *new_vma)
> +{
> +       unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
> +       const struct vdso_image *image = current->mm->context.vdso_image;
> +
> +       if (image->size != new_size)
> +               return -EINVAL;
> +
> +       if (is_ia32_task()) {
> +               struct pt_regs *regs = current_pt_regs();
> +               unsigned long vdso_land = image->sym_int80_landing_pad;
> +               unsigned long old_land_addr = vdso_land +
> +                       (unsigned long)current->mm->context.vdso;
> +
> +               /* Fixing userspace landing - look at do_fast_syscall_32 */
> +               if (regs->ip == old_land_addr)
> +                       regs->ip = new_vma->vm_start + vdso_land;
> +       }
> +       new_vma->vm_mm->context.vdso = (void __user *)new_vma->vm_start;

A couple minor things:

 - You're looking at both new_vma->vm_mm and current->mm.  Is there a
reason for that?  If they're different, I'd be quite surprised, but
maybe it would make sense to check.

 - On second thought, the is_ia32_task() check is a little weird given
that you're planning on making the vdso image type.  It might make
sense to change that to in_ia32_syscall() && image == &vdso_image_32.

Other than that, looks good to me.

You could add a really simple test case to selftests/x86:

mremap(the vdso, somewhere else);
asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (0));

That'll segfault if this fails and it'll work and return 0 if it works.

FWIW, there's one respect in which this code could be problematic down
the road: if syscalls ever start needing the vvar page, then this gets
awkward because you can't remap both at once.  Also, this is
fundamentally racy if multiple threads try to use it (but there's
nothing whatsoever the kernel could do about that).  In general, once
the call to change and relocate the vdso gets written, CRIU should
probably prefer to use it over mremap.

--Andy

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-15 16:58   ` [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping Andy Lutomirski
@ 2016-04-18 11:18     ` Dmitry Safonov
  2016-04-18 15:37       ` Andy Lutomirski
  0 siblings, 1 reply; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-18 11:18 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On 04/15/2016 07:58 PM, Andy Lutomirski wrote:
> A couple minor things:
>
>   - You're looking at both new_vma->vm_mm and current->mm.  Is there a
> reason for that?  If they're different, I'd be quite surprised, but
> maybe it would make sense to check.

Ok, will add a check.

>   - On second thought, the is_ia32_task() check is a little weird given
> that you're planning on making the vdso image type.  It might make
> sense to change that to in_ia32_syscall() && image == &vdso_image_32.

Yes, we might be there remapping vdso_image_64 through int80, where
we shouldn't change landing. Thanks, will add a check.

> Other than that, looks good to me.
>
> You could add a really simple test case to selftests/x86:
>
> mremap(the vdso, somewhere else);
> asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (0));
>
> That'll segfault if this fails and it'll work and return 0 if it works.

Will add - for now I have tested this with kind the same program.

> FWIW, there's one respect in which this code could be problematic down
> the road: if syscalls ever start needing the vvar page, then this gets
> awkward because you can't remap both at once.  Also, this is
> fundamentally racy if multiple threads try to use it (but there's
> nothing whatsoever the kernel could do about that).  In general, once
> the call to change and relocate the vdso gets written, CRIU should
> probably prefer to use it over mremap.

Yes, but from my point of view, for the other reasons:
- on restore stage of CRIU, restorer maps VMAs that were dumped
on dump stage.
- this is done in one thread, as other threads may need those VMAs
to funciton.
- one of vmas, being restored is vDSO (which also was dumped), so
there is image for this blob.

So, ideally, I even would not need such API to remap blobs
from 64 to 32 (or backwards). This is ideally for other applications
that switches their mode. For CRIU *ideally* I do not need it, as
I have this vma's image dumped before - I only need remap to
fix contex.vdso pointer for proper landing and I'm doing it in
one thread.

But, in the practice, one may migrate application from one
kernel to another. And for different kernel versions, there may
be different vDSO entries. For now (before compatible C/R)
we have checked if vDSO differ and if so, examine this different
vDSO symbols and add jump trampolines on places where
were entries in previous vDSO to a new one.
So, this is also true for 32-bit vDSO blob. That's why I need
this API for CRIU.

-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCHv5 1/3] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall
  2016-04-11 15:22 [PATCH] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
                   ` (3 preceding siblings ...)
  2016-04-15 14:12 ` [PATCHv4 " Dmitry Safonov
@ 2016-04-18 13:43 ` Dmitry Safonov
  2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
                     ` (2 more replies)
  2016-04-25 11:37 ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  5 siblings, 3 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-18 13:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Impact: clearify meaning

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
Acked-by: Andy Lutomirski <luto@kernel.org>
---
v3: initial patch

 arch/x86/entry/common.c            | 2 +-
 arch/x86/include/asm/compat.h      | 4 ++--
 arch/x86/include/asm/thread_info.h | 2 +-
 arch/x86/kernel/process_64.c       | 2 +-
 arch/x86/kernel/ptrace.c           | 2 +-
 arch/x86/kernel/signal.c           | 2 +-
 arch/x86/kernel/uprobes.c          | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index e79d93d44ecd..ec138e538c44 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
 
 long syscall_trace_enter(struct pt_regs *regs)
 {
-	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
 
 	if (phase1_result == 0)
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index ebb102e1bbc7..5a3b2c119ed0 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 	return (void __user *)round_down(sp - len, 16);
 }
 
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
 	if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
 
 static inline bool in_compat_syscall(void)
 {
-	return is_ia32_task() || is_x32_task();
+	return in_ia32_syscall() || in_x32_syscall();
 }
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ffae84df8a93..30c133ac05cd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void)
 	return true;
 }
 
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
 {
 #ifdef CONFIG_X86_32
 	return true;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31ac23a..4a62ec457b56 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -210,7 +210,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-		if (is_ia32_task())
+		if (in_ia32_syscall())
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)tls, 0);
 		else
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9cbb884..0f4d2a5df2dc 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1266,7 +1266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
 #ifdef CONFIG_X86_X32_ABI
-	if (!is_ia32_task())
+	if (!in_ia32_syscall())
 		return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
 #ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7d6fd2..aa31265aa61d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -762,7 +762,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_64
-	if (is_ia32_task())
+	if (in_ia32_syscall())
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6eaec8f..98b4dc87628b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
 
 static inline int sizeof_long(void)
 {
-	return is_ia32_task() ? 4 : 8;
+	return in_ia32_syscall() ? 4 : 8;
 }
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 13:43 ` [PATCHv5 1/3] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
@ 2016-04-18 13:43   ` Dmitry Safonov
  2016-04-18 14:03     ` kbuild test robot
                       ` (3 more replies)
  2016-04-18 13:43   ` [PATCHv5 3/3] selftest/x86: add mremap vdso 32-bit test Dmitry Safonov
  2016-04-19  9:34   ` [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() tip-bot for Dmitry Safonov
  2 siblings, 4 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-18 13:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.
This will also fix context.vdso pointer for 64-bit, which does not
affect the user of vdso after mremap by now, but this may change.

As suggested by Andy, return EINVAL for mremap that splits vdso image.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
    the same, also check not only in_ia32_syscall() but image == &vdso_image_32
v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
    used is_ia32_task instead of ifdefs 
v2: added __maybe_unused for pt_regs in vdso_mremap

 arch/x86/entry/vdso/vma.c | 39 ++++++++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..ba133a9ab4f5 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,32 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	if (image->size != new_size)
+		return -EINVAL;
+
+	if (current->mm != new_vma->vm_mm)
+		return -EFAULT;
+
+	if (in_ia32_syscall() && image == &vdso_image_32) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long vdso_land = image->sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +185,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +224,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCHv5 3/3] selftest/x86: add mremap vdso 32-bit test
  2016-04-18 13:43 ` [PATCHv5 1/3] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
  2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
@ 2016-04-18 13:43   ` Dmitry Safonov
  2016-04-21 20:01     ` Andy Lutomirski
  2016-04-19  9:34   ` [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() tip-bot for Dmitry Safonov
  2 siblings, 1 reply; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-18 13:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46,
	Dmitry Safonov, Shuah Khan, linux-kselftest

Should print on success:
[root@localhost ~]# ./test_mremap_vdso_32
	AT_SYSINFO_EHDR is 0xf773f000
[NOTE]	Moving vDSO: [f773f000, f7740000] -> [a000000, a001000]
[OK]
Or segfault if landing was bad (before patches):
[root@localhost ~]# ./test_mremap_vdso_32
	AT_SYSINFO_EHDR is 0xf774f000
[NOTE]	Moving vDSO: [f774f000, f7750000] -> [a000000, a001000]
Segmentation fault (core dumped)

Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: linux-kselftest@vger.kernel.org
Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v5: initial version

 tools/testing/selftests/x86/Makefile           |  2 +-
 tools/testing/selftests/x86/test_mremap_vdso.c | 72 ++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/test_mremap_vdso.c

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b47ebd170690..c7162b511ab0 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -7,7 +7,7 @@ include ../lib.mk
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
 			check_initial_reg_state sigreturn ldt_gdt iopl
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
-			test_FCMOV test_FCOMI test_FISTTP \
+			test_FCMOV test_FCOMI test_FISTTP test_mremap_vdso \
 			vdso_restorer
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
new file mode 100644
index 000000000000..a470790e2118
--- /dev/null
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -0,0 +1,72 @@
+/*
+ * 32-bit test to check vdso mremap.
+ *
+ * Copyright (c) 2016 Dmitry Safonov
+ * Suggested-by: Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_mremap_vdso.c
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/syscall.h>
+
+#if !defined(__i386__)
+int main(int argc, char **argv, char **envp)
+{
+	printf("[SKIP]\tNot a 32-bit x86 userspace\n");
+	return 0;
+}
+#else
+
+#define PAGE_SIZE	4096
+#define VDSO_SIZE	PAGE_SIZE
+
+int main(int argc, char **argv, char **envp)
+{
+	unsigned long vdso_addr, dest_addr;
+	void *new_addr;
+	const char *ok_string = "[OK]\n";
+
+	vdso_addr = getauxval(AT_SYSINFO_EHDR);
+	printf("\tAT_SYSINFO_EHDR is 0x%lx\n", vdso_addr);
+	if (!vdso_addr || vdso_addr == -ENOENT) {
+		printf("[FAIL]\tgetauxval failed\n");
+		return 1;
+	}
+
+	/* to low for stack, to high for lib/data/code mappings */
+	dest_addr = 0x0a000000;
+	printf("[NOTE]\tMoving vDSO: [%lx, %lx] -> [%lx, %lx]\n",
+		vdso_addr, vdso_addr + VDSO_SIZE,
+		dest_addr, dest_addr + VDSO_SIZE);
+	new_addr = mremap((void *)vdso_addr, VDSO_SIZE, VDSO_SIZE,
+			MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
+	if ((unsigned long)new_addr == (unsigned long)-1) {
+		printf("[FAIL]\tmremap failed (%d): %m\n", errno);
+		return 1;
+	}
+
+	asm volatile ("int $0x80" : : "a" (__NR_write), "b" (STDOUT_FILENO),
+			"c" (ok_string), "d" (strlen(ok_string)));
+	asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (0));
+
+	return 0;
+}
+#endif
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
@ 2016-04-18 14:03     ` kbuild test robot
  2016-04-18 14:17     ` [PATCHv6 " Dmitry Safonov
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 56+ messages in thread
From: kbuild test robot @ 2016-04-18 14:03 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: kbuild-all, linux-kernel, luto, tglx, mingo, hpa, x86, akpm,
	linux-mm, 0x7f454c46, Dmitry Safonov

[-- Attachment #1: Type: text/plain, Size: 6077 bytes --]

Hi Dmitry,

[auto build test WARNING on v4.6-rc4]
[also build test WARNING on next-20160418]
[cannot apply to tip/x86/core tip/x86/vdso]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Dmitry-Safonov/x86-rename-is_-ia32-x32-_task-to-in_-ia32-x32-_syscall/20160418-214656
config: x86_64-randconfig-x000-201616 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from include/asm-generic/bug.h:4:0,
                    from arch/x86/include/asm/bug.h:35,
                    from include/linux/bug.h:4,
                    from include/linux/mmdebug.h:4,
                    from include/linux/mm.h:8,
                    from arch/x86/entry/vdso/vma.c:7:
   arch/x86/entry/vdso/vma.c: In function 'vdso_mremap':
   arch/x86/entry/vdso/vma.c:114:37: error: 'vdso_image_32' undeclared (first use in this function)
     if (in_ia32_syscall() && image == &vdso_image_32) {
                                        ^
   include/linux/compiler.h:151:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^
>> arch/x86/entry/vdso/vma.c:114:2: note: in expansion of macro 'if'
     if (in_ia32_syscall() && image == &vdso_image_32) {
     ^
   arch/x86/entry/vdso/vma.c:114:37: note: each undeclared identifier is reported only once for each function it appears in
     if (in_ia32_syscall() && image == &vdso_image_32) {
                                        ^
   include/linux/compiler.h:151:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^
>> arch/x86/entry/vdso/vma.c:114:2: note: in expansion of macro 'if'
     if (in_ia32_syscall() && image == &vdso_image_32) {
     ^

vim +/if +114 arch/x86/entry/vdso/vma.c

     1	/*
     2	 * Copyright 2007 Andi Kleen, SUSE Labs.
     3	 * Subject to the GPL, v.2
     4	 *
     5	 * This contains most of the x86 vDSO kernel-side code.
     6	 */
   > 7	#include <linux/mm.h>
     8	#include <linux/err.h>
     9	#include <linux/sched.h>
    10	#include <linux/slab.h>
    11	#include <linux/init.h>
    12	#include <linux/random.h>
    13	#include <linux/elf.h>
    14	#include <linux/cpu.h>
    15	#include <linux/ptrace.h>
    16	#include <asm/pvclock.h>
    17	#include <asm/vgtod.h>
    18	#include <asm/proto.h>
    19	#include <asm/vdso.h>
    20	#include <asm/vvar.h>
    21	#include <asm/page.h>
    22	#include <asm/hpet.h>
    23	#include <asm/desc.h>
    24	#include <asm/cpufeature.h>
    25	
    26	#if defined(CONFIG_X86_64)
    27	unsigned int __read_mostly vdso64_enabled = 1;
    28	#endif
    29	
    30	void __init init_vdso_image(const struct vdso_image *image)
    31	{
    32		BUG_ON(image->size % PAGE_SIZE != 0);
    33	
    34		apply_alternatives((struct alt_instr *)(image->data + image->alt),
    35				   (struct alt_instr *)(image->data + image->alt +
    36							image->alt_len));
    37	}
    38	
    39	struct linux_binprm;
    40	
    41	/*
    42	 * Put the vdso above the (randomized) stack with another randomized
    43	 * offset.  This way there is no hole in the middle of address space.
    44	 * To save memory make sure it is still in the same PTE as the stack
    45	 * top.  This doesn't give that many random bits.
    46	 *
    47	 * Note that this algorithm is imperfect: the distribution of the vdso
    48	 * start address within a PMD is biased toward the end.
    49	 *
    50	 * Only used for the 64-bit and x32 vdsos.
    51	 */
    52	static unsigned long vdso_addr(unsigned long start, unsigned len)
    53	{
    54	#ifdef CONFIG_X86_32
    55		return 0;
    56	#else
    57		unsigned long addr, end;
    58		unsigned offset;
    59	
    60		/*
    61		 * Round up the start address.  It can start out unaligned as a result
    62		 * of stack start randomization.
    63		 */
    64		start = PAGE_ALIGN(start);
    65	
    66		/* Round the lowest possible end address up to a PMD boundary. */
    67		end = (start + len + PMD_SIZE - 1) & PMD_MASK;
    68		if (end >= TASK_SIZE_MAX)
    69			end = TASK_SIZE_MAX;
    70		end -= len;
    71	
    72		if (end > start) {
    73			offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
    74			addr = start + (offset << PAGE_SHIFT);
    75		} else {
    76			addr = start;
    77		}
    78	
    79		/*
    80		 * Forcibly align the final address in case we have a hardware
    81		 * issue that requires alignment for performance reasons.
    82		 */
    83		addr = align_vdso_addr(addr);
    84	
    85		return addr;
    86	#endif
    87	}
    88	
    89	static int vdso_fault(const struct vm_special_mapping *sm,
    90			      struct vm_area_struct *vma, struct vm_fault *vmf)
    91	{
    92		const struct vdso_image *image = vma->vm_mm->context.vdso_image;
    93	
    94		if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
    95			return VM_FAULT_SIGBUS;
    96	
    97		vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
    98		get_page(vmf->page);
    99		return 0;
   100	}
   101	
   102	static int vdso_mremap(const struct vm_special_mapping *sm,
   103			      struct vm_area_struct *new_vma)
   104	{
   105		unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
   106		const struct vdso_image *image = current->mm->context.vdso_image;
   107	
   108		if (image->size != new_size)
   109			return -EINVAL;
   110	
   111		if (current->mm != new_vma->vm_mm)
   112			return -EFAULT;
   113	
 > 114		if (in_ia32_syscall() && image == &vdso_image_32) {
   115			struct pt_regs *regs = current_pt_regs();
   116			unsigned long vdso_land = image->sym_int80_landing_pad;
   117			unsigned long old_land_addr = vdso_land +

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 23963 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCHv6 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  2016-04-18 14:03     ` kbuild test robot
@ 2016-04-18 14:17     ` Dmitry Safonov
  2016-04-18 14:23     ` [PATCHv7 " Dmitry Safonov
  2016-04-23 23:09     ` [PATCHv5 " kbuild test robot
  3 siblings, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-18 14:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.
This will also fix context.vdso pointer for 64-bit, which does not
affect the user of vdso after mremap by now, but this may change.

As suggested by Andy, return EINVAL for mremap that splits vdso image.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function
    with ifdefs around
v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
    the same, also check not only in_ia32_syscall() but image == &vdso_image_32
v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
    used is_ia32_task instead of ifdefs 
v2: added __maybe_unused for pt_regs in vdso_mremap

 arch/x86/entry/vdso/vma.c | 49 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..3385586f86a6 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,42 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+static void vdso_fix_landing(const struct vdso_image *image,
+		struct vm_area_struct *new_vma)
+{
+	if (in_ia32_syscall() && image == &vdso_image_32) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long vdso_land = image->sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+}
+#else
+static void vdso_fix_landing(const struct vdso_image *image) {}
+#endif
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	if (image->size != new_size)
+		return -EINVAL;
+
+	if (current->mm != new_vma->vm_mm)
+		return -EFAULT;
+
+	vdso_fix_landing(image, new_vma);
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +195,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +234,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCHv7 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  2016-04-18 14:03     ` kbuild test robot
  2016-04-18 14:17     ` [PATCHv6 " Dmitry Safonov
@ 2016-04-18 14:23     ` Dmitry Safonov
  2016-04-20 16:22       ` Dmitry Safonov
  2016-04-21 19:52       ` Andy Lutomirski
  2016-04-23 23:09     ` [PATCHv5 " kbuild test robot
  3 siblings, 2 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-18 14:23 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.
This will also fix context.vdso pointer for 64-bit, which does not
affect the user of vdso after mremap by now, but this may change.

As suggested by Andy, return EINVAL for mremap that splits vdso image.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v7: that's just not my day: add new_vma parameter to vdso_fix_landing
    sorry for the noise
v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function
    with ifdefs around
v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
    the same, also check not only in_ia32_syscall() but image == &vdso_image_32
v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
    used is_ia32_task instead of ifdefs 
v2: added __maybe_unused for pt_regs in vdso_mremap

 arch/x86/entry/vdso/vma.c | 50 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..d94291a19b6e 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,43 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+static void vdso_fix_landing(const struct vdso_image *image,
+		struct vm_area_struct *new_vma)
+{
+	if (in_ia32_syscall() && image == &vdso_image_32) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long vdso_land = image->sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+}
+#else
+static void vdso_fix_landing(const struct vdso_image *image,
+		struct vm_area_struct *new_vma) {}
+#endif
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	if (image->size != new_size)
+		return -EINVAL;
+
+	if (current->mm != new_vma->vm_mm)
+		return -EFAULT;
+
+	vdso_fix_landing(image, new_vma);
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +196,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +235,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 11:18     ` Dmitry Safonov
@ 2016-04-18 15:37       ` Andy Lutomirski
  0 siblings, 0 replies; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-18 15:37 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: Thomas Gleixner, Dmitry Safonov, Ingo Molnar, linux-mm, X86 ML,
	Andrew Morton, linux-kernel, H. Peter Anvin

On Apr 18, 2016 4:19 AM, "Dmitry Safonov" <dsafonov@virtuozzo.com> wrote:
>
> On 04/15/2016 07:58 PM, Andy Lutomirski wrote:
>>
>> A couple minor things:
>>
>>   - You're looking at both new_vma->vm_mm and current->mm.  Is there a
>> reason for that?  If they're different, I'd be quite surprised, but
>> maybe it would make sense to check.
>
>
> Ok, will add a check.
>
>
>>   - On second thought, the is_ia32_task() check is a little weird given
>> that you're planning on making the vdso image type.  It might make
>> sense to change that to in_ia32_syscall() && image == &vdso_image_32.
>
>
> Yes, we might be there remapping vdso_image_64 through int80, where
> we shouldn't change landing. Thanks, will add a check.
>
>
>> Other than that, looks good to me.
>>
>> You could add a really simple test case to selftests/x86:
>>
>> mremap(the vdso, somewhere else);
>> asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (0));
>>
>> That'll segfault if this fails and it'll work and return 0 if it works.
>
>
> Will add - for now I have tested this with kind the same program.
>
>
>> FWIW, there's one respect in which this code could be problematic down
>> the road: if syscalls ever start needing the vvar page, then this gets
>> awkward because you can't remap both at once.  Also, this is
>> fundamentally racy if multiple threads try to use it (but there's
>> nothing whatsoever the kernel could do about that).  In general, once
>> the call to change and relocate the vdso gets written, CRIU should
>> probably prefer to use it over mremap.
>
>
> Yes, but from my point of view, for the other reasons:
> - on restore stage of CRIU, restorer maps VMAs that were dumped
> on dump stage.
> - this is done in one thread, as other threads may need those VMAs
> to funciton.
> - one of vmas, being restored is vDSO (which also was dumped), so
> there is image for this blob.
>
> So, ideally, I even would not need such API to remap blobs
> from 64 to 32 (or backwards).

If I'm understanding you right, this won't work reliably.  The kernel
makes no promise that one kernel's vdso is compatible with another
kernel.  In fact, the internal interfaces have changed several times
and are changing again in 4.6.

You also need to put vvar at the correct offset from the text, and
vvar can't be dumped and restores at all, since the whole point is
that it changes at runtime.

> This is ideally for other applications
> that switches their mode. For CRIU *ideally* I do not need it, as
> I have this vma's image dumped before - I only need remap to
> fix contex.vdso pointer for proper landing and I'm doing it in
> one thread.
>
> But, in the practice, one may migrate application from one
> kernel to another. And for different kernel versions, there may
> be different vDSO entries. For now (before compatible C/R)
> we have checked if vDSO differ and if so, examine this different
> vDSO symbols and add jump trampolines on places where
> were entries in previous vDSO to a new one.
> So, this is also true for 32-bit vDSO blob. That's why I need
> this API for CRIU.

Understood.

I like the mremap support regardless of whether CRIU ends up using it long-term.

--Andy

^ permalink raw reply	[flat|nested] 56+ messages in thread

* [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  2016-04-18 13:43 ` [PATCHv5 1/3] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
  2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  2016-04-18 13:43   ` [PATCHv5 3/3] selftest/x86: add mremap vdso 32-bit test Dmitry Safonov
@ 2016-04-19  9:34   ` tip-bot for Dmitry Safonov
  2016-04-19 11:15     ` Ingo Molnar
  2018-10-18  1:47     ` in_compat_syscall() returns from kernel thread for X86_32 NeilBrown
  2 siblings, 2 replies; 56+ messages in thread
From: tip-bot for Dmitry Safonov @ 2016-04-19  9:34 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, dsafonov, luto, luto, dvlasenk, hpa, mingo, torvalds, bp,
	tglx, linux-kernel, brgerst

Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 19 Apr 2016 10:44:52 +0200

x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()

The is_ia32_task()/is_x32_task() function names are a big misnomer: they
suggests that the compat-ness of a system call is a task property, which
is not true, the compatness of a system call purely depends on how it
was invoked through the system call layer.

A task may call 32-bit and 64-bit and x32 system calls without changing
any of its kernel visible state.

This specific minomer is also actively dangerous, as it might cause kernel
developers to use the wrong kind of security checks within system calls.

So rename it to in_{ia32,x32}_syscall().

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
[ Expanded the changelog. ]
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: 0x7f454c46@gmail.com
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1460987025-30360-1-git-send-email-dsafonov@virtuozzo.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/common.c            | 2 +-
 arch/x86/include/asm/compat.h      | 4 ++--
 arch/x86/include/asm/thread_info.h | 2 +-
 arch/x86/kernel/process_64.c       | 2 +-
 arch/x86/kernel/ptrace.c           | 2 +-
 arch/x86/kernel/signal.c           | 2 +-
 arch/x86/kernel/uprobes.c          | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index e79d93d..ec138e5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
 
 long syscall_trace_enter(struct pt_regs *regs)
 {
-	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
 
 	if (phase1_result == 0)
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index ebb102e..5a3b2c1 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 	return (void __user *)round_down(sp - len, 16);
 }
 
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
 	if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
 
 static inline bool in_compat_syscall(void)
 {
-	return is_ia32_task() || is_x32_task();
+	return in_ia32_syscall() || in_x32_syscall();
 }
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ffae84d..30c133a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void)
 	return true;
 }
 
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
 {
 #ifdef CONFIG_X86_32
 	return true;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 50337ea..24d1b7f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -210,7 +210,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-		if (is_ia32_task())
+		if (in_ia32_syscall())
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)tls, 0);
 		else
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9c..0f4d2a5 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1266,7 +1266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
 #ifdef CONFIG_X86_X32_ABI
-	if (!is_ia32_task())
+	if (!in_ia32_syscall())
 		return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
 #ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6408c09..2ebcc60 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -762,7 +762,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_64
-	if (is_ia32_task())
+	if (in_ia32_syscall())
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6e..98b4dc8 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
 
 static inline int sizeof_long(void)
 {
-	return is_ia32_task() ? 4 : 8;
+	return in_ia32_syscall() ? 4 : 8;
 }
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  2016-04-19  9:34   ` [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() tip-bot for Dmitry Safonov
@ 2016-04-19 11:15     ` Ingo Molnar
  2016-04-19 11:35       ` Borislav Petkov
  2016-04-19 16:04       ` Andy Lutomirski
  2018-10-18  1:47     ` in_compat_syscall() returns from kernel thread for X86_32 NeilBrown
  1 sibling, 2 replies; 56+ messages in thread
From: Ingo Molnar @ 2016-04-19 11:15 UTC (permalink / raw)
  To: peterz, dsafonov, luto, hpa, dvlasenk, luto, torvalds, bp,
	brgerst, linux-kernel, tglx
  Cc: linux-tip-commits


* tip-bot for Dmitry Safonov <tipbot@zytor.com> wrote:

> Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
> Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
> Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
> AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
> Committer:  Ingo Molnar <mingo@kernel.org>
> CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
> 
> x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()

Btw., I'm not _entirely_ happy about the 'IA32' name, but went with this name for 
lack of a better alternative.

So we have 4 system call modes:

 - 64-bit native
 - 32-bit addresses with 64-bit arguments (x32)
 - 32-bit compat syscall (x86-32 compatibility on x86-64)
 - 32-bit native

and we have 2 bits of data that are per system call properties:

 - TS_COMPAT in thread_info->status is set/cleared dynamically by the compat 
   syscall entry code

 - a high bit in pt_regs->orig_ax tells us whether it's an x32 system call.

So I'd suggest the following renames to harmonize these concepts:

 - CONFIG_IA32_EMULATION   => CONFIG_X86_32_ABI
   this lines up nicely with: CONFIG_X86_X32_ABI

 - is_ia32_syscall() -> is_x86_32_syscall()
 - is_x32_syscall()  -> is_x86_x32_syscall()

 - is_compat_syscall() remains as-is.

... thoughts?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  2016-04-19 11:15     ` Ingo Molnar
@ 2016-04-19 11:35       ` Borislav Petkov
  2016-04-19 17:00         ` H. Peter Anvin
  2016-04-19 16:04       ` Andy Lutomirski
  1 sibling, 1 reply; 56+ messages in thread
From: Borislav Petkov @ 2016-04-19 11:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: peterz, dsafonov, luto, hpa, dvlasenk, luto, torvalds, brgerst,
	linux-kernel, tglx, linux-tip-commits

On Tue, Apr 19, 2016 at 01:15:30PM +0200, Ingo Molnar wrote:
> So I'd suggest the following renames to harmonize these concepts:
> 
>  - CONFIG_IA32_EMULATION   => CONFIG_X86_32_ABI
>    this lines up nicely with: CONFIG_X86_X32_ABI

Except that the only difference now is the "X" in the strings. So one
would need more coffee when staring at those. :)

> 
>  - is_ia32_syscall() -> is_x86_32_syscall()
>  - is_x32_syscall()  -> is_x86_x32_syscall()
> 
>  - is_compat_syscall() remains as-is.
> 
> ... thoughts?

Still, getting those names streamlined and logical is a step in the
right direction IMO.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply. Srsly.

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  2016-04-19 11:15     ` Ingo Molnar
  2016-04-19 11:35       ` Borislav Petkov
@ 2016-04-19 16:04       ` Andy Lutomirski
  2016-04-22  8:36         ` Ingo Molnar
  1 sibling, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-19 16:04 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Dmitry Safonov, Andrew Lutomirski,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Brian Gerst, linux-kernel, Thomas Gleixner, linux-tip-commits

On Tue, Apr 19, 2016 at 4:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * tip-bot for Dmitry Safonov <tipbot@zytor.com> wrote:
>
>> Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
>> Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
>> Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
>> AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
>> Committer:  Ingo Molnar <mingo@kernel.org>
>> CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
>>
>> x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>
> Btw., I'm not _entirely_ happy about the 'IA32' name, but went with this name for
> lack of a better alternative.
>
> So we have 4 system call modes:
>
>  - 64-bit native
>  - 32-bit addresses with 64-bit arguments (x32)
>  - 32-bit compat syscall (x86-32 compatibility on x86-64)
>  - 32-bit native
>
> and we have 2 bits of data that are per system call properties:
>
>  - TS_COMPAT in thread_info->status is set/cleared dynamically by the compat
>    syscall entry code
>
>  - a high bit in pt_regs->orig_ax tells us whether it's an x32 system call.
>
> So I'd suggest the following renames to harmonize these concepts:
>
>  - CONFIG_IA32_EMULATION   => CONFIG_X86_32_ABI
>    this lines up nicely with: CONFIG_X86_X32_ABI

I think I'd prefer a different interpretation: CONFIG_X86_32_ABI is
set if CONFIG_IA32_EMULATION is set *or* CONFIG_X86_32 is set.  There
is a lot of code that manually looks for that, because what it
actually cares about is "do we support 32-bit syscalls".  Also, with
the syscall cleanups I've been doing, a lot of the code is shared
between native 32-bit and 32-on-64 compat, so the distinction between
those two modes is slowly shrinking.

in_ia32_syscall() is consistent with that idea: it returns true on
native 32-bit kernels.

--Andy

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  2016-04-19 11:35       ` Borislav Petkov
@ 2016-04-19 17:00         ` H. Peter Anvin
  0 siblings, 0 replies; 56+ messages in thread
From: H. Peter Anvin @ 2016-04-19 17:00 UTC (permalink / raw)
  To: Borislav Petkov, Ingo Molnar
  Cc: peterz, dsafonov, luto, dvlasenk, luto, torvalds, brgerst,
	linux-kernel, tglx, linux-tip-commits

On April 19, 2016 4:35:01 AM PDT, Borislav Petkov <bp@alien8.de> wrote:
>On Tue, Apr 19, 2016 at 01:15:30PM +0200, Ingo Molnar wrote:
>> So I'd suggest the following renames to harmonize these concepts:
>> 
>>  - CONFIG_IA32_EMULATION   => CONFIG_X86_32_ABI
>>    this lines up nicely with: CONFIG_X86_X32_ABI
>
>Except that the only difference now is the "X" in the strings. So one
>would need more coffee when staring at those. :)
>
>> 
>>  - is_ia32_syscall() -> is_x86_32_syscall()
>>  - is_x32_syscall()  -> is_x86_x32_syscall()
>> 
>>  - is_compat_syscall() remains as-is.
>> 
>> ... thoughts?
>
>Still, getting those names streamlined and logical is a step in the
>right direction IMO.

Let's use the i386 name instead of x86-32; that's what we use elsewhere in the kernel and is much easier to tell apart.  Also, less likely to be caught up in "is this i386 only or *any* 32-bit call, including x32"?
-- 
Sent from my Android device with K-9 Mail. Please excuse brevity and formatting.

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv7 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 14:23     ` [PATCHv7 " Dmitry Safonov
@ 2016-04-20 16:22       ` Dmitry Safonov
  2016-04-21 19:52       ` Andy Lutomirski
  1 sibling, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-20 16:22 UTC (permalink / raw)
  To: luto; +Cc: linux-kernel, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46

On 04/18/2016 05:23 PM, Dmitry Safonov wrote:
> Add possibility for userspace 32-bit applications to move
> vdso mapping. Previously, when userspace app called
> mremap for vdso, in return path it would land on previous
> address of vdso page, resulting in segmentation violation.
> Now it lands fine and returns to userspace with remapped vdso.
> This will also fix context.vdso pointer for 64-bit, which does not
> affect the user of vdso after mremap by now, but this may change.
>
> As suggested by Andy, return EINVAL for mremap that splits vdso image.
>
> Renamed and moved text_mapping structure declaration inside
> map_vdso, as it used only there and now it complement
> vvar_mapping variable.
>
> There is still problem for remapping vdso in glibc applications:
> linker relocates addresses for syscalls on vdso page, so
> you need to relink with the new addresses. Or the next syscall
> through glibc may fail:
>    Program received signal SIGSEGV, Segmentation fault.
>    #0  0xf7fd9b80 in __kernel_vsyscall ()
>    #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>

Andy, can I have an ack from you for this version?
If it looks fine for you, of course.
And for v5 test (3/3 patch)? (I did not resed it, as it hasn't changed)

> ---
> v7: that's just not my day: add new_vma parameter to vdso_fix_landing
>      sorry for the noise
> v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function
>      with ifdefs around
> v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
>      the same, also check not only in_ia32_syscall() but image == &vdso_image_32
> v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
> v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
>      used is_ia32_task instead of ifdefs
> v2: added __maybe_unused for pt_regs in vdso_mremap
>
>   arch/x86/entry/vdso/vma.c | 50 ++++++++++++++++++++++++++++++++++++++++++-----
>   include/linux/mm_types.h  |  3 +++
>   mm/mmap.c                 | 10 ++++++++++
>   3 files changed, 58 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
> index 10f704584922..d94291a19b6e 100644
> --- a/arch/x86/entry/vdso/vma.c
> +++ b/arch/x86/entry/vdso/vma.c
> @@ -12,6 +12,7 @@
>   #include <linux/random.h>
>   #include <linux/elf.h>
>   #include <linux/cpu.h>
> +#include <linux/ptrace.h>
>   #include <asm/pvclock.h>
>   #include <asm/vgtod.h>
>   #include <asm/proto.h>
> @@ -98,10 +99,43 @@ static int vdso_fault(const struct vm_special_mapping *sm,
>   	return 0;
>   }
>   
> -static const struct vm_special_mapping text_mapping = {
> -	.name = "[vdso]",
> -	.fault = vdso_fault,
> -};
> +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
> +static void vdso_fix_landing(const struct vdso_image *image,
> +		struct vm_area_struct *new_vma)
> +{
> +	if (in_ia32_syscall() && image == &vdso_image_32) {
> +		struct pt_regs *regs = current_pt_regs();
> +		unsigned long vdso_land = image->sym_int80_landing_pad;
> +		unsigned long old_land_addr = vdso_land +
> +			(unsigned long)current->mm->context.vdso;
> +
> +		/* Fixing userspace landing - look at do_fast_syscall_32 */
> +		if (regs->ip == old_land_addr)
> +			regs->ip = new_vma->vm_start + vdso_land;
> +	}
> +}
> +#else
> +static void vdso_fix_landing(const struct vdso_image *image,
> +		struct vm_area_struct *new_vma) {}
> +#endif
> +
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +		struct vm_area_struct *new_vma)
> +{
> +	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
> +	const struct vdso_image *image = current->mm->context.vdso_image;
> +
> +	if (image->size != new_size)
> +		return -EINVAL;
> +
> +	if (current->mm != new_vma->vm_mm)
> +		return -EFAULT;
> +
> +	vdso_fix_landing(image, new_vma);
> +	current->mm->context.vdso = (void __user *)new_vma->vm_start;
> +
> +	return 0;
> +}
>   
>   static int vvar_fault(const struct vm_special_mapping *sm,
>   		      struct vm_area_struct *vma, struct vm_fault *vmf)
> @@ -162,6 +196,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>   	struct vm_area_struct *vma;
>   	unsigned long addr, text_start;
>   	int ret = 0;
> +
> +	static const struct vm_special_mapping vdso_mapping = {
> +		.name = "[vdso]",
> +		.fault = vdso_fault,
> +		.mremap = vdso_mremap,
> +	};
>   	static const struct vm_special_mapping vvar_mapping = {
>   		.name = "[vvar]",
>   		.fault = vvar_fault,
> @@ -195,7 +235,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>   				       image->size,
>   				       VM_READ|VM_EXEC|
>   				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
> -				       &text_mapping);
> +				       &vdso_mapping);
>   
>   	if (IS_ERR(vma)) {
>   		ret = PTR_ERR(vma);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index c2d75b4fa86c..4d16ab9287af 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -586,6 +586,9 @@ struct vm_special_mapping {
>   	int (*fault)(const struct vm_special_mapping *sm,
>   		     struct vm_area_struct *vma,
>   		     struct vm_fault *vmf);
> +
> +	int (*mremap)(const struct vm_special_mapping *sm,
> +		     struct vm_area_struct *new_vma);
>   };
>   
>   enum tlb_flush_reason {
> diff --git a/mm/mmap.c b/mm/mmap.c
> index bd2e1a533bc1..ba71658dd1a1 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
>   	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
>   }
>   
> +static int special_mapping_mremap(struct vm_area_struct *new_vma)
> +{
> +	struct vm_special_mapping *sm = new_vma->vm_private_data;
> +
> +	if (sm->mremap)
> +		return sm->mremap(sm, new_vma);
> +	return 0;
> +}
> +
>   static const struct vm_operations_struct special_mapping_vmops = {
>   	.close = special_mapping_close,
>   	.fault = special_mapping_fault,
> +	.mremap = special_mapping_mremap,
>   	.name = special_mapping_name,
>   };
>   


-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv7 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 14:23     ` [PATCHv7 " Dmitry Safonov
  2016-04-20 16:22       ` Dmitry Safonov
@ 2016-04-21 19:52       ` Andy Lutomirski
  2016-04-22 10:45         ` Dmitry Safonov
  1 sibling, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-21 19:52 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On Mon, Apr 18, 2016 at 7:23 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> Add possibility for userspace 32-bit applications to move
> vdso mapping. Previously, when userspace app called
> mremap for vdso, in return path it would land on previous
> address of vdso page, resulting in segmentation violation.
> Now it lands fine and returns to userspace with remapped vdso.
> This will also fix context.vdso pointer for 64-bit, which does not
> affect the user of vdso after mremap by now, but this may change.
>
> As suggested by Andy, return EINVAL for mremap that splits vdso image.
>
> Renamed and moved text_mapping structure declaration inside
> map_vdso, as it used only there and now it complement
> vvar_mapping variable.
>
> There is still problem for remapping vdso in glibc applications:
> linker relocates addresses for syscalls on vdso page, so
> you need to relink with the new addresses. Or the next syscall
> through glibc may fail:
>   Program received signal SIGSEGV, Segmentation fault.
>   #0  0xf7fd9b80 in __kernel_vsyscall ()
>   #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
> ---
> v7: that's just not my day: add new_vma parameter to vdso_fix_landing
>     sorry for the noise
> v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function
>     with ifdefs around
> v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
>     the same, also check not only in_ia32_syscall() but image == &vdso_image_32
> v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
> v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
>     used is_ia32_task instead of ifdefs
> v2: added __maybe_unused for pt_regs in vdso_mremap
>
>  arch/x86/entry/vdso/vma.c | 50 ++++++++++++++++++++++++++++++++++++++++++-----
>  include/linux/mm_types.h  |  3 +++
>  mm/mmap.c                 | 10 ++++++++++
>  3 files changed, 58 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
> index 10f704584922..d94291a19b6e 100644
> --- a/arch/x86/entry/vdso/vma.c
> +++ b/arch/x86/entry/vdso/vma.c
> @@ -12,6 +12,7 @@
>  #include <linux/random.h>
>  #include <linux/elf.h>
>  #include <linux/cpu.h>
> +#include <linux/ptrace.h>
>  #include <asm/pvclock.h>
>  #include <asm/vgtod.h>
>  #include <asm/proto.h>
> @@ -98,10 +99,43 @@ static int vdso_fault(const struct vm_special_mapping *sm,
>         return 0;
>  }
>
> -static const struct vm_special_mapping text_mapping = {
> -       .name = "[vdso]",
> -       .fault = vdso_fault,
> -};
> +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT

I think you mean defined(CONFIG_IA32_EMULATION).  IIRC CONFIG_COMPAT
is set for X32, !IA32 kernels, too.

> +static void vdso_fix_landing(const struct vdso_image *image,
> +               struct vm_area_struct *new_vma)
> +{
> +       if (in_ia32_syscall() && image == &vdso_image_32) {
> +               struct pt_regs *regs = current_pt_regs();
> +               unsigned long vdso_land = image->sym_int80_landing_pad;
> +               unsigned long old_land_addr = vdso_land +
> +                       (unsigned long)current->mm->context.vdso;
> +
> +               /* Fixing userspace landing - look at do_fast_syscall_32 */
> +               if (regs->ip == old_land_addr)
> +                       regs->ip = new_vma->vm_start + vdso_land;
> +       }
> +}
> +#else
> +static void vdso_fix_landing(const struct vdso_image *image,
> +               struct vm_area_struct *new_vma) {}
> +#endif

You could also define the function regardless and simply ifdef out the
body, thus saving a few lines of code.

> +
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +               struct vm_area_struct *new_vma)
> +{
> +       unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
> +       const struct vdso_image *image = current->mm->context.vdso_image;
> +
> +       if (image->size != new_size)
> +               return -EINVAL;
> +
> +       if (current->mm != new_vma->vm_mm)
> +               return -EFAULT;

Can you make that if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))?
It should be impossible.

> +
> +       vdso_fix_landing(image, new_vma);
> +       current->mm->context.vdso = (void __user *)new_vma->vm_start;
> +
> +       return 0;
> +}
>
>  static int vvar_fault(const struct vm_special_mapping *sm,
>                       struct vm_area_struct *vma, struct vm_fault *vmf)
> @@ -162,6 +196,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>         struct vm_area_struct *vma;
>         unsigned long addr, text_start;
>         int ret = 0;
> +
> +       static const struct vm_special_mapping vdso_mapping = {
> +               .name = "[vdso]",
> +               .fault = vdso_fault,
> +               .mremap = vdso_mremap,
> +       };
>         static const struct vm_special_mapping vvar_mapping = {
>                 .name = "[vvar]",
>                 .fault = vvar_fault,
> @@ -195,7 +235,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>                                        image->size,
>                                        VM_READ|VM_EXEC|
>                                        VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
> -                                      &text_mapping);
> +                                      &vdso_mapping);
>
>         if (IS_ERR(vma)) {
>                 ret = PTR_ERR(vma);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index c2d75b4fa86c..4d16ab9287af 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -586,6 +586,9 @@ struct vm_special_mapping {
>         int (*fault)(const struct vm_special_mapping *sm,
>                      struct vm_area_struct *vma,
>                      struct vm_fault *vmf);
> +
> +       int (*mremap)(const struct vm_special_mapping *sm,
> +                    struct vm_area_struct *new_vma);
>  };
>
>  enum tlb_flush_reason {
> diff --git a/mm/mmap.c b/mm/mmap.c
> index bd2e1a533bc1..ba71658dd1a1 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
>         return ((struct vm_special_mapping *)vma->vm_private_data)->name;
>  }
>
> +static int special_mapping_mremap(struct vm_area_struct *new_vma)
> +{
> +       struct vm_special_mapping *sm = new_vma->vm_private_data;
> +
> +       if (sm->mremap)
> +               return sm->mremap(sm, new_vma);
> +       return 0;
> +}
> +
>  static const struct vm_operations_struct special_mapping_vmops = {
>         .close = special_mapping_close,
>         .fault = special_mapping_fault,
> +       .mremap = special_mapping_mremap,
>         .name = special_mapping_name,
>  };
>
> --
> 2.8.0
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv5 3/3] selftest/x86: add mremap vdso 32-bit test
  2016-04-18 13:43   ` [PATCHv5 3/3] selftest/x86: add mremap vdso 32-bit test Dmitry Safonov
@ 2016-04-21 20:01     ` Andy Lutomirski
  2016-04-22 11:34       ` Dmitry Safonov
  0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-21 20:01 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov, Shuah Khan,
	linux-kselftest

On Mon, Apr 18, 2016 at 6:43 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> Should print on success:
> [root@localhost ~]# ./test_mremap_vdso_32
>         AT_SYSINFO_EHDR is 0xf773f000
> [NOTE]  Moving vDSO: [f773f000, f7740000] -> [a000000, a001000]
> [OK]
> Or segfault if landing was bad (before patches):
> [root@localhost ~]# ./test_mremap_vdso_32
>         AT_SYSINFO_EHDR is 0xf774f000
> [NOTE]  Moving vDSO: [f774f000, f7750000] -> [a000000, a001000]
> Segmentation fault (core dumped)
>
> Cc: Shuah Khan <shuahkh@osg.samsung.com>
> Cc: linux-kselftest@vger.kernel.org
> Suggested-by: Andy Lutomirski <luto@kernel.org>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
> ---
> v5: initial version
>
>  tools/testing/selftests/x86/Makefile           |  2 +-
>  tools/testing/selftests/x86/test_mremap_vdso.c | 72 ++++++++++++++++++++++++++
>  2 files changed, 73 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/x86/test_mremap_vdso.c
>
> diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
> index b47ebd170690..c7162b511ab0 100644
> --- a/tools/testing/selftests/x86/Makefile
> +++ b/tools/testing/selftests/x86/Makefile
> @@ -7,7 +7,7 @@ include ../lib.mk
>  TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
>                         check_initial_reg_state sigreturn ldt_gdt iopl
>  TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
> -                       test_FCMOV test_FCOMI test_FISTTP \
> +                       test_FCMOV test_FCOMI test_FISTTP test_mremap_vdso \
>                         vdso_restorer
>
>  TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
> diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
> new file mode 100644
> index 000000000000..a470790e2118
> --- /dev/null
> +++ b/tools/testing/selftests/x86/test_mremap_vdso.c
> @@ -0,0 +1,72 @@
> +/*
> + * 32-bit test to check vdso mremap.
> + *
> + * Copyright (c) 2016 Dmitry Safonov
> + * Suggested-by: Andrew Lutomirski
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +/*
> + * Can be built statically:
> + * gcc -Os -Wall -static -m32 test_mremap_vdso.c
> + */
> +#define _GNU_SOURCE
> +#include <stdio.h>
> +#include <errno.h>
> +#include <unistd.h>
> +#include <string.h>
> +
> +#include <sys/mman.h>
> +#include <sys/auxv.h>
> +#include <sys/syscall.h>
> +
> +#if !defined(__i386__)
> +int main(int argc, char **argv, char **envp)
> +{
> +       printf("[SKIP]\tNot a 32-bit x86 userspace\n");
> +       return 0;

What's wrong with testing on 64-bit systems?

> +}
> +#else
> +
> +#define PAGE_SIZE      4096
> +#define VDSO_SIZE      PAGE_SIZE

The vdso is frequently bigger than a page.

> +
> +int main(int argc, char **argv, char **envp)
> +{
> +       unsigned long vdso_addr, dest_addr;
> +       void *new_addr;
> +       const char *ok_string = "[OK]\n";
> +
> +       vdso_addr = getauxval(AT_SYSINFO_EHDR);
> +       printf("\tAT_SYSINFO_EHDR is 0x%lx\n", vdso_addr);
> +       if (!vdso_addr || vdso_addr == -ENOENT) {
> +               printf("[FAIL]\tgetauxval failed\n");
> +               return 1;

Let's make this [WARN] and return 0.  The vdso is optional, and
getauxval is missing on many systems.


> +       }
> +
> +       /* to low for stack, to high for lib/data/code mappings */
> +       dest_addr = 0x0a000000;

This could be make reliable -- map a big enough area PROT_NONE and use
that address.

> +       printf("[NOTE]\tMoving vDSO: [%lx, %lx] -> [%lx, %lx]\n",
> +               vdso_addr, vdso_addr + VDSO_SIZE,
> +               dest_addr, dest_addr + VDSO_SIZE);

fflush(stdout), please, for the benefit of test harnesses that use pipes.

--Andy

> +       new_addr = mremap((void *)vdso_addr, VDSO_SIZE, VDSO_SIZE,
> +                       MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
> +       if ((unsigned long)new_addr == (unsigned long)-1) {
> +               printf("[FAIL]\tmremap failed (%d): %m\n", errno);
> +               return 1;
> +       }
> +
> +       asm volatile ("int $0x80" : : "a" (__NR_write), "b" (STDOUT_FILENO),
> +                       "c" (ok_string), "d" (strlen(ok_string)));
> +       asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (0));
> +
> +       return 0;
> +}
> +#endif
> --
> 2.8.0
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  2016-04-19 16:04       ` Andy Lutomirski
@ 2016-04-22  8:36         ` Ingo Molnar
  0 siblings, 0 replies; 56+ messages in thread
From: Ingo Molnar @ 2016-04-22  8:36 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Peter Zijlstra, Dmitry Safonov, Andrew Lutomirski,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Brian Gerst, linux-kernel, Thomas Gleixner, linux-tip-commits


* Andy Lutomirski <luto@amacapital.net> wrote:

> On Tue, Apr 19, 2016 at 4:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > * tip-bot for Dmitry Safonov <tipbot@zytor.com> wrote:
> >
> >> Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
> >> Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
> >> Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
> >> AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
> >> Committer:  Ingo Molnar <mingo@kernel.org>
> >> CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
> >>
> >> x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
> >
> > Btw., I'm not _entirely_ happy about the 'IA32' name, but went with this name for
> > lack of a better alternative.
> >
> > So we have 4 system call modes:
> >
> >  - 64-bit native
> >  - 32-bit addresses with 64-bit arguments (x32)
> >  - 32-bit compat syscall (x86-32 compatibility on x86-64)
> >  - 32-bit native
> >
> > and we have 2 bits of data that are per system call properties:
> >
> >  - TS_COMPAT in thread_info->status is set/cleared dynamically by the compat
> >    syscall entry code
> >
> >  - a high bit in pt_regs->orig_ax tells us whether it's an x32 system call.
> >
> > So I'd suggest the following renames to harmonize these concepts:
> >
> >  - CONFIG_IA32_EMULATION   => CONFIG_X86_32_ABI
> >    this lines up nicely with: CONFIG_X86_X32_ABI
> 
> I think I'd prefer a different interpretation: CONFIG_X86_32_ABI is
> set if CONFIG_IA32_EMULATION is set *or* CONFIG_X86_32 is set.  There
> is a lot of code that manually looks for that, because what it
> actually cares about is "do we support 32-bit syscalls".  Also, with
> the syscall cleanups I've been doing, a lot of the code is shared
> between native 32-bit and 32-on-64 compat, so the distinction between
> those two modes is slowly shrinking.
> 
> in_ia32_syscall() is consistent with that idea: it returns true on
> native 32-bit kernels.

Ok, so how about:

 - rename CONFIG_IA32_EMULATION => CONFIG_X86_32_COMPAT

 - introduce CONFIG_X86_32_ABI to separate the 'convenience' functionality of 
   CONFIG_IA32_EMULATION from the ABI meaning.

 - rename is_ia32_syscall() -> is_x86_32_syscall()
 - rename is_x32_syscall()  -> is_x86_x32_syscall()

 - is_compat_syscall() remains as-is.

?

In the long run I'd like to get rid of two naming variants:

 - Fix all names that use 'IA32' that refer to 32-bit compat functionality,
   and only name the things 'IA32' that are truly Intel specific.

 - Fix all names that use 'emulation' when they really refer to 32-bit compat. We 
   don't actually emulate anything, we are just calling convention compatible with 
   very little overhead. The CPU is a fully 32-bit/64-bit dual mode hardware, 
   there's nothing that is emulated. Calling it IA32_EMULATION was a misnomer.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv7 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-21 19:52       ` Andy Lutomirski
@ 2016-04-22 10:45         ` Dmitry Safonov
  0 siblings, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-22 10:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On 04/21/2016 10:52 PM, Andy Lutomirski wrote:
> On Mon, Apr 18, 2016 at 7:23 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
>> Add possibility for userspace 32-bit applications to move
>> vdso mapping. Previously, when userspace app called
>> mremap for vdso, in return path it would land on previous
>> address of vdso page, resulting in segmentation violation.
>> Now it lands fine and returns to userspace with remapped vdso.
>> This will also fix context.vdso pointer for 64-bit, which does not
>> affect the user of vdso after mremap by now, but this may change.
>>
>> As suggested by Andy, return EINVAL for mremap that splits vdso image.
>>
>> Renamed and moved text_mapping structure declaration inside
>> map_vdso, as it used only there and now it complement
>> vvar_mapping variable.
>>
>> There is still problem for remapping vdso in glibc applications:
>> linker relocates addresses for syscalls on vdso page, so
>> you need to relink with the new addresses. Or the next syscall
>> through glibc may fail:
>>    Program received signal SIGSEGV, Segmentation fault.
>>    #0  0xf7fd9b80 in __kernel_vsyscall ()
>>    #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
>>
>> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
>> ---
>> v7: that's just not my day: add new_vma parameter to vdso_fix_landing
>>      sorry for the noise
>> v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function
>>      with ifdefs around
>> v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
>>      the same, also check not only in_ia32_syscall() but image == &vdso_image_32
>> v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
>> v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
>>      used is_ia32_task instead of ifdefs
>> v2: added __maybe_unused for pt_regs in vdso_mremap
>>
>>   arch/x86/entry/vdso/vma.c | 50 ++++++++++++++++++++++++++++++++++++++++++-----
>>   include/linux/mm_types.h  |  3 +++
>>   mm/mmap.c                 | 10 ++++++++++
>>   3 files changed, 58 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
>> index 10f704584922..d94291a19b6e 100644
>> --- a/arch/x86/entry/vdso/vma.c
>> +++ b/arch/x86/entry/vdso/vma.c
>> @@ -12,6 +12,7 @@
>>   #include <linux/random.h>
>>   #include <linux/elf.h>
>>   #include <linux/cpu.h>
>> +#include <linux/ptrace.h>
>>   #include <asm/pvclock.h>
>>   #include <asm/vgtod.h>
>>   #include <asm/proto.h>
>> @@ -98,10 +99,43 @@ static int vdso_fault(const struct vm_special_mapping *sm,
>>          return 0;
>>   }
>>
>> -static const struct vm_special_mapping text_mapping = {
>> -       .name = "[vdso]",
>> -       .fault = vdso_fault,
>> -};
>> +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
> I think you mean defined(CONFIG_IA32_EMULATION).  IIRC CONFIG_COMPAT
> is set for X32, !IA32 kernels, too.

Sure, I just copied it from vdso_image_32 declaration.
Should it be there CONFIG_IA32_EMULATION there instead of
CONFIG_COMPAT? Or I miss something?

>> +static void vdso_fix_landing(const struct vdso_image *image,
>> +               struct vm_area_struct *new_vma)
>> +{
>> +       if (in_ia32_syscall() && image == &vdso_image_32) {
>> +               struct pt_regs *regs = current_pt_regs();
>> +               unsigned long vdso_land = image->sym_int80_landing_pad;
>> +               unsigned long old_land_addr = vdso_land +
>> +                       (unsigned long)current->mm->context.vdso;
>> +
>> +               /* Fixing userspace landing - look at do_fast_syscall_32 */
>> +               if (regs->ip == old_land_addr)
>> +                       regs->ip = new_vma->vm_start + vdso_land;
>> +       }
>> +}
>> +#else
>> +static void vdso_fix_landing(const struct vdso_image *image,
>> +               struct vm_area_struct *new_vma) {}
>> +#endif
> You could also define the function regardless and simply ifdef out the
> body, thus saving a few lines of code.

Yep, will do.

>> +
>> +static int vdso_mremap(const struct vm_special_mapping *sm,
>> +               struct vm_area_struct *new_vma)
>> +{
>> +       unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
>> +       const struct vdso_image *image = current->mm->context.vdso_image;
>> +
>> +       if (image->size != new_size)
>> +               return -EINVAL;
>> +
>> +       if (current->mm != new_vma->vm_mm)
>> +               return -EFAULT;
> Can you make that if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))?
> It should be impossible.

Sure.

>> +
>> +       vdso_fix_landing(image, new_vma);
>> +       current->mm->context.vdso = (void __user *)new_vma->vm_start;
>> +
>> +       return 0;
>> +}
>>
>>   static int vvar_fault(const struct vm_special_mapping *sm,
>>                        struct vm_area_struct *vma, struct vm_fault *vmf)
>> @@ -162,6 +196,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>>          struct vm_area_struct *vma;
>>          unsigned long addr, text_start;
>>          int ret = 0;
>> +
>> +       static const struct vm_special_mapping vdso_mapping = {
>> +               .name = "[vdso]",
>> +               .fault = vdso_fault,
>> +               .mremap = vdso_mremap,
>> +       };
>>          static const struct vm_special_mapping vvar_mapping = {
>>                  .name = "[vvar]",
>>                  .fault = vvar_fault,
>> @@ -195,7 +235,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>>                                         image->size,
>>                                         VM_READ|VM_EXEC|
>>                                         VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
>> -                                      &text_mapping);
>> +                                      &vdso_mapping);
>>
>>          if (IS_ERR(vma)) {
>>                  ret = PTR_ERR(vma);
>> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
>> index c2d75b4fa86c..4d16ab9287af 100644
>> --- a/include/linux/mm_types.h
>> +++ b/include/linux/mm_types.h
>> @@ -586,6 +586,9 @@ struct vm_special_mapping {
>>          int (*fault)(const struct vm_special_mapping *sm,
>>                       struct vm_area_struct *vma,
>>                       struct vm_fault *vmf);
>> +
>> +       int (*mremap)(const struct vm_special_mapping *sm,
>> +                    struct vm_area_struct *new_vma);
>>   };
>>
>>   enum tlb_flush_reason {
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index bd2e1a533bc1..ba71658dd1a1 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
>>          return ((struct vm_special_mapping *)vma->vm_private_data)->name;
>>   }
>>
>> +static int special_mapping_mremap(struct vm_area_struct *new_vma)
>> +{
>> +       struct vm_special_mapping *sm = new_vma->vm_private_data;
>> +
>> +       if (sm->mremap)
>> +               return sm->mremap(sm, new_vma);
>> +       return 0;
>> +}
>> +
>>   static const struct vm_operations_struct special_mapping_vmops = {
>>          .close = special_mapping_close,
>>          .fault = special_mapping_fault,
>> +       .mremap = special_mapping_mremap,
>>          .name = special_mapping_name,
>>   };
>>
>> --
>> 2.8.0
>>
>
>


-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv5 3/3] selftest/x86: add mremap vdso 32-bit test
  2016-04-21 20:01     ` Andy Lutomirski
@ 2016-04-22 11:34       ` Dmitry Safonov
  0 siblings, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-22 11:34 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov, Shuah Khan,
	linux-kselftest

On 04/21/2016 11:01 PM, Andy Lutomirski wrote:
> On Mon, Apr 18, 2016 at 6:43 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
>> Should print on success:
>> [root@localhost ~]# ./test_mremap_vdso_32
>>          AT_SYSINFO_EHDR is 0xf773f000
>> [NOTE]  Moving vDSO: [f773f000, f7740000] -> [a000000, a001000]
>> [OK]
>> Or segfault if landing was bad (before patches):
>> [root@localhost ~]# ./test_mremap_vdso_32
>>          AT_SYSINFO_EHDR is 0xf774f000
>> [NOTE]  Moving vDSO: [f774f000, f7750000] -> [a000000, a001000]
>> Segmentation fault (core dumped)
>>
>> Cc: Shuah Khan <shuahkh@osg.samsung.com>
>> Cc: linux-kselftest@vger.kernel.org
>> Suggested-by: Andy Lutomirski <luto@kernel.org>
>> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
>> ---
>> v5: initial version
>>
>>   tools/testing/selftests/x86/Makefile           |  2 +-
>>   tools/testing/selftests/x86/test_mremap_vdso.c | 72 ++++++++++++++++++++++++++
>>   2 files changed, 73 insertions(+), 1 deletion(-)
>>   create mode 100644 tools/testing/selftests/x86/test_mremap_vdso.c
>>
>> diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
>> index b47ebd170690..c7162b511ab0 100644
>> --- a/tools/testing/selftests/x86/Makefile
>> +++ b/tools/testing/selftests/x86/Makefile
>> @@ -7,7 +7,7 @@ include ../lib.mk
>>   TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
>>                          check_initial_reg_state sigreturn ldt_gdt iopl
>>   TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
>> -                       test_FCMOV test_FCOMI test_FISTTP \
>> +                       test_FCMOV test_FCOMI test_FISTTP test_mremap_vdso \
>>                          vdso_restorer
>>
>>   TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
>> diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
>> new file mode 100644
>> index 000000000000..a470790e2118
>> --- /dev/null
>> +++ b/tools/testing/selftests/x86/test_mremap_vdso.c
>> @@ -0,0 +1,72 @@
>> +/*
>> + * 32-bit test to check vdso mremap.
>> + *
>> + * Copyright (c) 2016 Dmitry Safonov
>> + * Suggested-by: Andrew Lutomirski
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + */
>> +/*
>> + * Can be built statically:
>> + * gcc -Os -Wall -static -m32 test_mremap_vdso.c
>> + */
>> +#define _GNU_SOURCE
>> +#include <stdio.h>
>> +#include <errno.h>
>> +#include <unistd.h>
>> +#include <string.h>
>> +
>> +#include <sys/mman.h>
>> +#include <sys/auxv.h>
>> +#include <sys/syscall.h>
>> +
>> +#if !defined(__i386__)
>> +int main(int argc, char **argv, char **envp)
>> +{
>> +       printf("[SKIP]\tNot a 32-bit x86 userspace\n");
>> +       return 0;
> What's wrong with testing on 64-bit systems?

Ok, will drop this.

>> +}
>> +#else
>> +
>> +#define PAGE_SIZE      4096
>> +#define VDSO_SIZE      PAGE_SIZE
> The vdso is frequently bigger than a page.

Ok, will enlarge this. Are two pages big enough?

>> +
>> +int main(int argc, char **argv, char **envp)
>> +{
>> +       unsigned long vdso_addr, dest_addr;
>> +       void *new_addr;
>> +       const char *ok_string = "[OK]\n";
>> +
>> +       vdso_addr = getauxval(AT_SYSINFO_EHDR);
>> +       printf("\tAT_SYSINFO_EHDR is 0x%lx\n", vdso_addr);
>> +       if (!vdso_addr || vdso_addr == -ENOENT) {
>> +               printf("[FAIL]\tgetauxval failed\n");
>> +               return 1;
> Let's make this [WARN] and return 0.  The vdso is optional, and
> getauxval is missing on many systems.

Ok

>> +       }
>> +
>> +       /* to low for stack, to high for lib/data/code mappings */
>> +       dest_addr = 0x0a000000;
> This could be make reliable -- map a big enough area PROT_NONE and use
> that address.

Oh, that's good, will do.

>> +       printf("[NOTE]\tMoving vDSO: [%lx, %lx] -> [%lx, %lx]\n",
>> +               vdso_addr, vdso_addr + VDSO_SIZE,
>> +               dest_addr, dest_addr + VDSO_SIZE);
> fflush(stdout), please, for the benefit of test harnesses that use pipes.

Will add.

>> +       new_addr = mremap((void *)vdso_addr, VDSO_SIZE, VDSO_SIZE,
>> +                       MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
>> +       if ((unsigned long)new_addr == (unsigned long)-1) {
>> +               printf("[FAIL]\tmremap failed (%d): %m\n", errno);
>> +               return 1;
>> +       }
>> +
>> +       asm volatile ("int $0x80" : : "a" (__NR_write), "b" (STDOUT_FILENO),
>> +                       "c" (ok_string), "d" (strlen(ok_string)));
>> +       asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (0));
>> +
>> +       return 0;
>> +}
>> +#endif
>> --
>> 2.8.0
>>
>
>


-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
                       ` (2 preceding siblings ...)
  2016-04-18 14:23     ` [PATCHv7 " Dmitry Safonov
@ 2016-04-23 23:09     ` kbuild test robot
  3 siblings, 0 replies; 56+ messages in thread
From: kbuild test robot @ 2016-04-23 23:09 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: kbuild-all, linux-kernel, luto, tglx, mingo, hpa, x86, akpm,
	linux-mm, 0x7f454c46, Dmitry Safonov

[-- Attachment #1: Type: text/plain, Size: 1749 bytes --]

Hi,

[auto build test ERROR on v4.6-rc4]
[cannot apply to tip/x86/core tip/x86/vdso next-20160422]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Dmitry-Safonov/x86-rename-is_-ia32-x32-_task-to-in_-ia32-x32-_syscall/20160418-214656
config: x86_64-randconfig-s0-04240623 (attached as .config)
compiler: gcc-5 (Debian 5.3.1-14) 5.3.1 20160409
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: the linux-review/Dmitry-Safonov/x86-rename-is_-ia32-x32-_task-to-in_-ia32-x32-_syscall/20160418-214656 HEAD aed83f1dd951908724a5ba564e2b03d68a9fc7b8 builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   arch/x86/entry/vdso/vma.c: In function 'vdso_mremap':
>> arch/x86/entry/vdso/vma.c:114:37: error: 'vdso_image_32' undeclared (first use in this function)
     if (in_ia32_syscall() && image == &vdso_image_32) {
                                        ^
   arch/x86/entry/vdso/vma.c:114:37: note: each undeclared identifier is reported only once for each function it appears in

vim +/vdso_image_32 +114 arch/x86/entry/vdso/vma.c

   108		if (image->size != new_size)
   109			return -EINVAL;
   110	
   111		if (current->mm != new_vma->vm_mm)
   112			return -EFAULT;
   113	
 > 114		if (in_ia32_syscall() && image == &vdso_image_32) {
   115			struct pt_regs *regs = current_pt_regs();
   116			unsigned long vdso_land = image->sym_int80_landing_pad;
   117			unsigned long old_land_addr = vdso_land +

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 27095 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-11 15:22 [PATCH] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
                   ` (4 preceding siblings ...)
  2016-04-18 13:43 ` [PATCHv5 1/3] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
@ 2016-04-25 11:37 ` Dmitry Safonov
  2016-04-25 11:37   ` [PATCHv8 2/2] selftest/x86: add mremap vdso test Dmitry Safonov
  2016-04-25 21:38   ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Andy Lutomirski
  5 siblings, 2 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-25 11:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46, Dmitry Safonov

Add possibility for userspace 32-bit applications to move
vdso mapping. Previously, when userspace app called
mremap for vdso, in return path it would land on previous
address of vdso page, resulting in segmentation violation.
Now it lands fine and returns to userspace with remapped vdso.
This will also fix context.vdso pointer for 64-bit, which does not
affect the user of vdso after mremap by now, but this may change.

As suggested by Andy, return EINVAL for mremap that splits vdso image.

Renamed and moved text_mapping structure declaration inside
map_vdso, as it used only there and now it complement
vvar_mapping variable.

There is still problem for remapping vdso in glibc applications:
linker relocates addresses for syscalls on vdso page, so
you need to relink with the new addresses. Or the next syscall
through glibc may fail:
  Program received signal SIGSEGV, Segmentation fault.
  #0  0xf7fd9b80 in __kernel_vsyscall ()
  #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v8: add WARN_ON_ONCE on current->mm != new_vma->vm_mm
v7: build fix
v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function
    with ifdefs around
v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
    the same, also check not only in_ia32_syscall() but image == &vdso_image_32
v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
    used is_ia32_task instead of ifdefs 
v2: added __maybe_unused for pt_regs in vdso_mremap

 arch/x86/entry/vdso/vma.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/mm_types.h  |  3 +++
 mm/mmap.c                 | 10 ++++++++++
 3 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..0625b66e5648 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -98,10 +99,40 @@ static int vdso_fault(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static const struct vm_special_mapping text_mapping = {
-	.name = "[vdso]",
-	.fault = vdso_fault,
-};
+static void vdso_fix_landing(const struct vdso_image *image,
+		struct vm_area_struct *new_vma)
+{
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+	if (in_ia32_syscall() && image == &vdso_image_32) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long vdso_land = image->sym_int80_landing_pad;
+		unsigned long old_land_addr = vdso_land +
+			(unsigned long)current->mm->context.vdso;
+
+		/* Fixing userspace landing - look at do_fast_syscall_32 */
+		if (regs->ip == old_land_addr)
+			regs->ip = new_vma->vm_start + vdso_land;
+	}
+#endif
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	const struct vdso_image *image = current->mm->context.vdso_image;
+
+	if (image->size != new_size)
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
+		return -EFAULT;
+
+	vdso_fix_landing(image, new_vma);
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
 
 static int vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -162,6 +193,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
+
+	static const struct vm_special_mapping vdso_mapping = {
+		.name = "[vdso]",
+		.fault = vdso_fault,
+		.mremap = vdso_mremap,
+	};
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -195,7 +232,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &text_mapping);
+				       &vdso_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4fa86c..4d16ab9287af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -586,6 +586,9 @@ struct vm_special_mapping {
 	int (*fault)(const struct vm_special_mapping *sm,
 		     struct vm_area_struct *vma,
 		     struct vm_fault *vmf);
+
+	int (*mremap)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *new_vma);
 };
 
 enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index bd2e1a533bc1..ba71658dd1a1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+	struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+	if (sm->mremap)
+		return sm->mremap(sm, new_vma);
+	return 0;
+}
+
 static const struct vm_operations_struct special_mapping_vmops = {
 	.close = special_mapping_close,
 	.fault = special_mapping_fault,
+	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
 };
 
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* [PATCHv8 2/2] selftest/x86: add mremap vdso test
  2016-04-25 11:37 ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
@ 2016-04-25 11:37   ` Dmitry Safonov
  2016-04-25 21:39     ` Andy Lutomirski
  2016-04-25 21:38   ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Andy Lutomirski
  1 sibling, 1 reply; 56+ messages in thread
From: Dmitry Safonov @ 2016-04-25 11:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: luto, tglx, mingo, hpa, x86, akpm, linux-mm, 0x7f454c46,
	Dmitry Safonov, Shuah Khan, linux-kselftest

Should print on success:
[root@localhost ~]# ./test_mremap_vdso_32
	AT_SYSINFO_EHDR is 0xf773f000
[NOTE]	Moving vDSO: [f773f000, f7740000] -> [a000000, a001000]
[OK]
Or segfault if landing was bad (before patches):
[root@localhost ~]# ./test_mremap_vdso_32
	AT_SYSINFO_EHDR is 0xf774f000
[NOTE]	Moving vDSO: [f774f000, f7750000] -> [a000000, a001000]
Segmentation fault (core dumped)

Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: linux-kselftest@vger.kernel.org
Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
v8: run test for x86_64 too;
    removed fixed VDSO_SIZE - check EINVAL mremap return for partial remapping
v5: initial version

 tools/testing/selftests/x86/Makefile           |  2 +-
 tools/testing/selftests/x86/test_mremap_vdso.c | 99 ++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/test_mremap_vdso.c

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b47ebd170690..ba865f2efcce 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,7 +5,7 @@ include ../lib.mk
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
-			check_initial_reg_state sigreturn ldt_gdt iopl
+			check_initial_reg_state sigreturn ldt_gdt iopl test_mremap_vdso
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
new file mode 100644
index 000000000000..831e2e0107d9
--- /dev/null
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -0,0 +1,99 @@
+/*
+ * 32-bit test to check vdso mremap.
+ *
+ * Copyright (c) 2016 Dmitry Safonov
+ * Suggested-by: Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_mremap_vdso.c
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/syscall.h>
+
+#define PAGE_SIZE	4096
+
+static int try_to_remap(void *vdso_addr, unsigned long size)
+{
+	void *dest_addr, *new_addr;
+
+	dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (dest_addr == MAP_FAILED) {
+		printf("[WARN]\tmmap failed (%d): %m\n", errno);
+		return 0;
+	}
+
+	printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
+		vdso_addr, (unsigned long)vdso_addr + size,
+		dest_addr, (unsigned long)dest_addr + size);
+	fflush(stdout);
+
+	new_addr = mremap(vdso_addr, size, size,
+			MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
+	if ((unsigned long)new_addr == (unsigned long)-1) {
+		munmap(dest_addr, size);
+		if (errno == EINVAL) {
+			printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n");
+			return -1; /* retry with larger */
+		}
+		printf("[FAIL]\tmremap failed (%d): %m\n", errno);
+		return 1;
+	}
+
+	return 0;
+
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	unsigned long auxval;
+	const char *ok_string = "[OK]\n";
+	int ret = -1;
+	unsigned long vdso_size = PAGE_SIZE;
+
+	auxval = getauxval(AT_SYSINFO_EHDR);
+	printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
+	if (!auxval || auxval == -ENOENT) {
+		printf("[WARN]\tgetauxval failed\n");
+		return 0;
+	}
+
+	/* simpler than parsing ELF header */
+	while(ret < 0) {
+		ret = try_to_remap((void *)auxval, vdso_size);
+		vdso_size += PAGE_SIZE;
+	}
+
+	if (!ret)
+#if defined(__i386__)
+		asm volatile ("int $0x80" : :
+			"a" (__NR_write), "b" (STDOUT_FILENO),
+			"c" (ok_string), "d" (strlen(ok_string)));
+
+	asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else
+		asm volatile ("syscall" : :
+			"a" (__NR_write), "D" (STDOUT_FILENO),
+			"S" (ok_string), "d" (strlen(ok_string)));
+
+	asm volatile ("syscall" : : "a" (__NR_exit), "D" (!!ret));
+#endif
+
+	return 0;
+}
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-04-25 11:37 ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
  2016-04-25 11:37   ` [PATCHv8 2/2] selftest/x86: add mremap vdso test Dmitry Safonov
@ 2016-04-25 21:38   ` Andy Lutomirski
       [not found]     ` <e0a10957-ddf7-1bc4-fad6-8b5836628fce@virtuozzo.com>
  1 sibling, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-25 21:38 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On Mon, Apr 25, 2016 at 4:37 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> Add possibility for userspace 32-bit applications to move
> vdso mapping. Previously, when userspace app called
> mremap for vdso, in return path it would land on previous
> address of vdso page, resulting in segmentation violation.
> Now it lands fine and returns to userspace with remapped vdso.
> This will also fix context.vdso pointer for 64-bit, which does not
> affect the user of vdso after mremap by now, but this may change.
>
> As suggested by Andy, return EINVAL for mremap that splits vdso image.
>
> Renamed and moved text_mapping structure declaration inside
> map_vdso, as it used only there and now it complement
> vvar_mapping variable.
>
> There is still problem for remapping vdso in glibc applications:
> linker relocates addresses for syscalls on vdso page, so
> you need to relink with the new addresses. Or the next syscall
> through glibc may fail:
>   Program received signal SIGSEGV, Segmentation fault.
>   #0  0xf7fd9b80 in __kernel_vsyscall ()
>   #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6

Acked-by: Andy Lutomirski <luto@kernel.org>

Ingo, can you apply this?


>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
> ---
> v8: add WARN_ON_ONCE on current->mm != new_vma->vm_mm
> v7: build fix
> v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function
>     with ifdefs around
> v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are
>     the same, also check not only in_ia32_syscall() but image == &vdso_image_32
> v4: drop __maybe_unused & use image from mm->context instead vdso_image_32
> v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap;
>     used is_ia32_task instead of ifdefs
> v2: added __maybe_unused for pt_regs in vdso_mremap
>
>  arch/x86/entry/vdso/vma.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
>  include/linux/mm_types.h  |  3 +++
>  mm/mmap.c                 | 10 ++++++++++
>  3 files changed, 55 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
> index 10f704584922..0625b66e5648 100644
> --- a/arch/x86/entry/vdso/vma.c
> +++ b/arch/x86/entry/vdso/vma.c
> @@ -12,6 +12,7 @@
>  #include <linux/random.h>
>  #include <linux/elf.h>
>  #include <linux/cpu.h>
> +#include <linux/ptrace.h>
>  #include <asm/pvclock.h>
>  #include <asm/vgtod.h>
>  #include <asm/proto.h>
> @@ -98,10 +99,40 @@ static int vdso_fault(const struct vm_special_mapping *sm,
>         return 0;
>  }
>
> -static const struct vm_special_mapping text_mapping = {
> -       .name = "[vdso]",
> -       .fault = vdso_fault,
> -};
> +static void vdso_fix_landing(const struct vdso_image *image,
> +               struct vm_area_struct *new_vma)
> +{
> +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
> +       if (in_ia32_syscall() && image == &vdso_image_32) {
> +               struct pt_regs *regs = current_pt_regs();
> +               unsigned long vdso_land = image->sym_int80_landing_pad;
> +               unsigned long old_land_addr = vdso_land +
> +                       (unsigned long)current->mm->context.vdso;
> +
> +               /* Fixing userspace landing - look at do_fast_syscall_32 */
> +               if (regs->ip == old_land_addr)
> +                       regs->ip = new_vma->vm_start + vdso_land;
> +       }
> +#endif
> +}
> +
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +               struct vm_area_struct *new_vma)
> +{
> +       unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
> +       const struct vdso_image *image = current->mm->context.vdso_image;
> +
> +       if (image->size != new_size)
> +               return -EINVAL;
> +
> +       if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
> +               return -EFAULT;
> +
> +       vdso_fix_landing(image, new_vma);
> +       current->mm->context.vdso = (void __user *)new_vma->vm_start;
> +
> +       return 0;
> +}
>
>  static int vvar_fault(const struct vm_special_mapping *sm,
>                       struct vm_area_struct *vma, struct vm_fault *vmf)
> @@ -162,6 +193,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>         struct vm_area_struct *vma;
>         unsigned long addr, text_start;
>         int ret = 0;
> +
> +       static const struct vm_special_mapping vdso_mapping = {
> +               .name = "[vdso]",
> +               .fault = vdso_fault,
> +               .mremap = vdso_mremap,
> +       };
>         static const struct vm_special_mapping vvar_mapping = {
>                 .name = "[vvar]",
>                 .fault = vvar_fault,
> @@ -195,7 +232,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
>                                        image->size,
>                                        VM_READ|VM_EXEC|
>                                        VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
> -                                      &text_mapping);
> +                                      &vdso_mapping);
>
>         if (IS_ERR(vma)) {
>                 ret = PTR_ERR(vma);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index c2d75b4fa86c..4d16ab9287af 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -586,6 +586,9 @@ struct vm_special_mapping {
>         int (*fault)(const struct vm_special_mapping *sm,
>                      struct vm_area_struct *vma,
>                      struct vm_fault *vmf);
> +
> +       int (*mremap)(const struct vm_special_mapping *sm,
> +                    struct vm_area_struct *new_vma);
>  };
>
>  enum tlb_flush_reason {
> diff --git a/mm/mmap.c b/mm/mmap.c
> index bd2e1a533bc1..ba71658dd1a1 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
>         return ((struct vm_special_mapping *)vma->vm_private_data)->name;
>  }
>
> +static int special_mapping_mremap(struct vm_area_struct *new_vma)
> +{
> +       struct vm_special_mapping *sm = new_vma->vm_private_data;
> +
> +       if (sm->mremap)
> +               return sm->mremap(sm, new_vma);
> +       return 0;
> +}
> +
>  static const struct vm_operations_struct special_mapping_vmops = {
>         .close = special_mapping_close,
>         .fault = special_mapping_fault,
> +       .mremap = special_mapping_mremap,
>         .name = special_mapping_name,
>  };
>
> --
> 2.8.0
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv8 2/2] selftest/x86: add mremap vdso test
  2016-04-25 11:37   ` [PATCHv8 2/2] selftest/x86: add mremap vdso test Dmitry Safonov
@ 2016-04-25 21:39     ` Andy Lutomirski
  0 siblings, 0 replies; 56+ messages in thread
From: Andy Lutomirski @ 2016-04-25 21:39 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	X86 ML, Andrew Morton, linux-mm, Dmitry Safonov, Shuah Khan,
	linux-kselftest

On Mon, Apr 25, 2016 at 4:37 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> Should print on success:
> [root@localhost ~]# ./test_mremap_vdso_32
>         AT_SYSINFO_EHDR is 0xf773f000
> [NOTE]  Moving vDSO: [f773f000, f7740000] -> [a000000, a001000]
> [OK]
> Or segfault if landing was bad (before patches):
> [root@localhost ~]# ./test_mremap_vdso_32
>         AT_SYSINFO_EHDR is 0xf774f000
> [NOTE]  Moving vDSO: [f774f000, f7750000] -> [a000000, a001000]
> Segmentation fault (core dumped)

Acked-by: Andy Lutomirski <luto@amacapital.net>

Ingo, can you apply this?

>
> Cc: Shuah Khan <shuahkh@osg.samsung.com>
> Cc: linux-kselftest@vger.kernel.org
> Suggested-by: Andy Lutomirski <luto@kernel.org>
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
> ---
> v8: run test for x86_64 too;
>     removed fixed VDSO_SIZE - check EINVAL mremap return for partial remapping
> v5: initial version
>
>  tools/testing/selftests/x86/Makefile           |  2 +-
>  tools/testing/selftests/x86/test_mremap_vdso.c | 99 ++++++++++++++++++++++++++
>  2 files changed, 100 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/x86/test_mremap_vdso.c
>
> diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
> index b47ebd170690..ba865f2efcce 100644
> --- a/tools/testing/selftests/x86/Makefile
> +++ b/tools/testing/selftests/x86/Makefile
> @@ -5,7 +5,7 @@ include ../lib.mk
>  .PHONY: all all_32 all_64 warn_32bit_failure clean
>
>  TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
> -                       check_initial_reg_state sigreturn ldt_gdt iopl
> +                       check_initial_reg_state sigreturn ldt_gdt iopl test_mremap_vdso
>  TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
>                         test_FCMOV test_FCOMI test_FISTTP \
>                         vdso_restorer
> diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
> new file mode 100644
> index 000000000000..831e2e0107d9
> --- /dev/null
> +++ b/tools/testing/selftests/x86/test_mremap_vdso.c
> @@ -0,0 +1,99 @@
> +/*
> + * 32-bit test to check vdso mremap.
> + *
> + * Copyright (c) 2016 Dmitry Safonov
> + * Suggested-by: Andrew Lutomirski
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +/*
> + * Can be built statically:
> + * gcc -Os -Wall -static -m32 test_mremap_vdso.c
> + */
> +#define _GNU_SOURCE
> +#include <stdio.h>
> +#include <errno.h>
> +#include <unistd.h>
> +#include <string.h>
> +
> +#include <sys/mman.h>
> +#include <sys/auxv.h>
> +#include <sys/syscall.h>
> +
> +#define PAGE_SIZE      4096
> +
> +static int try_to_remap(void *vdso_addr, unsigned long size)
> +{
> +       void *dest_addr, *new_addr;
> +
> +       dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
> +       if (dest_addr == MAP_FAILED) {
> +               printf("[WARN]\tmmap failed (%d): %m\n", errno);
> +               return 0;
> +       }
> +
> +       printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
> +               vdso_addr, (unsigned long)vdso_addr + size,
> +               dest_addr, (unsigned long)dest_addr + size);
> +       fflush(stdout);
> +
> +       new_addr = mremap(vdso_addr, size, size,
> +                       MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
> +       if ((unsigned long)new_addr == (unsigned long)-1) {
> +               munmap(dest_addr, size);
> +               if (errno == EINVAL) {
> +                       printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n");
> +                       return -1; /* retry with larger */
> +               }
> +               printf("[FAIL]\tmremap failed (%d): %m\n", errno);
> +               return 1;
> +       }
> +
> +       return 0;
> +
> +}
> +
> +int main(int argc, char **argv, char **envp)
> +{
> +       unsigned long auxval;
> +       const char *ok_string = "[OK]\n";
> +       int ret = -1;
> +       unsigned long vdso_size = PAGE_SIZE;
> +
> +       auxval = getauxval(AT_SYSINFO_EHDR);
> +       printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
> +       if (!auxval || auxval == -ENOENT) {
> +               printf("[WARN]\tgetauxval failed\n");
> +               return 0;
> +       }
> +
> +       /* simpler than parsing ELF header */
> +       while(ret < 0) {
> +               ret = try_to_remap((void *)auxval, vdso_size);
> +               vdso_size += PAGE_SIZE;
> +       }
> +
> +       if (!ret)
> +#if defined(__i386__)
> +               asm volatile ("int $0x80" : :
> +                       "a" (__NR_write), "b" (STDOUT_FILENO),
> +                       "c" (ok_string), "d" (strlen(ok_string)));
> +
> +       asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
> +#else
> +               asm volatile ("syscall" : :
> +                       "a" (__NR_write), "D" (STDOUT_FILENO),
> +                       "S" (ok_string), "d" (strlen(ok_string)));
> +
> +       asm volatile ("syscall" : : "a" (__NR_exit), "D" (!!ret));
> +#endif
> +
> +       return 0;
> +}
> --
> 2.8.0
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping
       [not found]     ` <e0a10957-ddf7-1bc4-fad6-8b5836628fce@virtuozzo.com>
@ 2016-05-05 11:52       ` Ingo Molnar
  2016-05-05 11:55         ` Dmitry Safonov
  0 siblings, 1 reply; 56+ messages in thread
From: Ingo Molnar @ 2016-05-05 11:52 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: Andy Lutomirski, Ingo Molnar, linux-kernel, Thomas Gleixner,
	H. Peter Anvin, X86 ML, Andrew Morton, linux-mm, Dmitry Safonov


* Dmitry Safonov <dsafonov@virtuozzo.com> wrote:

> On 04/26/2016 12:38 AM, Andy Lutomirski wrote:
> >On Mon, Apr 25, 2016 at 4:37 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
> >>Add possibility for userspace 32-bit applications to move
> >>vdso mapping. Previously, when userspace app called
> >>mremap for vdso, in return path it would land on previous
> >>address of vdso page, resulting in segmentation violation.
> >>Now it lands fine and returns to userspace with remapped vdso.
> >>This will also fix context.vdso pointer for 64-bit, which does not
> >>affect the user of vdso after mremap by now, but this may change.
> >>
> >>As suggested by Andy, return EINVAL for mremap that splits vdso image.
> >>
> >>Renamed and moved text_mapping structure declaration inside
> >>map_vdso, as it used only there and now it complement
> >>vvar_mapping variable.
> >>
> >>There is still problem for remapping vdso in glibc applications:
> >>linker relocates addresses for syscalls on vdso page, so
> >>you need to relink with the new addresses. Or the next syscall
> >>through glibc may fail:
> >>   Program received signal SIGSEGV, Segmentation fault.
> >>   #0  0xf7fd9b80 in __kernel_vsyscall ()
> >>   #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
> >Acked-by: Andy Lutomirski <luto@kernel.org>
> >
> >Ingo, can you apply this?
> 
> Hm, so I'm not sure - should I resend those two?
> Or just ping?

Please send a clean series with updated Acked-by's, etc.

Thanks!

	Ingo

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping
  2016-05-05 11:52       ` Ingo Molnar
@ 2016-05-05 11:55         ` Dmitry Safonov
  0 siblings, 0 replies; 56+ messages in thread
From: Dmitry Safonov @ 2016-05-05 11:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andy Lutomirski, Ingo Molnar, linux-kernel, Thomas Gleixner,
	H. Peter Anvin, X86 ML, Andrew Morton, linux-mm, Dmitry Safonov

On 05/05/2016 02:52 PM, Ingo Molnar wrote:
>
> * Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
>
>> On 04/26/2016 12:38 AM, Andy Lutomirski wrote:
>>> On Mon, Apr 25, 2016 at 4:37 AM, Dmitry Safonov <dsafonov@virtuozzo.com> wrote:
>>>> Add possibility for userspace 32-bit applications to move
>>>> vdso mapping. Previously, when userspace app called
>>>> mremap for vdso, in return path it would land on previous
>>>> address of vdso page, resulting in segmentation violation.
>>>> Now it lands fine and returns to userspace with remapped vdso.
>>>> This will also fix context.vdso pointer for 64-bit, which does not
>>>> affect the user of vdso after mremap by now, but this may change.
>>>>
>>>> As suggested by Andy, return EINVAL for mremap that splits vdso image.
>>>>
>>>> Renamed and moved text_mapping structure declaration inside
>>>> map_vdso, as it used only there and now it complement
>>>> vvar_mapping variable.
>>>>
>>>> There is still problem for remapping vdso in glibc applications:
>>>> linker relocates addresses for syscalls on vdso page, so
>>>> you need to relink with the new addresses. Or the next syscall
>>>> through glibc may fail:
>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>   #0  0xf7fd9b80 in __kernel_vsyscall ()
>>>>   #1  0xf7ec8238 in _exit () from /usr/lib32/libc.so.6
>>> Acked-by: Andy Lutomirski <luto@kernel.org>
>>>
>>> Ingo, can you apply this?
>>
>> Hm, so I'm not sure - should I resend those two?
>> Or just ping?
>
> Please send a clean series with updated Acked-by's, etc.


Thanks, Ingo, will do.
Sorry for html in the last email - mail agent got an
update and I overlooked html composing become turned on.

-- 
Regards,
Dmitry Safonov

^ permalink raw reply	[flat|nested] 56+ messages in thread

* in_compat_syscall() returns from kernel thread for X86_32.
  2016-04-19  9:34   ` [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() tip-bot for Dmitry Safonov
  2016-04-19 11:15     ` Ingo Molnar
@ 2018-10-18  1:47     ` NeilBrown
  2018-10-18  2:37       ` Andy Lutomirski
  1 sibling, 1 reply; 56+ messages in thread
From: NeilBrown @ 2018-10-18  1:47 UTC (permalink / raw)
  To: peterz, dsafonov, luto, hpa, dvlasenk, luto, torvalds, bp, mingo,
	brgerst, linux-kernel, tglx, linux-tip-commits
  Cc: peterz, dsafonov, luto, luto, dvlasenk, hpa, mingo, torvalds, bp,
	tglx, linux-kernel, brgerst, James Simmons

[-- Attachment #1: Type: text/plain, Size: 2042 bytes --]


Was: Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
On Tue, Apr 19 2016, tip-bot for Dmitry Safonov wrote:

> Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
> Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
> Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
> AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
> Committer:  Ingo Molnar <mingo@kernel.org>
> CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
>
> x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>
...
> @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
>  
>  static inline bool in_compat_syscall(void)
>  {
> -	return is_ia32_task() || is_x32_task();
> +	return in_ia32_syscall() || in_x32_syscall();
>  }

Hi,
 I'm reply to this patch largely to make sure I get the right people
 .....

This test is always true when CONFIG_X86_32 is set, as that forces
in_ia32_syscall() to true.
However we might not be in a syscall at all - we might be running a
kernel thread which is always in 64 mode.
Every other implementation of in_compat_syscall() that I found is
dependant on a thread flag or syscall register flag, and so returns
"false" in a kernel thread.

Might something like this be appropriate?

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2ff2a30a264f..c265b40a78f2 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -219,7 +219,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_X86_32
-#define in_ia32_syscall() true
+#define in_ia32_syscall() (!(current->flags & PF_KTHREAD))
 #else
 #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
 			   current_thread_info()->status & TS_COMPAT)

This came up in the (no out-of-tree) lustre filesystem where some code
needs to assume 32-bit mode in X86_32 syscalls, and 64-bit mode in kernel
threads.

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply related	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-18  1:47     ` in_compat_syscall() returns from kernel thread for X86_32 NeilBrown
@ 2018-10-18  2:37       ` Andy Lutomirski
  2018-10-18  2:49         ` Al Viro
  2018-10-18  4:36         ` NeilBrown
  0 siblings, 2 replies; 56+ messages in thread
From: Andy Lutomirski @ 2018-10-18  2:37 UTC (permalink / raw)
  To: NeilBrown
  Cc: Peter Zijlstra, Dmitry Safonov, Andrew Lutomirski,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

On Wed, Oct 17, 2018 at 6:48 PM NeilBrown <neilb@suse.com> wrote:
>
>
> Was: Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
> On Tue, Apr 19 2016, tip-bot for Dmitry Safonov wrote:
>
> > Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
> > Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
> > Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
> > AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
> > Committer:  Ingo Molnar <mingo@kernel.org>
> > CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
> >
> > x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
> >
> ...
> > @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
> >
> >  static inline bool in_compat_syscall(void)
> >  {
> > -     return is_ia32_task() || is_x32_task();
> > +     return in_ia32_syscall() || in_x32_syscall();
> >  }
>
> Hi,
>  I'm reply to this patch largely to make sure I get the right people
>  .....
>
> This test is always true when CONFIG_X86_32 is set, as that forces
> in_ia32_syscall() to true.
> However we might not be in a syscall at all - we might be running a
> kernel thread which is always in 64 mode.
> Every other implementation of in_compat_syscall() that I found is
> dependant on a thread flag or syscall register flag, and so returns
> "false" in a kernel thread.
>
> Might something like this be appropriate?
>
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 2ff2a30a264f..c265b40a78f2 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -219,7 +219,7 @@ static inline int arch_within_stack_frames(const void * const stack,
>  #ifndef __ASSEMBLY__
>
>  #ifdef CONFIG_X86_32
> -#define in_ia32_syscall() true
> +#define in_ia32_syscall() (!(current->flags & PF_KTHREAD))
>  #else
>  #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
>                            current_thread_info()->status & TS_COMPAT)
>
> This came up in the (no out-of-tree) lustre filesystem where some code
> needs to assume 32-bit mode in X86_32 syscalls, and 64-bit mode in kernel
> threads.
>

I could get on board with:

({WARN_ON_ONCE(current->flags & PF_KTHREAD); true})

The point of these accessors is to be used *in a syscall*.

What on Earth is Lustre doing that makes it have this problem?

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-18  2:37       ` Andy Lutomirski
@ 2018-10-18  2:49         ` Al Viro
  2018-10-18  4:36         ` NeilBrown
  1 sibling, 0 replies; 56+ messages in thread
From: Al Viro @ 2018-10-18  2:49 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: NeilBrown, Peter Zijlstra, Dmitry Safonov, Andrew Lutomirski,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

On Wed, Oct 17, 2018 at 07:37:42PM -0700, Andy Lutomirski wrote:

> I could get on board with:
> 
> ({WARN_ON_ONCE(current->flags & PF_KTHREAD); true})
> 
> The point of these accessors is to be used *in a syscall*.
> 
> What on Earth is Lustre doing that makes it have this problem?

	Plays with timestamps from a kernel thread, perhaps?
See the old .su joke about adenoidectomy with rectal access
for possible reasons for doing it that way...

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-18  2:37       ` Andy Lutomirski
  2018-10-18  2:49         ` Al Viro
@ 2018-10-18  4:36         ` NeilBrown
  2018-10-18 17:26           ` Andy Lutomirski
  1 sibling, 1 reply; 56+ messages in thread
From: NeilBrown @ 2018-10-18  4:36 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Peter Zijlstra, Dmitry Safonov, Andrew Lutomirski,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

[-- Attachment #1: Type: text/plain, Size: 3010 bytes --]

On Wed, Oct 17 2018, Andy Lutomirski wrote:

> On Wed, Oct 17, 2018 at 6:48 PM NeilBrown <neilb@suse.com> wrote:
>>
>>
>> Was: Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>> On Tue, Apr 19 2016, tip-bot for Dmitry Safonov wrote:
>>
>> > Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
>> > Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
>> > Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
>> > AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
>> > Committer:  Ingo Molnar <mingo@kernel.org>
>> > CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
>> >
>> > x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>> >
>> ...
>> > @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
>> >
>> >  static inline bool in_compat_syscall(void)
>> >  {
>> > -     return is_ia32_task() || is_x32_task();
>> > +     return in_ia32_syscall() || in_x32_syscall();
>> >  }
>>
>> Hi,
>>  I'm reply to this patch largely to make sure I get the right people
>>  .....
>>
>> This test is always true when CONFIG_X86_32 is set, as that forces
>> in_ia32_syscall() to true.
>> However we might not be in a syscall at all - we might be running a
>> kernel thread which is always in 64 mode.
>> Every other implementation of in_compat_syscall() that I found is
>> dependant on a thread flag or syscall register flag, and so returns
>> "false" in a kernel thread.
>>
>> Might something like this be appropriate?
>>
>> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
>> index 2ff2a30a264f..c265b40a78f2 100644
>> --- a/arch/x86/include/asm/thread_info.h
>> +++ b/arch/x86/include/asm/thread_info.h
>> @@ -219,7 +219,7 @@ static inline int arch_within_stack_frames(const void * const stack,
>>  #ifndef __ASSEMBLY__
>>
>>  #ifdef CONFIG_X86_32
>> -#define in_ia32_syscall() true
>> +#define in_ia32_syscall() (!(current->flags & PF_KTHREAD))
>>  #else
>>  #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
>>                            current_thread_info()->status & TS_COMPAT)
>>
>> This came up in the (no out-of-tree) lustre filesystem where some code
>> needs to assume 32-bit mode in X86_32 syscalls, and 64-bit mode in kernel
>> threads.
>>
>
> I could get on board with:
>
> ({WARN_ON_ONCE(current->flags & PF_KTHREAD); true})
>
> The point of these accessors is to be used *in a syscall*.
>
> What on Earth is Lustre doing that makes it have this problem?

Lustre uses it in the ->getattr method to make sure ->ino, ->dev and
->rdev are appropriately sized.  This isn't very different from the
usage in ext4 to ensure the seek offset for directories is suitable.

These interfaces can be used both from systemcalls and from kernel
threads, such as via nfsd.

I don't *know* if nfsd is the particular kthread that causes problems
for lustre.  All I know is that ->getattr returns 32bit squashed inode
numbers in kthread context where 64 bit numbers would be expected.

Thanks,
NeilBrown


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-18  4:36         ` NeilBrown
@ 2018-10-18 17:26           ` Andy Lutomirski
  2018-10-20  6:02             ` Andreas Dilger
  2018-10-24  1:47             ` NeilBrown
  0 siblings, 2 replies; 56+ messages in thread
From: Andy Lutomirski @ 2018-10-18 17:26 UTC (permalink / raw)
  To: NeilBrown, Ted Ts'o, Andreas Dilger
  Cc: Peter Zijlstra, Dmitry Safonov, Andrew Lutomirski,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

On Wed, Oct 17, 2018 at 9:36 PM NeilBrown <neilb@suse.com> wrote:
>
> On Wed, Oct 17 2018, Andy Lutomirski wrote:
>
> > On Wed, Oct 17, 2018 at 6:48 PM NeilBrown <neilb@suse.com> wrote:
> >>
> >>
> >> Was: Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
> >> On Tue, Apr 19 2016, tip-bot for Dmitry Safonov wrote:
> >>
> >> > Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
> >> > Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
> >> > Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
> >> > AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
> >> > Committer:  Ingo Molnar <mingo@kernel.org>
> >> > CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
> >> >
> >> > x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
> >> >
> >> ...
> >> > @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
> >> >
> >> >  static inline bool in_compat_syscall(void)
> >> >  {
> >> > -     return is_ia32_task() || is_x32_task();
> >> > +     return in_ia32_syscall() || in_x32_syscall();
> >> >  }
> >>
> >> Hi,
> >>  I'm reply to this patch largely to make sure I get the right people
> >>  .....
> >>
> >> This test is always true when CONFIG_X86_32 is set, as that forces
> >> in_ia32_syscall() to true.
> >> However we might not be in a syscall at all - we might be running a
> >> kernel thread which is always in 64 mode.
> >> Every other implementation of in_compat_syscall() that I found is
> >> dependant on a thread flag or syscall register flag, and so returns
> >> "false" in a kernel thread.
> >>
> >> Might something like this be appropriate?
> >>
> >> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> >> index 2ff2a30a264f..c265b40a78f2 100644
> >> --- a/arch/x86/include/asm/thread_info.h
> >> +++ b/arch/x86/include/asm/thread_info.h
> >> @@ -219,7 +219,7 @@ static inline int arch_within_stack_frames(const void * const stack,
> >>  #ifndef __ASSEMBLY__
> >>
> >>  #ifdef CONFIG_X86_32
> >> -#define in_ia32_syscall() true
> >> +#define in_ia32_syscall() (!(current->flags & PF_KTHREAD))
> >>  #else
> >>  #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
> >>                            current_thread_info()->status & TS_COMPAT)
> >>
> >> This came up in the (no out-of-tree) lustre filesystem where some code
> >> needs to assume 32-bit mode in X86_32 syscalls, and 64-bit mode in kernel
> >> threads.
> >>
> >
> > I could get on board with:
> >
> > ({WARN_ON_ONCE(current->flags & PF_KTHREAD); true})
> >
> > The point of these accessors is to be used *in a syscall*.
> >
> > What on Earth is Lustre doing that makes it have this problem?
>
> Lustre uses it in the ->getattr method to make sure ->ino, ->dev and
> ->rdev are appropriately sized.  This isn't very different from the
> usage in ext4 to ensure the seek offset for directories is suitable.
>
> These interfaces can be used both from systemcalls and from kernel
> threads, such as via nfsd.
>
> I don't *know* if nfsd is the particular kthread that causes problems
> for lustre.  All I know is that ->getattr returns 32bit squashed inode
> numbers in kthread context where 64 bit numbers would be expected.
>

Well, that looks like Lustre is copying an ext4 bug.

Hi ext4 people-

ext4's is_32bit_api() function is bogus.  You can't use
in_compat_syscall() unless you know you're in a syscall

The buggy code was introduced in:

commit d1f5273e9adb40724a85272f248f210dc4ce919a
Author: Fan Yong <yong.fan@whamcloud.com>
Date:   Sun Mar 18 22:44:40 2012 -0400

    ext4: return 32/64-bit dir name hash according to usage type

I don't know what the right solution is.  Al, is it legit at all for
fops->llseek to care about the caller's bitness?  If what ext4 is
doing is legit, then ISTM the VFS needs to gain a new API to tell
->llseek what to do.  But I'm wondering why FMODE_64BITHASH by itself
isn't sufficient,

I'm quite tempted to add a warning to the x86 arch code to try to
catch this type of bug.  Fortunately, a bit of grepping suggests that
ext4 is the only filesystem with this problem.

--Andy

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-18 17:26           ` Andy Lutomirski
@ 2018-10-20  6:02             ` Andreas Dilger
  2018-10-20  7:58               ` Andy Lutomirski
  2018-10-24  1:47             ` NeilBrown
  1 sibling, 1 reply; 56+ messages in thread
From: Andreas Dilger @ 2018-10-20  6:02 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: NeilBrown, Ted Ts'o, Peter Zijlstra, Dmitry Safonov,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, James Simmons

[-- Attachment #1: Type: text/plain, Size: 4690 bytes --]

On Oct 18, 2018, at 11:26 AM, Andy Lutomirski <luto@kernel.org> wrote:
> 
> On Wed, Oct 17, 2018 at 9:36 PM NeilBrown <neilb@suse.com> wrote:
>> 
>> On Wed, Oct 17 2018, Andy Lutomirski wrote:
>> 
>>> On Wed, Oct 17, 2018 at 6:48 PM NeilBrown <neilb@suse.com> wrote:
>>>> 
>>>> 
>>>> Was: Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>>>> On Tue, Apr 19 2016, tip-bot for Dmitry Safonov wrote:
>>>> 
>>>>> Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
>>>>> Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
>>>>> Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
>>>>> AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
>>>>> Committer:  Ingo Molnar <mingo@kernel.org>
>>>>> CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
>>>>> 
>>>>> x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>>>>> 
>>>> ...
>>>>> @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
>>>>> 
>>>>> static inline bool in_compat_syscall(void)
>>>>> {
>>>>> -     return is_ia32_task() || is_x32_task();
>>>>> +     return in_ia32_syscall() || in_x32_syscall();
>>>>> }
>>>> 
>>>> Hi,
>>>> I'm reply to this patch largely to make sure I get the right people
>>>> .....
>>>> 
>>>> This test is always true when CONFIG_X86_32 is set, as that forces
>>>> in_ia32_syscall() to true.
>>>> However we might not be in a syscall at all - we might be running a
>>>> kernel thread which is always in 64 mode.
>>>> Every other implementation of in_compat_syscall() that I found is
>>>> dependant on a thread flag or syscall register flag, and so returns
>>>> "false" in a kernel thread.
>>>> 
>>>> Might something like this be appropriate?
>>>> 
>>>> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
>>>> index 2ff2a30a264f..c265b40a78f2 100644
>>>> --- a/arch/x86/include/asm/thread_info.h
>>>> +++ b/arch/x86/include/asm/thread_info.h
>>>> @@ -219,7 +219,7 @@ static inline int arch_within_stack_frames(const void * const stack,
>>>> #ifndef __ASSEMBLY__
>>>> 
>>>> #ifdef CONFIG_X86_32
>>>> -#define in_ia32_syscall() true
>>>> +#define in_ia32_syscall() (!(current->flags & PF_KTHREAD))
>>>> #else
>>>> #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
>>>>                           current_thread_info()->status & TS_COMPAT)
>>>> 
>>>> This came up in the (no out-of-tree) lustre filesystem where some code
>>>> needs to assume 32-bit mode in X86_32 syscalls, and 64-bit mode in kernel
>>>> threads.
>>>> 
>>> 
>>> I could get on board with:
>>> 
>>> ({WARN_ON_ONCE(current->flags & PF_KTHREAD); true})
>>> 
>>> The point of these accessors is to be used *in a syscall*.
>>> 
>>> What on Earth is Lustre doing that makes it have this problem?
>> 
>> Lustre uses it in the ->getattr method to make sure ->ino, ->dev and
>> ->rdev are appropriately sized.  This isn't very different from the
>> usage in ext4 to ensure the seek offset for directories is suitable.
>> 
>> These interfaces can be used both from systemcalls and from kernel
>> threads, such as via nfsd.
>> 
>> I don't *know* if nfsd is the particular kthread that causes problems
>> for lustre.  All I know is that ->getattr returns 32bit squashed inode
>> numbers in kthread context where 64 bit numbers would be expected.
>> 
> 
> Well, that looks like Lustre is copying an ext4 bug.
> 
> Hi ext4 people-
> 
> ext4's is_32bit_api() function is bogus.  You can't use
> in_compat_syscall() unless you know you're in a syscall
> 
> The buggy code was introduced in:
> 
> commit d1f5273e9adb40724a85272f248f210dc4ce919a
> Author: Fan Yong <yong.fan@whamcloud.com>
> Date:   Sun Mar 18 22:44:40 2012 -0400
> 
>    ext4: return 32/64-bit dir name hash according to usage type
> 
> I don't know what the right solution is.  Al, is it legit at all for
> fops->llseek to care about the caller's bitness?  If what ext4 is
> doing is legit, then ISTM the VFS needs to gain a new API to tell
> ->llseek what to do.  But I'm wondering why FMODE_64BITHASH by itself
> isn't sufficient,
> 
> I'm quite tempted to add a warning to the x86 arch code to try to
> catch this type of bug.  Fortunately, a bit of grepping suggests that
> ext4 is the only filesystem with this problem.

We need to know whether the readdir cookie returned to userspace
should be a 32-bit cookie or a 64-bit cookie.  Trying to return
a 64-bit value will result in -EOVERFLOW for a 32-bit application,
but is preferable (if possible) because it reduces the chance of
hash collisions causing readdir to have problems.

Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 873 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-20  6:02             ` Andreas Dilger
@ 2018-10-20  7:58               ` Andy Lutomirski
  0 siblings, 0 replies; 56+ messages in thread
From: Andy Lutomirski @ 2018-10-20  7:58 UTC (permalink / raw)
  To: Andreas Dilger, Al Viro, linux-fsdevel
  Cc: Andy Lutomirski, NeilBrown, Ted Ts'o, Peter Zijlstra,
	Dmitry Safonov, H. Peter Anvin, Denys Vlasenko, Linus Torvalds,
	Borislav Petkov, Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, James Simmons



> On Oct 19, 2018, at 11:02 PM, Andreas Dilger <adilger@dilger.ca> wrote:
> 
>> On Oct 18, 2018, at 11:26 AM, Andy Lutomirski <luto@kernel.org> wrote:
>> 
>>> On Wed, Oct 17, 2018 at 9:36 PM NeilBrown <neilb@suse.com> wrote:
>>> 
>>>> On Wed, Oct 17 2018, Andy Lutomirski wrote:
>>>> 
>>>>> On Wed, Oct 17, 2018 at 6:48 PM NeilBrown <neilb@suse.com> wrote:
>>>>> 
>>>>> 
>>>>> Was: Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>>>>>> On Tue, Apr 19 2016, tip-bot for Dmitry Safonov wrote:
>>>>>> 
>>>>>> Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
>>>>>> Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
>>>>>> Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
>>>>>> AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
>>>>>> Committer:  Ingo Molnar <mingo@kernel.org>
>>>>>> CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
>>>>>> 
>>>>>> x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>>>>>> 
>>>>> ...
>>>>>> @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
>>>>>> 
>>>>>> static inline bool in_compat_syscall(void)
>>>>>> {
>>>>>> -     return is_ia32_task() || is_x32_task();
>>>>>> +     return in_ia32_syscall() || in_x32_syscall();
>>>>>> }
>>>>> 
>>>>> Hi,
>>>>> I'm reply to this patch largely to make sure I get the right people
>>>>> .....
>>>>> 
>>>>> This test is always true when CONFIG_X86_32 is set, as that forces
>>>>> in_ia32_syscall() to true.
>>>>> However we might not be in a syscall at all - we might be running a
>>>>> kernel thread which is always in 64 mode.
>>>>> Every other implementation of in_compat_syscall() that I found is
>>>>> dependant on a thread flag or syscall register flag, and so returns
>>>>> "false" in a kernel thread.
>>>>> 
>>>>> Might something like this be appropriate?
>>>>> 
>>>>> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
>>>>> index 2ff2a30a264f..c265b40a78f2 100644
>>>>> --- a/arch/x86/include/asm/thread_info.h
>>>>> +++ b/arch/x86/include/asm/thread_info.h
>>>>> @@ -219,7 +219,7 @@ static inline int arch_within_stack_frames(const void * const stack,
>>>>> #ifndef __ASSEMBLY__
>>>>> 
>>>>> #ifdef CONFIG_X86_32
>>>>> -#define in_ia32_syscall() true
>>>>> +#define in_ia32_syscall() (!(current->flags & PF_KTHREAD))
>>>>> #else
>>>>> #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
>>>>>                          current_thread_info()->status & TS_COMPAT)
>>>>> 
>>>>> This came up in the (no out-of-tree) lustre filesystem where some code
>>>>> needs to assume 32-bit mode in X86_32 syscalls, and 64-bit mode in kernel
>>>>> threads.
>>>>> 
>>>> 
>>>> I could get on board with:
>>>> 
>>>> ({WARN_ON_ONCE(current->flags & PF_KTHREAD); true})
>>>> 
>>>> The point of these accessors is to be used *in a syscall*.
>>>> 
>>>> What on Earth is Lustre doing that makes it have this problem?
>>> 
>>> Lustre uses it in the ->getattr method to make sure ->ino, ->dev and
>>> ->rdev are appropriately sized.  This isn't very different from the
>>> usage in ext4 to ensure the seek offset for directories is suitable.
>>> 
>>> These interfaces can be used both from systemcalls and from kernel
>>> threads, such as via nfsd.
>>> 
>>> I don't *know* if nfsd is the particular kthread that causes problems
>>> for lustre.  All I know is that ->getattr returns 32bit squashed inode
>>> numbers in kthread context where 64 bit numbers would be expected.
>>> 
>> 
>> Well, that looks like Lustre is copying an ext4 bug.
>> 
>> Hi ext4 people-
>> 
>> ext4's is_32bit_api() function is bogus.  You can't use
>> in_compat_syscall() unless you know you're in a syscall
>> 
>> The buggy code was introduced in:
>> 
>> commit d1f5273e9adb40724a85272f248f210dc4ce919a
>> Author: Fan Yong <yong.fan@whamcloud.com>
>> Date:   Sun Mar 18 22:44:40 2012 -0400
>> 
>>   ext4: return 32/64-bit dir name hash according to usage type
>> 
>> I don't know what the right solution is.  Al, is it legit at all for
>> fops->llseek to care about the caller's bitness?  If what ext4 is
>> doing is legit, then ISTM the VFS needs to gain a new API to tell
>> ->llseek what to do.  But I'm wondering why FMODE_64BITHASH by itself
>> isn't sufficient,
>> 
>> I'm quite tempted to add a warning to the x86 arch code to try to
>> catch this type of bug.  Fortunately, a bit of grepping suggests that
>> ext4 is the only filesystem with this problem.
> 
> We need to know whether the readdir cookie returned to userspace
> should be a 32-bit cookie or a 64-bit cookie.  Trying to return
> a 64-bit value will result in -EOVERFLOW for a 32-bit application,
> but is preferable (if possible) because it reduces the chance of
> hash collisions causing readdir to have problems.
> 
> 

Let’s rope Al in. Sorry, I thought he was already on cc.

The concept seems reasonable, but the implementation is problematic. For example, the behavior of calling vfs_llseek() is basically undefined.

Is some VFS change needed to fix this?  Maybe a .compat_llseek or some other explicit indication of whether a 64-bit hash is okay?

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-18 17:26           ` Andy Lutomirski
  2018-10-20  6:02             ` Andreas Dilger
@ 2018-10-24  1:47             ` NeilBrown
  2018-10-24 13:15               ` Theodore Y. Ts'o
  1 sibling, 1 reply; 56+ messages in thread
From: NeilBrown @ 2018-10-24  1:47 UTC (permalink / raw)
  To: Andy Lutomirski, Ted Ts'o, Andreas Dilger
  Cc: Peter Zijlstra, Dmitry Safonov, Andrew Lutomirski,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

[-- Attachment #1: Type: text/plain, Size: 4615 bytes --]

On Thu, Oct 18 2018, Andy Lutomirski wrote:

> On Wed, Oct 17, 2018 at 9:36 PM NeilBrown <neilb@suse.com> wrote:
>>
>> On Wed, Oct 17 2018, Andy Lutomirski wrote:
>>
>> > On Wed, Oct 17, 2018 at 6:48 PM NeilBrown <neilb@suse.com> wrote:
>> >>
>> >>
>> >> Was: Re: [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>> >> On Tue, Apr 19 2016, tip-bot for Dmitry Safonov wrote:
>> >>
>> >> > Commit-ID:  abfb9498ee1327f534df92a7ecaea81a85913bae
>> >> > Gitweb:     http://git.kernel.org/tip/abfb9498ee1327f534df92a7ecaea81a85913bae
>> >> > Author:     Dmitry Safonov <dsafonov@virtuozzo.com>
>> >> > AuthorDate: Mon, 18 Apr 2016 16:43:43 +0300
>> >> > Committer:  Ingo Molnar <mingo@kernel.org>
>> >> > CommitDate: Tue, 19 Apr 2016 10:44:52 +0200
>> >> >
>> >> > x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
>> >> >
>> >> ...
>> >> > @@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
>> >> >
>> >> >  static inline bool in_compat_syscall(void)
>> >> >  {
>> >> > -     return is_ia32_task() || is_x32_task();
>> >> > +     return in_ia32_syscall() || in_x32_syscall();
>> >> >  }
>> >>
>> >> Hi,
>> >>  I'm reply to this patch largely to make sure I get the right people
>> >>  .....
>> >>
>> >> This test is always true when CONFIG_X86_32 is set, as that forces
>> >> in_ia32_syscall() to true.
>> >> However we might not be in a syscall at all - we might be running a
>> >> kernel thread which is always in 64 mode.
>> >> Every other implementation of in_compat_syscall() that I found is
>> >> dependant on a thread flag or syscall register flag, and so returns
>> >> "false" in a kernel thread.
>> >>
>> >> Might something like this be appropriate?
>> >>
>> >> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
>> >> index 2ff2a30a264f..c265b40a78f2 100644
>> >> --- a/arch/x86/include/asm/thread_info.h
>> >> +++ b/arch/x86/include/asm/thread_info.h
>> >> @@ -219,7 +219,7 @@ static inline int arch_within_stack_frames(const void * const stack,
>> >>  #ifndef __ASSEMBLY__
>> >>
>> >>  #ifdef CONFIG_X86_32
>> >> -#define in_ia32_syscall() true
>> >> +#define in_ia32_syscall() (!(current->flags & PF_KTHREAD))
>> >>  #else
>> >>  #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
>> >>                            current_thread_info()->status & TS_COMPAT)
>> >>
>> >> This came up in the (no out-of-tree) lustre filesystem where some code
>> >> needs to assume 32-bit mode in X86_32 syscalls, and 64-bit mode in kernel
>> >> threads.
>> >>
>> >
>> > I could get on board with:
>> >
>> > ({WARN_ON_ONCE(current->flags & PF_KTHREAD); true})
>> >
>> > The point of these accessors is to be used *in a syscall*.
>> >
>> > What on Earth is Lustre doing that makes it have this problem?
>>
>> Lustre uses it in the ->getattr method to make sure ->ino, ->dev and
>> ->rdev are appropriately sized.  This isn't very different from the
>> usage in ext4 to ensure the seek offset for directories is suitable.
>>
>> These interfaces can be used both from systemcalls and from kernel
>> threads, such as via nfsd.
>>
>> I don't *know* if nfsd is the particular kthread that causes problems
>> for lustre.  All I know is that ->getattr returns 32bit squashed inode
>> numbers in kthread context where 64 bit numbers would be expected.
>>
>
> Well, that looks like Lustre is copying an ext4 bug.

I doubt it was copied - more likely independent evolution.
But on reflection, I see that it is probably reasonable that it
shouldn't be used this way - or at all in this context.
I'll try to understand what the issues really are and see if I can
find a solution that doesn't depend on this interface.
Thanks for your help.

NeilBrown


>
> Hi ext4 people-
>
> ext4's is_32bit_api() function is bogus.  You can't use
> in_compat_syscall() unless you know you're in a syscall
>
> The buggy code was introduced in:
>
> commit d1f5273e9adb40724a85272f248f210dc4ce919a
> Author: Fan Yong <yong.fan@whamcloud.com>
> Date:   Sun Mar 18 22:44:40 2012 -0400
>
>     ext4: return 32/64-bit dir name hash according to usage type
>
> I don't know what the right solution is.  Al, is it legit at all for
> fops->llseek to care about the caller's bitness?  If what ext4 is
> doing is legit, then ISTM the VFS needs to gain a new API to tell
> ->llseek what to do.  But I'm wondering why FMODE_64BITHASH by itself
> isn't sufficient,
>
> I'm quite tempted to add a warning to the x86 arch code to try to
> catch this type of bug.  Fortunately, a bit of grepping suggests that
> ext4 is the only filesystem with this problem.
>
> --Andy

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-24  1:47             ` NeilBrown
@ 2018-10-24 13:15               ` Theodore Y. Ts'o
  2018-10-24 14:32                 ` Theodore Y. Ts'o
  2018-10-25  3:46                 ` NeilBrown
  0 siblings, 2 replies; 56+ messages in thread
From: Theodore Y. Ts'o @ 2018-10-24 13:15 UTC (permalink / raw)
  To: NeilBrown
  Cc: Andy Lutomirski, Andreas Dilger, Peter Zijlstra, Dmitry Safonov,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

On Wed, Oct 24, 2018 at 12:47:57PM +1100, NeilBrown wrote:
> 
> I doubt it was copied - more likely independent evolution.
> But on reflection, I see that it is probably reasonable that it
> shouldn't be used this way - or at all in this context.
> I'll try to understand what the issues really are and see if I can
> find a solution that doesn't depend on this interface.
> Thanks for your help.

At least for ext4, the primary problem is that we want to use a 64-bit
telldir/seekdir cookie if all 64-bits will make it to user space, and
a 32-bit telldir cookie if only 32 bits will make it userspace.  This
impacts NFS as well because if there are people who are still using
NFSv2, which has 32-bit directory offsets, we need to constrain the
telldir/seekdir cookies we give to NFS to be a 32 has as opposed to a
64-bit hash.

						- Ted

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-24 13:15               ` Theodore Y. Ts'o
@ 2018-10-24 14:32                 ` Theodore Y. Ts'o
  2019-01-11 22:21                   ` Pavel Machek
  2018-10-25  3:46                 ` NeilBrown
  1 sibling, 1 reply; 56+ messages in thread
From: Theodore Y. Ts'o @ 2018-10-24 14:32 UTC (permalink / raw)
  To: NeilBrown, Andy Lutomirski, Andreas Dilger, Peter Zijlstra,
	Dmitry Safonov, H. Peter Anvin, Denys Vlasenko, Linus Torvalds,
	Borislav Petkov, Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

On Wed, Oct 24, 2018 at 09:15:34AM -0400, Theodore Y. Ts'o wrote:
> At least for ext4, the primary problem is that we want to use a 64-bit
> telldir/seekdir cookie if all 64-bits will make it to user space, and
> a 32-bit telldir cookie if only 32 bits will make it userspace.  This
> impacts NFS as well because if there are people who are still using
> NFSv2, which has 32-bit directory offsets, we need to constrain the
> telldir/seekdir cookies we give to NFS to be a 32 has as opposed to a
> 64-bit hash.

Are there anyone still using NFSv2, BTW?  One way of making the
problem *much* easier to sovle would be to drop NFSv2 support.  :-)

	       	      	       	     - Ted

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-24 13:15               ` Theodore Y. Ts'o
  2018-10-24 14:32                 ` Theodore Y. Ts'o
@ 2018-10-25  3:46                 ` NeilBrown
  2018-10-25  4:45                   ` Andy Lutomirski
  1 sibling, 1 reply; 56+ messages in thread
From: NeilBrown @ 2018-10-25  3:46 UTC (permalink / raw)
  To: Theodore Y. Ts'o
  Cc: Andy Lutomirski, Andreas Dilger, Peter Zijlstra, Dmitry Safonov,
	H. Peter Anvin, Denys Vlasenko, Linus Torvalds, Borislav Petkov,
	Ingo Molnar, Brian Gerst, LKML, Thomas Gleixner,
	linux-tip-commits, jsimmons

[-- Attachment #1: Type: text/plain, Size: 1556 bytes --]

On Wed, Oct 24 2018, Theodore Y. Ts'o wrote:

> On Wed, Oct 24, 2018 at 12:47:57PM +1100, NeilBrown wrote:
>> 
>> I doubt it was copied - more likely independent evolution.
>> But on reflection, I see that it is probably reasonable that it
>> shouldn't be used this way - or at all in this context.
>> I'll try to understand what the issues really are and see if I can
>> find a solution that doesn't depend on this interface.
>> Thanks for your help.
>
> At least for ext4, the primary problem is that we want to use a 64-bit
> telldir/seekdir cookie if all 64-bits will make it to user space, and
> a 32-bit telldir cookie if only 32 bits will make it userspace.  This
> impacts NFS as well because if there are people who are still using
> NFSv2, which has 32-bit directory offsets, we need to constrain the
> telldir/seekdir cookies we give to NFS to be a 32 has as opposed to a
> 64-bit hash.

NFSd uses FMODE_32BITHASH or FMODE64BITHASH to allow ext4 to do the
right thing.  FMODE_32BITHASH is set for NFSv2 only.

Maybe sys_getdents needs to set FMODE_32BITHASH, and sys_getdent64 needs
to set FMODE_64BITHASH - or something like that.

For lustre it is a bit more complex.  The internal "inode number" is 128
bits and we (sort of) hash it to 32 or 64 bits.  cp_compat_stat() just
says -EOVERFLOW if we give a 64 bit number when 32 are expected, and
there is no flag to say "this is a 32-bit 'stat' request".

But I need to dig into exactly what that "sort-of" means - maybe there
is an answer in there.

NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-25  3:46                 ` NeilBrown
@ 2018-10-25  4:45                   ` Andy Lutomirski
  0 siblings, 0 replies; 56+ messages in thread
From: Andy Lutomirski @ 2018-10-25  4:45 UTC (permalink / raw)
  To: NeilBrown
  Cc: Theodore Y. Ts'o, Andy Lutomirski, Andreas Dilger,
	Peter Zijlstra, Dmitry Safonov, H. Peter Anvin, Denys Vlasenko,
	Linus Torvalds, Borislav Petkov, Ingo Molnar, Brian Gerst, LKML,
	Thomas Gleixner, linux-tip-commits, jsimmons



> On Oct 24, 2018, at 8:46 PM, NeilBrown <neilb@suse.com> wrote:
> 
>> On Wed, Oct 24 2018, Theodore Y. Ts'o wrote:
>> 
>>> On Wed, Oct 24, 2018 at 12:47:57PM +1100, NeilBrown wrote:
>>> 
>>> I doubt it was copied - more likely independent evolution.
>>> But on reflection, I see that it is probably reasonable that it
>>> shouldn't be used this way - or at all in this context.
>>> I'll try to understand what the issues really are and see if I can
>>> find a solution that doesn't depend on this interface.
>>> Thanks for your help.
>> 
>> At least for ext4, the primary problem is that we want to use a 64-bit
>> telldir/seekdir cookie if all 64-bits will make it to user space, and
>> a 32-bit telldir cookie if only 32 bits will make it userspace.  This
>> impacts NFS as well because if there are people who are still using
>> NFSv2, which has 32-bit directory offsets, we need to constrain the
>> telldir/seekdir cookies we give to NFS to be a 32 has as opposed to a
>> 64-bit hash.
> 
> NFSd uses FMODE_32BITHASH or FMODE64BITHASH to allow ext4 to do the
> right thing.  FMODE_32BITHASH is set for NFSv2 only.
> 
> Maybe sys_getdents needs to set FMODE_32BITHASH, and sys_getdent64 needs
> to set FMODE_64BITHASH - or something like that.

It’s possible for a 32-bit process and a 64-bit process to share a directory fd, so I don’t think it’s quite that simple.

One option would be to add .llseek and .getdents flags or entire new compat operations to indicate that the caller expects 32-bit offsets.

I wonder how overlayfs interacts with this whole mess.

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: in_compat_syscall() returns from kernel thread for X86_32.
  2018-10-24 14:32                 ` Theodore Y. Ts'o
@ 2019-01-11 22:21                   ` Pavel Machek
  0 siblings, 0 replies; 56+ messages in thread
From: Pavel Machek @ 2019-01-11 22:21 UTC (permalink / raw)
  To: Theodore Y. Ts'o, NeilBrown, Andy Lutomirski, Andreas Dilger,
	Peter Zijlstra, Dmitry Safonov, H. Peter Anvin, Denys Vlasenko,
	Linus Torvalds, Borislav Petkov, Ingo Molnar, Brian Gerst, LKML,
	Thomas Gleixner, linux-tip-commits, jsimmons

[-- Attachment #1: Type: text/plain, Size: 995 bytes --]

On Wed 2018-10-24 10:32:37, Theodore Y. Ts'o wrote:
> On Wed, Oct 24, 2018 at 09:15:34AM -0400, Theodore Y. Ts'o wrote:
> > At least for ext4, the primary problem is that we want to use a 64-bit
> > telldir/seekdir cookie if all 64-bits will make it to user space, and
> > a 32-bit telldir cookie if only 32 bits will make it userspace.  This
> > impacts NFS as well because if there are people who are still using
> > NFSv2, which has 32-bit directory offsets, we need to constrain the
> > telldir/seekdir cookies we give to NFS to be a 32 has as opposed to a
> > 64-bit hash.
> 
> Are there anyone still using NFSv2, BTW?  One way of making the
> problem *much* easier to sovle would be to drop NFSv2 support.  :-)

NFSv2 is now in "unexcpected" places such as U-Boot bootloader. I'm
pretty sure someone still uses it...

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 56+ messages in thread

end of thread, other threads:[~2019-01-11 22:21 UTC | newest]

Thread overview: 56+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-11 15:22 [PATCH] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
2016-04-11 15:41 ` kbuild test robot
2016-04-11 15:54   ` Dmitry Safonov
2016-04-14 16:32 ` [PATCHv2] " Dmitry Safonov
2016-04-14 22:58   ` Andy Lutomirski
2016-04-15  8:46     ` Dmitry Safonov
2016-04-15  9:18     ` Ingo Molnar
2016-04-15  9:51       ` Dmitry Safonov
2016-04-15 12:08         ` Dmitry Safonov
2016-04-15 13:20 ` [PATCHv3 1/2] " Dmitry Safonov
2016-04-15 13:20   ` [PATCHv3 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
2016-04-15 16:52     ` Andy Lutomirski
2016-04-15 16:55       ` Dmitry Safonov
2016-04-15 13:53   ` [PATCHv3 1/2] x86/vdso: add mremap hook to vm_special_mapping kbuild test robot
2016-04-15 14:12 ` [PATCHv4 " Dmitry Safonov
2016-04-15 14:12   ` [PATCHv4 2/2] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
2016-04-15 16:58   ` [PATCHv4 1/2] x86/vdso: add mremap hook to vm_special_mapping Andy Lutomirski
2016-04-18 11:18     ` Dmitry Safonov
2016-04-18 15:37       ` Andy Lutomirski
2016-04-18 13:43 ` [PATCHv5 1/3] x86: rename is_{ia32,x32}_task to in_{ia32,x32}_syscall Dmitry Safonov
2016-04-18 13:43   ` [PATCHv5 2/3] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
2016-04-18 14:03     ` kbuild test robot
2016-04-18 14:17     ` [PATCHv6 " Dmitry Safonov
2016-04-18 14:23     ` [PATCHv7 " Dmitry Safonov
2016-04-20 16:22       ` Dmitry Safonov
2016-04-21 19:52       ` Andy Lutomirski
2016-04-22 10:45         ` Dmitry Safonov
2016-04-23 23:09     ` [PATCHv5 " kbuild test robot
2016-04-18 13:43   ` [PATCHv5 3/3] selftest/x86: add mremap vdso 32-bit test Dmitry Safonov
2016-04-21 20:01     ` Andy Lutomirski
2016-04-22 11:34       ` Dmitry Safonov
2016-04-19  9:34   ` [tip:x86/asm] x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() tip-bot for Dmitry Safonov
2016-04-19 11:15     ` Ingo Molnar
2016-04-19 11:35       ` Borislav Petkov
2016-04-19 17:00         ` H. Peter Anvin
2016-04-19 16:04       ` Andy Lutomirski
2016-04-22  8:36         ` Ingo Molnar
2018-10-18  1:47     ` in_compat_syscall() returns from kernel thread for X86_32 NeilBrown
2018-10-18  2:37       ` Andy Lutomirski
2018-10-18  2:49         ` Al Viro
2018-10-18  4:36         ` NeilBrown
2018-10-18 17:26           ` Andy Lutomirski
2018-10-20  6:02             ` Andreas Dilger
2018-10-20  7:58               ` Andy Lutomirski
2018-10-24  1:47             ` NeilBrown
2018-10-24 13:15               ` Theodore Y. Ts'o
2018-10-24 14:32                 ` Theodore Y. Ts'o
2019-01-11 22:21                   ` Pavel Machek
2018-10-25  3:46                 ` NeilBrown
2018-10-25  4:45                   ` Andy Lutomirski
2016-04-25 11:37 ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Dmitry Safonov
2016-04-25 11:37   ` [PATCHv8 2/2] selftest/x86: add mremap vdso test Dmitry Safonov
2016-04-25 21:39     ` Andy Lutomirski
2016-04-25 21:38   ` [PATCHv8 1/2] x86/vdso: add mremap hook to vm_special_mapping Andy Lutomirski
     [not found]     ` <e0a10957-ddf7-1bc4-fad6-8b5836628fce@virtuozzo.com>
2016-05-05 11:52       ` Ingo Molnar
2016-05-05 11:55         ` Dmitry Safonov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).