* [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-19 22:58 ` Andy Lutomirski
0 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-19 22:58 UTC (permalink / raw)
To: x86, Andrew Morton, Sasha Levin, linux-mm, Dave Jones
Cc: LKML, Cyrill Gorcunov, Pavel Emelyanov, H. Peter Anvin,
Andy Lutomirski, Cyrill Gorcunov
Using arch_vma_name to give special mappings a name is awkward. x86
currently implements it by comparing the start address of the vma to
the expected address of the vdso. This requires tracking the start
address of special mappings and is probably buggy if a special vma
is split or moved.
Improve _install_special_mapping to just name the vma directly. Use
it to give the x86 vvar area a name, which should make CRIU's life
easier.
As a side effect, the vvar area will show up in core dumps. This
could be considered weird and is fixable. Thoughts?
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
arch/x86/include/asm/vdso.h | 6 ++-
arch/x86/mm/init_64.c | 3 --
arch/x86/vdso/vdso2c.h | 5 ++-
arch/x86/vdso/vdso32-setup.c | 7 ----
arch/x86/vdso/vma.c | 25 ++++++++-----
include/linux/mm.h | 4 +-
include/linux/mm_types.h | 6 +++
mm/mmap.c | 89 +++++++++++++++++++++++++++++---------------
8 files changed, 94 insertions(+), 51 deletions(-)
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index d0a2c90..30be253 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -7,10 +7,14 @@
#ifndef __ASSEMBLER__
+#include <linux/mm_types.h>
+
struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
- struct page **pages; /* Big enough for data/size page pointers */
+
+ /* text_mapping.pages is big enough for data/size page pointers */
+ struct vm_special_mapping text_mapping;
unsigned long alt, alt_len;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6f88184..9deb59b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1223,9 +1223,6 @@ int in_gate_area_no_mm(unsigned long addr)
const char *arch_vma_name(struct vm_area_struct *vma)
{
- if (vma->vm_mm && vma->vm_start ==
- (long __force)vma->vm_mm->context.vdso)
- return "[vdso]";
if (vma == &gate_vma)
return "[vsyscall]";
return NULL;
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index ed2e894..3dcc61e 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -136,7 +136,10 @@ static int GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", data_size);
- fprintf(outfile, "\t.pages = pages,\n");
+ fprintf(outfile, "\t.text_mapping = {\n");
+ fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
+ fprintf(outfile, "\t\t.pages = pages,\n");
+ fprintf(outfile, "\t},\n");
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
(unsigned long)alt_sec->sh_offset);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index c3ed708..e4f7781 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -119,13 +119,6 @@ __initcall(ia32_binfmt_init);
#else /* CONFIG_X86_32 */
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
- return "[vdso]";
- return NULL;
-}
-
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 8ad0081..e1513c4 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -30,7 +30,8 @@ void __init init_vdso_image(const struct vdso_image *image)
BUG_ON(image->size % PAGE_SIZE != 0);
for (i = 0; i < npages; i++)
- image->pages[i] = virt_to_page(image->data + i*PAGE_SIZE);
+ image->text_mapping.pages[i] =
+ virt_to_page(image->data + i*PAGE_SIZE);
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
@@ -91,6 +92,10 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
unsigned long addr;
int ret = 0;
static struct page *no_pages[] = {NULL};
+ static struct vm_special_mapping vvar_mapping = {
+ .name = "[vvar]",
+ .pages = no_pages,
+ };
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -112,21 +117,23 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
- ret = install_special_mapping(mm,
- addr,
- image->size,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- image->pages);
+ vma = _install_special_mapping(mm,
+ addr,
+ image->size,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ &image->text_mapping);
- if (ret)
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto up_fail;
+ }
vma = _install_special_mapping(mm,
addr + image->size,
image->sym_end_mapping - image->size,
VM_READ,
- no_pages);
+ &vvar_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63f8d4e..05aab09 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1782,7 +1782,9 @@ extern struct file *get_mm_exe_file(struct mm_struct *mm);
extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
- unsigned long flags, struct page **pages);
+ unsigned long flags,
+ const struct vm_special_mapping *spec);
+/* This is an obsolete alternative to _install_special_mapping. */
extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags, struct page **pages);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8967e20..22c6f4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -510,4 +510,10 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
}
#endif
+struct vm_special_mapping
+{
+ const char *name;
+ struct page **pages;
+};
+
#endif /* _LINUX_MM_TYPES_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index b1202cf..52bbc95 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2872,6 +2872,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
return 1;
}
+static int special_mapping_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf);
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static const char *special_mapping_name(struct vm_area_struct *vma)
+{
+ return ((struct vm_special_mapping *)vma->vm_private_data)->name;
+}
+
+static const struct vm_operations_struct special_mapping_vmops = {
+ .close = special_mapping_close,
+ .fault = special_mapping_fault,
+ .name = special_mapping_name,
+};
+
+static const struct vm_operations_struct legacy_special_mapping_vmops = {
+ .close = special_mapping_close,
+ .fault = special_mapping_fault,
+};
static int special_mapping_fault(struct vm_area_struct *vma,
struct vm_fault *vmf)
@@ -2887,7 +2912,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
*/
pgoff = vmf->pgoff - vma->vm_pgoff;
- for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
+ if (vma->vm_ops == &legacy_special_mapping_vmops)
+ pages = vma->vm_private_data;
+ else
+ pages = ((struct vm_special_mapping *)vma->vm_private_data)->
+ pages;
+
+ for (; pgoff && *pages; ++pages)
pgoff--;
if (*pages) {
@@ -2900,30 +2931,11 @@ static int special_mapping_fault(struct vm_area_struct *vma,
return VM_FAULT_SIGBUS;
}
-/*
- * Having a close hook prevents vma merging regardless of flags.
- */
-static void special_mapping_close(struct vm_area_struct *vma)
-{
-}
-
-static const struct vm_operations_struct special_mapping_vmops = {
- .close = special_mapping_close,
- .fault = special_mapping_fault,
-};
-
-/*
- * Called with mm->mmap_sem held for writing.
- * Insert a new vma covering the given region, with the given flags.
- * Its pages are supplied by the given array of struct page *.
- * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
- * The region past the last page supplied will always produce SIGBUS.
- * The array pointer and the pages it points to are assumed to stay alive
- * for as long as this mapping might exist.
- */
-struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
- unsigned long addr, unsigned long len,
- unsigned long vm_flags, struct page **pages)
+static struct vm_area_struct *__install_special_mapping(
+ struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, const struct vm_operations_struct *ops,
+ void *priv)
{
int ret;
struct vm_area_struct *vma;
@@ -2940,8 +2952,8 @@ struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- vma->vm_ops = &special_mapping_vmops;
- vma->vm_private_data = pages;
+ vma->vm_ops = ops;
+ vma->vm_private_data = priv;
ret = insert_vm_struct(mm, vma);
if (ret)
@@ -2958,12 +2970,31 @@ out:
return ERR_PTR(ret);
}
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+struct vm_area_struct *_install_special_mapping(
+ struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, const struct vm_special_mapping *spec)
+{
+ return __install_special_mapping(mm, addr, len, vm_flags,
+ &special_mapping_vmops, (void *)spec);
+}
+
int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long vm_flags, struct page **pages)
{
- struct vm_area_struct *vma = _install_special_mapping(mm,
- addr, len, vm_flags, pages);
+ struct vm_area_struct *vma = __install_special_mapping(
+ mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
+ (void *)pages);
if (IS_ERR(vma))
return PTR_ERR(vma);
--
1.9.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-19 22:58 ` Andy Lutomirski
@ 2014-05-20 17:21 ` Cyrill Gorcunov
-1 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 17:21 UTC (permalink / raw)
To: Andy Lutomirski
Cc: x86, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
> Using arch_vma_name to give special mappings a name is awkward. x86
> currently implements it by comparing the start address of the vma to
> the expected address of the vdso. This requires tracking the start
> address of special mappings and is probably buggy if a special vma
> is split or moved.
>
> Improve _install_special_mapping to just name the vma directly. Use
> it to give the x86 vvar area a name, which should make CRIU's life
> easier.
>
> As a side effect, the vvar area will show up in core dumps. This
> could be considered weird and is fixable. Thoughts?
>
> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
> Cc: Pavel Emelyanov <xemul@parallels.com>
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Hi Andy, thanks a lot for this! I must confess I don't yet know how
would we deal with compat tasks but this is 'must have' mark which
allow us to detect vvar area!
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 17:21 ` Cyrill Gorcunov
0 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 17:21 UTC (permalink / raw)
To: Andy Lutomirski
Cc: x86, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
> Using arch_vma_name to give special mappings a name is awkward. x86
> currently implements it by comparing the start address of the vma to
> the expected address of the vdso. This requires tracking the start
> address of special mappings and is probably buggy if a special vma
> is split or moved.
>
> Improve _install_special_mapping to just name the vma directly. Use
> it to give the x86 vvar area a name, which should make CRIU's life
> easier.
>
> As a side effect, the vvar area will show up in core dumps. This
> could be considered weird and is fixable. Thoughts?
>
> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
> Cc: Pavel Emelyanov <xemul@parallels.com>
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Hi Andy, thanks a lot for this! I must confess I don't yet know how
would we deal with compat tasks but this is 'must have' mark which
allow us to detect vvar area!
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 17:21 ` Cyrill Gorcunov
@ 2014-05-20 17:24 ` Andy Lutomirski
-1 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 17:24 UTC (permalink / raw)
To: Cyrill Gorcunov
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:21 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
>> Using arch_vma_name to give special mappings a name is awkward. x86
>> currently implements it by comparing the start address of the vma to
>> the expected address of the vdso. This requires tracking the start
>> address of special mappings and is probably buggy if a special vma
>> is split or moved.
>>
>> Improve _install_special_mapping to just name the vma directly. Use
>> it to give the x86 vvar area a name, which should make CRIU's life
>> easier.
>>
>> As a side effect, the vvar area will show up in core dumps. This
>> could be considered weird and is fixable. Thoughts?
>>
>> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
>> Cc: Pavel Emelyanov <xemul@parallels.com>
>> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>
> Hi Andy, thanks a lot for this! I must confess I don't yet know how
> would we deal with compat tasks but this is 'must have' mark which
> allow us to detect vvar area!
Out of curiosity, how does CRIU currently handle checkpointing a
restored task? In current kernels, the "[vdso]" name in maps goes
away after mremapping the vdso.
I suspect that you'll need kernel changes for compat tasks, since I
think that mremapping the vdso on any reasonably modern hardware in a
32-bit task will cause sigreturn to blow up. This could be fixed by
making mremap magical, although adding a new prctl or arch_prctl to
reliably move the vdso might be a better bet.
--Andy
--
Andy Lutomirski
AMA Capital Management, LLC
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 17:24 ` Andy Lutomirski
0 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 17:24 UTC (permalink / raw)
To: Cyrill Gorcunov
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:21 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
>> Using arch_vma_name to give special mappings a name is awkward. x86
>> currently implements it by comparing the start address of the vma to
>> the expected address of the vdso. This requires tracking the start
>> address of special mappings and is probably buggy if a special vma
>> is split or moved.
>>
>> Improve _install_special_mapping to just name the vma directly. Use
>> it to give the x86 vvar area a name, which should make CRIU's life
>> easier.
>>
>> As a side effect, the vvar area will show up in core dumps. This
>> could be considered weird and is fixable. Thoughts?
>>
>> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
>> Cc: Pavel Emelyanov <xemul@parallels.com>
>> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>
> Hi Andy, thanks a lot for this! I must confess I don't yet know how
> would we deal with compat tasks but this is 'must have' mark which
> allow us to detect vvar area!
Out of curiosity, how does CRIU currently handle checkpointing a
restored task? In current kernels, the "[vdso]" name in maps goes
away after mremapping the vdso.
I suspect that you'll need kernel changes for compat tasks, since I
think that mremapping the vdso on any reasonably modern hardware in a
32-bit task will cause sigreturn to blow up. This could be fixed by
making mremap magical, although adding a new prctl or arch_prctl to
reliably move the vdso might be a better bet.
--Andy
--
Andy Lutomirski
AMA Capital Management, LLC
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 17:24 ` Andy Lutomirski
@ 2014-05-20 17:47 ` Cyrill Gorcunov
-1 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 17:47 UTC (permalink / raw)
To: Andy Lutomirski
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:24:49AM -0700, Andy Lutomirski wrote:
> On Tue, May 20, 2014 at 10:21 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> > On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
> >> Using arch_vma_name to give special mappings a name is awkward. x86
> >> currently implements it by comparing the start address of the vma to
> >> the expected address of the vdso. This requires tracking the start
> >> address of special mappings and is probably buggy if a special vma
> >> is split or moved.
> >>
> >> Improve _install_special_mapping to just name the vma directly. Use
> >> it to give the x86 vvar area a name, which should make CRIU's life
> >> easier.
> >>
> >> As a side effect, the vvar area will show up in core dumps. This
> >> could be considered weird and is fixable. Thoughts?
> >>
> >> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
> >> Cc: Pavel Emelyanov <xemul@parallels.com>
> >> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> >
> > Hi Andy, thanks a lot for this! I must confess I don't yet know how
> > would we deal with compat tasks but this is 'must have' mark which
> > allow us to detect vvar area!
>
> Out of curiosity, how does CRIU currently handle checkpointing a
> restored task? In current kernels, the "[vdso]" name in maps goes
> away after mremapping the vdso.
We use not only [vdso] mark to detect vdso area but also page frame
number of the living vdso. If mark is not present in procfs output
we examine executable areas and check if pfn == vdso_pfn; it's
a slow path because there might be a bunch of executable areas and
touching every one of them is not that fast, but we simply have no
choice.
The situation gets worse when a task was dumped on one kernel and
then restored on another kernel where the vdso content is different
from the one saved in the image -- in such a case, as I mentioned, we need
that named vdso proxy which redirects calls to the vdso of the system
where the task is restoring. And when such a "restored" task gets checkpointed
a second time we don't dump the new living vdso but save only the old vdso
proxy on disk (detecting it is a different story; in short, we
inject a unique mark into the elf header).
>
> I suspect that you'll need kernel changes for compat tasks, since I
> think that mremapping the vdso on any reasonably modern hardware in a
> 32-bit task will cause sigreturn to blow up. This could be fixed by
> making mremap magical, although adding a new prctl or arch_prctl to
> reliably move the vdso might be a better bet.
Well, as far as I understand compat code uses abs addressing for
vvar data and if vvar data position doesn't change we're safe,
but same time because vvar addresses are not abi I fear one day
we indeed hit the problems and the only solution would be
to use kernel's help. But again, Andy, I didn't think much
about implementing compat mode in criu yet so i might be
missing some details.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 17:47 ` Cyrill Gorcunov
0 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 17:47 UTC (permalink / raw)
To: Andy Lutomirski
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:24:49AM -0700, Andy Lutomirski wrote:
> On Tue, May 20, 2014 at 10:21 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> > On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
> >> Using arch_vma_name to give special mappings a name is awkward. x86
> >> currently implements it by comparing the start address of the vma to
> >> the expected address of the vdso. This requires tracking the start
> >> address of special mappings and is probably buggy if a special vma
> >> is split or moved.
> >>
> >> Improve _install_special_mapping to just name the vma directly. Use
> >> it to give the x86 vvar area a name, which should make CRIU's life
> >> easier.
> >>
> >> As a side effect, the vvar area will show up in core dumps. This
> >> could be considered weird and is fixable. Thoughts?
> >>
> >> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
> >> Cc: Pavel Emelyanov <xemul@parallels.com>
> >> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> >
> > Hi Andy, thanks a lot for this! I must confess I don't yet know how
> > would we deal with compat tasks but this is 'must have' mark which
> > allow us to detect vvar area!
>
> Out of curiosity, how does CRIU currently handle checkpointing a
> restored task? In current kernels, the "[vdso]" name in maps goes
> away after mremapping the vdso.
We use not only [vdso] mark to detect vdso area but also page frame
number of the living vdso. If mark is not present in procfs output
we examine executable areas and check if pfn == vdso_pfn; it's
a slow path because there might be a bunch of executable areas and
touching every one of them is not that fast, but we simply have no
choice.
The situation gets worse when a task was dumped on one kernel and
then restored on another kernel where the vdso content is different
from the one saved in the image -- in such a case, as I mentioned, we need
that named vdso proxy which redirects calls to the vdso of the system
where the task is restoring. And when such a "restored" task gets checkpointed
a second time we don't dump the new living vdso but save only the old vdso
proxy on disk (detecting it is a different story; in short, we
inject a unique mark into the elf header).
>
> I suspect that you'll need kernel changes for compat tasks, since I
> think that mremapping the vdso on any reasonably modern hardware in a
> 32-bit task will cause sigreturn to blow up. This could be fixed by
> making mremap magical, although adding a new prctl or arch_prctl to
> reliably move the vdso might be a better bet.
Well, as far as I understand compat code uses abs addressing for
vvar data and if vvar data position doesn't change we're safe,
but same time because vvar addresses are not abi I fear one day
we indeed hit the problems and the only solution would be
to use kernel's help. But again, Andy, I didn't think much
about implementing compat mode in criu yet so I might be
missing some details.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 17:47 ` Cyrill Gorcunov
@ 2014-05-20 17:52 ` Andy Lutomirski
-1 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 17:52 UTC (permalink / raw)
To: Cyrill Gorcunov
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:47 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> On Tue, May 20, 2014 at 10:24:49AM -0700, Andy Lutomirski wrote:
>> On Tue, May 20, 2014 at 10:21 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
>> > On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
>> >> Using arch_vma_name to give special mappings a name is awkward. x86
>> >> currently implements it by comparing the start address of the vma to
>> >> the expected address of the vdso. This requires tracking the start
>> >> address of special mappings and is probably buggy if a special vma
>> >> is split or moved.
>> >>
>> >> Improve _install_special_mapping to just name the vma directly. Use
>> >> it to give the x86 vvar area a name, which should make CRIU's life
>> >> easier.
>> >>
>> >> As a side effect, the vvar area will show up in core dumps. This
>> >> could be considered weird and is fixable. Thoughts?
>> >>
>> >> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
>> >> Cc: Pavel Emelyanov <xemul@parallels.com>
>> >> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>> >
>> > Hi Andy, thanks a lot for this! I must confess I don't yet know how
>> > would we deal with compat tasks but this is 'must have' mark which
>> > allow us to detect vvar area!
>>
>> Out of curiosity, how does CRIU currently handle checkpointing a
>> restored task? In current kernels, the "[vdso]" name in maps goes
>> away after mremapping the vdso.
>
> We use not only [vdso] mark to detect vdso area but also page frame
> number of the living vdso. If mark is not present in procfs output
> we examinate executable areas and check if pfn == vdso_pfn, it's
> a slow path because there migh be a bunch of executable areas and
> touching every of it is not that fast thing, but we simply have no
> choise.
This patch should fix this issue, at least. If there's still a way to
get a native vdso that doesn't say "[vdso]", please let me know.
>
> The situation get worse when task was dumped on one kernel and
> then restored on another kernel where vdso content is different
> from one save in image -- is such case as I mentioned we need
> that named vdso proxy which redirect calls to vdso of the system
> where task is restoring. And when such "restored" task get checkpointed
> second time we don't dump new living vdso but save only old vdso
> proxy on disk (detecting it is a different story, in short we
> inject a unique mark into elf header).
Yuck. But I don't know whether the kernel can help much here.
>
>>
>> I suspect that you'll need kernel changes for compat tasks, since I
>> think that mremapping the vdso on any reasonably modern hardware in a
>> 32-bit task will cause sigreturn to blow up. This could be fixed by
>> making mremap magical, although adding a new prctl or arch_prctl to
>> reliably move the vdso might be a better bet.
>
> Well, as far as I understand compat code uses abs addressing for
> vvar data and if vvar data position doesn't change we're safe,
> but same time because vvar addresses are not abi I fear one day
> we indeed hit the problems and the only solution would be
> to use kernel's help. But again, Andy, I didn't think much
> about implementing compat mode in criu yet so i might be
> missing some details.
Prior to 3.15, the compat code didn't have vvar data at all. In 3.15
and up, the vvar data is accessed using PC-relative addressing, even
in compat mode (using the usual call; mov trick to read EIP).
--Andy
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 17:52 ` Andy Lutomirski
0 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 17:52 UTC (permalink / raw)
To: Cyrill Gorcunov
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:47 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> On Tue, May 20, 2014 at 10:24:49AM -0700, Andy Lutomirski wrote:
>> On Tue, May 20, 2014 at 10:21 AM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
>> > On Mon, May 19, 2014 at 03:58:33PM -0700, Andy Lutomirski wrote:
>> >> Using arch_vma_name to give special mappings a name is awkward. x86
>> >> currently implements it by comparing the start address of the vma to
>> >> the expected address of the vdso. This requires tracking the start
>> >> address of special mappings and is probably buggy if a special vma
>> >> is split or moved.
>> >>
>> >> Improve _install_special_mapping to just name the vma directly. Use
>> >> it to give the x86 vvar area a name, which should make CRIU's life
>> >> easier.
>> >>
>> >> As a side effect, the vvar area will show up in core dumps. This
>> >> could be considered weird and is fixable. Thoughts?
>> >>
>> >> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
>> >> Cc: Pavel Emelyanov <xemul@parallels.com>
>> >> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>> >
>> > Hi Andy, thanks a lot for this! I must confess I don't yet know how
>> > would we deal with compat tasks but this is 'must have' mark which
>> > allow us to detect vvar area!
>>
>> Out of curiosity, how does CRIU currently handle checkpointing a
>> restored task? In current kernels, the "[vdso]" name in maps goes
>> away after mremapping the vdso.
>
> We use not only [vdso] mark to detect vdso area but also page frame
> number of the living vdso. If mark is not present in procfs output
> we examinate executable areas and check if pfn == vdso_pfn, it's
> a slow path because there migh be a bunch of executable areas and
> touching every of it is not that fast thing, but we simply have no
> choise.
This patch should fix this issue, at least. If there's still a way to
get a native vdso that doesn't say "[vdso]", please let me know.
>
> The situation get worse when task was dumped on one kernel and
> then restored on another kernel where vdso content is different
> from one save in image -- is such case as I mentioned we need
> that named vdso proxy which redirect calls to vdso of the system
> where task is restoring. And when such "restored" task get checkpointed
> second time we don't dump new living vdso but save only old vdso
> proxy on disk (detecting it is a different story, in short we
> inject a unique mark into elf header).
Yuck. But I don't know whether the kernel can help much here.
>
>>
>> I suspect that you'll need kernel changes for compat tasks, since I
>> think that mremapping the vdso on any reasonably modern hardware in a
>> 32-bit task will cause sigreturn to blow up. This could be fixed by
>> making mremap magical, although adding a new prctl or arch_prctl to
>> reliably move the vdso might be a better bet.
>
> Well, as far as I understand compat code uses abs addressing for
> vvar data and if vvar data position doesn't change we're safe,
> but same time because vvar addresses are not abi I fear one day
> we indeed hit the problems and the only solution would be
> to use kernel's help. But again, Andy, I didn't think much
about implementing compat mode in criu yet so I might be
> missing some details.
Prior to 3.15, the compat code didn't have vvar data at all. In 3.15
and up, the vvar data is accessed using PC-relative addressing, even
in compat mode (using the usual call; mov trick to read EIP).
--Andy
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 17:52 ` Andy Lutomirski
@ 2014-05-20 18:01 ` Cyrill Gorcunov
-1 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 18:01 UTC (permalink / raw)
To: Andy Lutomirski
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:52:51AM -0700, Andy Lutomirski wrote:
> >
> > We use not only [vdso] mark to detect vdso area but also page frame
> > number of the living vdso. If mark is not present in procfs output
> > we examine executable areas and check if pfn == vdso_pfn, it's
> > a slow path because there might be a bunch of executable areas and
> > touching every of it is not that fast thing, but we simply have no
> > choice.
>
> This patch should fix this issue, at least. If there's still a way to
> get a native vdso that doesn't say "[vdso]", please let me know/
Yes, having a native procfs way to detect vdso is much preferred!
> > The situation get worse when task was dumped on one kernel and
> > then restored on another kernel where vdso content is different
> > from one save in image -- is such case as I mentioned we need
> > that named vdso proxy which redirect calls to vdso of the system
> > where task is restoring. And when such "restored" task get checkpointed
> > second time we don't dump new living vdso but save only old vdso
> > proxy on disk (detecting it is a different story, in short we
> > inject a unique mark into elf header).
>
> Yuck. But I don't know whether the kernel can help much here.
Some prctl which would tell kernel to put vdso at specified address.
We can live without it for now so not a big deal (yet ;)
> >> I suspect that you'll need kernel changes for compat tasks, since I
> >> think that mremapping the vdso on any reasonably modern hardware in a
> >> 32-bit task will cause sigreturn to blow up. This could be fixed by
> >> making mremap magical, although adding a new prctl or arch_prctl to
> >> reliably move the vdso might be a better bet.
> >
> > Well, as far as I understand compat code uses abs addressing for
> > vvar data and if vvar data position doesn't change we're safe,
> > but same time because vvar addresses are not abi I fear one day
> > we indeed hit the problems and the only solution would be
> > to use kernel's help. But again, Andy, I didn't think much
> > about implementing compat mode in criu yet so i might be
> > missing some details.
>
> Prior to 3.15, the compat code didn't have vvar data at all. In 3.15
> and up, the vvar data is accessed using PC-relative addressing, even
> in compat mode (using the usual call; mov trick to read EIP).
i see. I'll ping you for help once I start implementing compat mode ;)
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 18:01 ` Cyrill Gorcunov
0 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 18:01 UTC (permalink / raw)
To: Andy Lutomirski
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov, H. Peter Anvin
On Tue, May 20, 2014 at 10:52:51AM -0700, Andy Lutomirski wrote:
> >
> > We use not only [vdso] mark to detect vdso area but also page frame
> > number of the living vdso. If mark is not present in procfs output
> > we examinate executable areas and check if pfn == vdso_pfn, it's
> > a slow path because there migh be a bunch of executable areas and
> > touching every of it is not that fast thing, but we simply have no
> > choise.
>
> This patch should fix this issue, at least. If there's still a way to
> get a native vdso that doesn't say "[vdso]", please let me know/
Yes, having a native procfs way to detect vdso is much preferred!
> > The situation get worse when task was dumped on one kernel and
> > then restored on another kernel where vdso content is different
> > from one save in image -- is such case as I mentioned we need
> > that named vdso proxy which redirect calls to vdso of the system
> > where task is restoring. And when such "restored" task get checkpointed
> > second time we don't dump new living vdso but save only old vdso
> > proxy on disk (detecting it is a different story, in short we
> > inject a unique mark into elf header).
>
> Yuck. But I don't know whether the kernel can help much here.
Some prctl which would tell kernel to put vdso at specified address.
We can live without it for now so not a big deal (yet ;)
> >> I suspect that you'll need kernel changes for compat tasks, since I
> >> think that mremapping the vdso on any reasonably modern hardware in a
> >> 32-bit task will cause sigreturn to blow up. This could be fixed by
> >> making mremap magical, although adding a new prctl or arch_prctl to
> >> reliably move the vdso might be a better bet.
> >
> > Well, as far as I understand compat code uses abs addressing for
> > vvar data and if vvar data position doesn't change we're safe,
> > but same time because vvar addresses are not abi I fear one day
> > we indeed hit the problems and the only solution would be
> > to use kernel's help. But again, Andy, I didn't think much
> > about implementing compat mode in criu yet so i might be
> > missing some details.
>
> Prior to 3.15, the compat code didn't have vvar data at all. In 3.15
> and up, the vvar data is accessed using PC-relative addressing, even
> in compat mode (using the usual call; mov trick to read EIP).
i see. I'll ping you for help once I start implementing compat mode ;)
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 18:01 ` Cyrill Gorcunov
@ 2014-05-20 18:18 ` H. Peter Anvin
-1 siblings, 0 replies; 36+ messages in thread
From: H. Peter Anvin @ 2014-05-20 18:18 UTC (permalink / raw)
To: Cyrill Gorcunov, Andy Lutomirski
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov
On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>
>> This patch should fix this issue, at least. If there's still a way to
>> get a native vdso that doesn't say "[vdso]", please let me know/
>
> Yes, having a native procfs way to detect vdso is much preferred!
>
Is there any path by which we can end up with [vdso] without a leading
slash in /proc/self/maps? Otherwise, why is that not "native"?
>>> The situation get worse when task was dumped on one kernel and
>>> then restored on another kernel where vdso content is different
>>> from one save in image -- is such case as I mentioned we need
>>> that named vdso proxy which redirect calls to vdso of the system
>>> where task is restoring. And when such "restored" task get checkpointed
>>> second time we don't dump new living vdso but save only old vdso
>>> proxy on disk (detecting it is a different story, in short we
>>> inject a unique mark into elf header).
>>
>> Yuck. But I don't know whether the kernel can help much here.
>
> Some prctl which would tell kernel to put vdso at specifed address.
> We can live without it for now so not a big deal (yet ;)
mremap() will do this for you.
-hpa
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 18:18 ` H. Peter Anvin
0 siblings, 0 replies; 36+ messages in thread
From: H. Peter Anvin @ 2014-05-20 18:18 UTC (permalink / raw)
To: Cyrill Gorcunov, Andy Lutomirski
Cc: X86 ML, Andrew Morton, Sasha Levin, linux-mm, Dave Jones, LKML,
Pavel Emelyanov
On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>
>> This patch should fix this issue, at least. If there's still a way to
>> get a native vdso that doesn't say "[vdso]", please let me know/
>
> Yes, having a native procfs way to detect vdso is much preferred!
>
Is there any path by which we can end up with [vdso] without a leading
slash in /proc/self/maps? Otherwise, why is that not "native"?
>>> The situation get worse when task was dumped on one kernel and
>>> then restored on another kernel where vdso content is different
>>> from one save in image -- is such case as I mentioned we need
>>> that named vdso proxy which redirect calls to vdso of the system
>>> where task is restoring. And when such "restored" task get checkpointed
>>> second time we don't dump new living vdso but save only old vdso
>>> proxy on disk (detecting it is a different story, in short we
>>> inject a unique mark into elf header).
>>
>> Yuck. But I don't know whether the kernel can help much here.
>
> Some prctl which would tell kernel to put vdso at specifed address.
> We can live without it for now so not a big deal (yet ;)
mremap() will do this for you.
-hpa
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 18:18 ` H. Peter Anvin
@ 2014-05-20 18:24 ` Andy Lutomirski
-1 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 18:24 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Cyrill Gorcunov, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>>
>>> This patch should fix this issue, at least. If there's still a way to
>>> get a native vdso that doesn't say "[vdso]", please let me know/
>>
>> Yes, having a native procfs way to detect vdso is much preferred!
>>
>
> Is there any path by which we can end up with [vdso] without a leading
> slash in /proc/self/maps? Otherwise, why is that not "native"?
Dunno. But before this patch the reverse was possible: we can end up
with a vdso that doesn't say [vdso].
>
>>>> The situation get worse when task was dumped on one kernel and
>>>> then restored on another kernel where vdso content is different
>>>> from one save in image -- is such case as I mentioned we need
>>>> that named vdso proxy which redirect calls to vdso of the system
>>>> where task is restoring. And when such "restored" task get checkpointed
>>>> second time we don't dump new living vdso but save only old vdso
>>>> proxy on disk (detecting it is a different story, in short we
>>>> inject a unique mark into elf header).
>>>
>>> Yuck. But I don't know whether the kernel can help much here.
>>
>> Some prctl which would tell kernel to put vdso at specifed address.
>> We can live without it for now so not a big deal (yet ;)
>
> mremap() will do this for you.
Except that it's buggy: it doesn't change mm->context.vdso. For
64-bit tasks, the only consumer outside exec was arch_vma_name, and
this patch removes even that. For 32-bit tasks, though, it's needed
for signal delivery.
--Andy
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 18:24 ` Andy Lutomirski
0 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 18:24 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Cyrill Gorcunov, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>>
>>> This patch should fix this issue, at least. If there's still a way to
>>> get a native vdso that doesn't say "[vdso]", please let me know/
>>
>> Yes, having a native procfs way to detect vdso is much preferred!
>>
>
> Is there any path by which we can end up with [vdso] without a leading
> slash in /proc/self/maps? Otherwise, why is that not "native"?
Dunno. But before this patch the reverse was possible: we can end up
with a vdso that doesn't say [vdso].
>
>>>> The situation get worse when task was dumped on one kernel and
>>>> then restored on another kernel where vdso content is different
>>>> from one save in image -- is such case as I mentioned we need
>>>> that named vdso proxy which redirect calls to vdso of the system
>>>> where task is restoring. And when such "restored" task get checkpointed
>>>> second time we don't dump new living vdso but save only old vdso
>>>> proxy on disk (detecting it is a different story, in short we
>>>> inject a unique mark into elf header).
>>>
>>> Yuck. But I don't know whether the kernel can help much here.
>>
>> Some prctl which would tell kernel to put vdso at specifed address.
>> We can live without it for now so not a big deal (yet ;)
>
> mremap() will do this for you.
Except that it's buggy: it doesn't change mm->context.vdso. For
64-bit tasks, the only consumer outside exec was arch_vma_name, and
this patch removes even that. For 32-bit tasks, though, it's needed
for signal delivery.
--Andy
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 18:24 ` Andy Lutomirski
@ 2014-05-20 18:27 ` H. Peter Anvin
-1 siblings, 0 replies; 36+ messages in thread
From: H. Peter Anvin @ 2014-05-20 18:27 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Cyrill Gorcunov, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On 05/20/2014 11:24 AM, Andy Lutomirski wrote:
> On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>>>
>>>> This patch should fix this issue, at least. If there's still a way to
>>>> get a native vdso that doesn't say "[vdso]", please let me know/
>>>
>>> Yes, having a native procfs way to detect vdso is much preferred!
>>>
>>
>> Is there any path by which we can end up with [vdso] without a leading
>> slash in /proc/self/maps? Otherwise, why is that not "native"?
>
> Dunno. But before this patch the reverse was possible: we can end up
> with a vdso that doesn't say [vdso].
>
That's a bug, which is being fixed. We can't go back in time and create
new interfaces on old kernels.
>>
>>>>> The situation get worse when task was dumped on one kernel and
>>>>> then restored on another kernel where vdso content is different
>>>>> from one save in image -- is such case as I mentioned we need
>>>>> that named vdso proxy which redirect calls to vdso of the system
>>>>> where task is restoring. And when such "restored" task get checkpointed
>>>>> second time we don't dump new living vdso but save only old vdso
>>>>> proxy on disk (detecting it is a different story, in short we
>>>>> inject a unique mark into elf header).
>>>>
>>>> Yuck. But I don't know whether the kernel can help much here.
>>>
>>> Some prctl which would tell kernel to put vdso at specifed address.
>>> We can live without it for now so not a big deal (yet ;)
>>
>> mremap() will do this for you.
>
> Except that it's buggy: it doesn't change mm->context.vdso. For
> 64-bit tasks, the only consumer outside exec was arch_vma_name, and
> this patch removes even that. For 32-bit tasks, though, it's needed
> for signal delivery.
>
Again, a bug, let's fix it rather than saying we need a new interface.
-hpa
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 18:27 ` H. Peter Anvin
0 siblings, 0 replies; 36+ messages in thread
From: H. Peter Anvin @ 2014-05-20 18:27 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Cyrill Gorcunov, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On 05/20/2014 11:24 AM, Andy Lutomirski wrote:
> On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>>>
>>>> This patch should fix this issue, at least. If there's still a way to
>>>> get a native vdso that doesn't say "[vdso]", please let me know/
>>>
>>> Yes, having a native procfs way to detect vdso is much preferred!
>>>
>>
>> Is there any path by which we can end up with [vdso] without a leading
>> slash in /proc/self/maps? Otherwise, why is that not "native"?
>
> Dunno. But before this patch the reverse was possible: we can end up
> with a vdso that doesn't say [vdso].
>
That's a bug, which is being fixed. We can't go back in time and create
new interfaces on old kernels.
>>
>>>>> The situation get worse when task was dumped on one kernel and
>>>>> then restored on another kernel where vdso content is different
>>>>> from one save in image -- is such case as I mentioned we need
>>>>> that named vdso proxy which redirect calls to vdso of the system
>>>>> where task is restoring. And when such "restored" task get checkpointed
>>>>> second time we don't dump new living vdso but save only old vdso
>>>>> proxy on disk (detecting it is a different story, in short we
>>>>> inject a unique mark into elf header).
>>>>
>>>> Yuck. But I don't know whether the kernel can help much here.
>>>
>>> Some prctl which would tell kernel to put vdso at specifed address.
>>> We can live without it for now so not a big deal (yet ;)
>>
>> mremap() will do this for you.
>
> Except that it's buggy: it doesn't change mm->context.vdso. For
> 64-bit tasks, the only consumer outside exec was arch_vma_name, and
> this patch removes even that. For 32-bit tasks, though, it's needed
> for signal delivery.
>
Again, a bug, let's fix it rather than saying we need a new interface.
-hpa
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 18:27 ` H. Peter Anvin
@ 2014-05-20 18:38 ` Andy Lutomirski
-1 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 18:38 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Cyrill Gorcunov, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On Tue, May 20, 2014 at 11:27 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 05/20/2014 11:24 AM, Andy Lutomirski wrote:
>> On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>>> On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>>>>
>>>>> This patch should fix this issue, at least. If there's still a way to
>>>>> get a native vdso that doesn't say "[vdso]", please let me know/
>>>>
>>>> Yes, having a native procfs way to detect vdso is much preferred!
>>>>
>>>
>>> Is there any path by which we can end up with [vdso] without a leading
>>> slash in /proc/self/maps? Otherwise, why is that not "native"?
>>
>> Dunno. But before this patch the reverse was possible: we can end up
>> with a vdso that doesn't say [vdso].
>>
>
> That's a bug, which is being fixed. We can't go back in time and create
> new interfaces on old kernels.
>
>>>
>>>>>> The situation get worse when task was dumped on one kernel and
>>>>>> then restored on another kernel where vdso content is different
>>>>>> from one save in image -- is such case as I mentioned we need
>>>>>> that named vdso proxy which redirect calls to vdso of the system
>>>>>> where task is restoring. And when such "restored" task get checkpointed
>>>>>> second time we don't dump new living vdso but save only old vdso
>>>>>> proxy on disk (detecting it is a different story, in short we
>>>>>> inject a unique mark into elf header).
>>>>>
>>>>> Yuck. But I don't know whether the kernel can help much here.
>>>>
>>>> Some prctl which would tell kernel to put vdso at specifed address.
>>>> We can live without it for now so not a big deal (yet ;)
>>>
>>> mremap() will do this for you.
>>
>> Except that it's buggy: it doesn't change mm->context.vdso. For
>> 64-bit tasks, the only consumer outside exec was arch_vma_name, and
>> this patch removes even that. For 32-bit tasks, though, it's needed
>> for signal delivery.
>>
>
> Again, a bug, let's fix it rather than saying we need a new interface.
What happens if someone remaps just part of the vdso?
Presumably we'd just track the position of the first page of the vdso,
but this might be hard to implement: I don't think there's any
callback from the core mm code for this.
--Andy
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 18:38 ` Andy Lutomirski
0 siblings, 0 replies; 36+ messages in thread
From: Andy Lutomirski @ 2014-05-20 18:38 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Cyrill Gorcunov, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On Tue, May 20, 2014 at 11:27 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 05/20/2014 11:24 AM, Andy Lutomirski wrote:
>> On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>>> On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
>>>>>
>>>>> This patch should fix this issue, at least. If there's still a way to
>>>>> get a native vdso that doesn't say "[vdso]", please let me know/
>>>>
>>>> Yes, having a native procfs way to detect vdso is much preferred!
>>>>
>>>
>>> Is there any path by which we can end up with [vdso] without a leading
>>> slash in /proc/self/maps? Otherwise, why is that not "native"?
>>
>> Dunno. But before this patch the reverse was possible: we can end up
>> with a vdso that doesn't say [vdso].
>>
>
> That's a bug, which is being fixed. We can't go back in time and create
> new interfaces on old kernels.
>
>>>
>>>>>> The situation get worse when task was dumped on one kernel and
>>>>>> then restored on another kernel where vdso content is different
>>>>>> from one save in image -- is such case as I mentioned we need
>>>>>> that named vdso proxy which redirect calls to vdso of the system
>>>>>> where task is restoring. And when such "restored" task get checkpointed
>>>>>> second time we don't dump new living vdso but save only old vdso
>>>>>> proxy on disk (detecting it is a different story, in short we
>>>>>> inject a unique mark into elf header).
>>>>>
>>>>> Yuck. But I don't know whether the kernel can help much here.
>>>>
>>>> Some prctl which would tell kernel to put vdso at specifed address.
>>>> We can live without it for now so not a big deal (yet ;)
>>>
>>> mremap() will do this for you.
>>
>> Except that it's buggy: it doesn't change mm->context.vdso. For
>> 64-bit tasks, the only consumer outside exec was arch_vma_name, and
>> this patch removes even that. For 32-bit tasks, though, it's needed
>> for signal delivery.
>>
>
> Again, a bug, let's fix it rather than saying we need a new interface.
What happens if someone remaps just part of the vdso?
Presumably we'd just track the position of the first page of the vdso,
but this might be hard to implement: I don't think there's any
callback from the core mm code for ths.
--Andy
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-20 18:24 ` Andy Lutomirski
@ 2014-05-20 18:39 ` Cyrill Gorcunov
-1 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 18:39 UTC (permalink / raw)
To: Andy Lutomirski
Cc: H. Peter Anvin, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On Tue, May 20, 2014 at 11:24:56AM -0700, Andy Lutomirski wrote:
> On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> > On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
> >>>
> >>> This patch should fix this issue, at least. If there's still a way to
> >>> get a native vdso that doesn't say "[vdso]", please let me know/
> >>
> >> Yes, having a native procfs way to detect vdso is much preferred!
> >>
> >
> > Is there any path by which we can end up with [vdso] without a leading
> > slash in /proc/self/maps? Otherwise, why is that not "native"?
>
> Dunno. But before this patch the reverse was possible: we can end up
> with a vdso that doesn't say [vdso].
I fear I don't understand the phrase "leading slash in /proc/self/maps".
Peter could you rephrase please?
> >>>> The situation get worse when task was dumped on one kernel and
> >>>> then restored on another kernel where vdso content is different
> >>>> from one save in image -- is such case as I mentioned we need
> >>>> that named vdso proxy which redirect calls to vdso of the system
> >>>> where task is restoring. And when such "restored" task get checkpointed
> >>>> second time we don't dump new living vdso but save only old vdso
> >>>> proxy on disk (detecting it is a different story, in short we
> >>>> inject a unique mark into elf header).
> >>>
> >>> Yuck. But I don't know whether the kernel can help much here.
> >>
> >> Some prctl which would tell kernel to put vdso at specifed address.
> >> We can live without it for now so not a big deal (yet ;)
> >
> > mremap() will do this for you.
>
> Except that it's buggy: it doesn't change mm->context.vdso. For
> 64-bit tasks, the only consumer outside exec was arch_vma_name, and
> this patch removes even that. For 32-bit tasks, though, it's needed
> for signal delivery.
yes, fwiw we can deal with it currently but i'm not sure yet about
compat case simply because I didn't look precisely.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 18:39 ` Cyrill Gorcunov
0 siblings, 0 replies; 36+ messages in thread
From: Cyrill Gorcunov @ 2014-05-20 18:39 UTC (permalink / raw)
To: Andy Lutomirski
Cc: H. Peter Anvin, X86 ML, Andrew Morton, Sasha Levin, linux-mm,
Dave Jones, LKML, Pavel Emelyanov
On Tue, May 20, 2014 at 11:24:56AM -0700, Andy Lutomirski wrote:
> On Tue, May 20, 2014 at 11:18 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> > On 05/20/2014 11:01 AM, Cyrill Gorcunov wrote:
> >>>
> >>> This patch should fix this issue, at least. If there's still a way to
> >>> get a native vdso that doesn't say "[vdso]", please let me know/
> >>
> >> Yes, having a native procfs way to detect vdso is much preferred!
> >>
> >
> > Is there any path by which we can end up with [vdso] without a leading
> > slash in /proc/self/maps? Otherwise, why is that not "native"?
>
> Dunno. But before this patch the reverse was possible: we can end up
> with a vdso that doesn't say [vdso].
I fear I don't understand the phrase "leading slash in /proc/self/maps".
Peter could you rephrase please?
> >>>> The situation get worse when task was dumped on one kernel and
> >>>> then restored on another kernel where vdso content is different
> >>>> from one save in image -- is such case as I mentioned we need
> >>>> that named vdso proxy which redirect calls to vdso of the system
> >>>> where task is restoring. And when such "restored" task get checkpointed
> >>>> second time we don't dump new living vdso but save only old vdso
> >>>> proxy on disk (detecting it is a different story, in short we
> >>>> inject a unique mark into elf header).
> >>>
> >>> Yuck. But I don't know whether the kernel can help much here.
> >>
> >> Some prctl which would tell kernel to put vdso at specifed address.
> >> We can live without it for now so not a big deal (yet ;)
> >
> > mremap() will do this for you.
>
> Except that it's buggy: it doesn't change mm->context.vdso. For
> 64-bit tasks, the only consumer outside exec was arch_vma_name, and
> this patch removes even that. For 32-bit tasks, though, it's needed
> for signal delivery.
yes, fwiw we can deal with it currently but i'm not sure yet about
compat case simply because I didn't look precisely.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-19 22:58 ` Andy Lutomirski
@ 2014-05-20 18:37 ` H. Peter Anvin
-1 siblings, 0 replies; 36+ messages in thread
From: H. Peter Anvin @ 2014-05-20 18:37 UTC (permalink / raw)
To: Andy Lutomirski, x86, Andrew Morton, Sasha Levin, linux-mm, Dave Jones
Cc: LKML, Cyrill Gorcunov, Pavel Emelyanov, Cyrill Gorcunov
On 05/19/2014 03:58 PM, Andy Lutomirski wrote:
>
> As a side effect, the vvar area will show up in core dumps. This
> could be considered weird and is fixable. Thoughts?
>
On this issue... I don't know if this is likely to break anything. My
suggestion is that we accept it as-is but be prepared to deal with it if
it breaks something.
-hpa
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 3/4] x86,mm: Improve _install_special_mapping and fix x86 vdso naming
@ 2014-05-20 18:37 ` H. Peter Anvin
0 siblings, 0 replies; 36+ messages in thread
From: H. Peter Anvin @ 2014-05-20 18:37 UTC (permalink / raw)
To: Andy Lutomirski, x86, Andrew Morton, Sasha Levin, linux-mm, Dave Jones
Cc: LKML, Cyrill Gorcunov, Pavel Emelyanov, Cyrill Gorcunov
On 05/19/2014 03:58 PM, Andy Lutomirski wrote:
>
> As a side effect, the vvar area will show up in core dumps. This
> could be considered weird and is fixable. Thoughts?
>
On this issue... I don't know if this is likely to break anything. My
suggestion is that we accept it as-is but be prepared to deal with it if
it breaks something.
-hpa
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [tip:x86/vdso] x86, mm: Improve _install_special_mapping and fix x86 vdso naming
2014-05-19 22:58 ` Andy Lutomirski
` (2 preceding siblings ...)
(?)
@ 2014-05-21 23:21 ` tip-bot for Andy Lutomirski
-1 siblings, 0 replies; 36+ messages in thread
From: tip-bot for Andy Lutomirski @ 2014-05-21 23:21 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, hpa, mingo, gorcunov, xemul, luto, tglx, hpa
Commit-ID: a62c34bd2a8a3f159945becd57401e478818d51c
Gitweb: http://git.kernel.org/tip/a62c34bd2a8a3f159945becd57401e478818d51c
Author: Andy Lutomirski <luto@amacapital.net>
AuthorDate: Mon, 19 May 2014 15:58:33 -0700
Committer: H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Tue, 20 May 2014 11:38:42 -0700
x86, mm: Improve _install_special_mapping and fix x86 vdso naming
Using arch_vma_name to give special mappings a name is awkward. x86
currently implements it by comparing the start address of the vma to
the expected address of the vdso. This requires tracking the start
address of special mappings and is probably buggy if a special vma
is split or moved.
Improve _install_special_mapping to just name the vma directly. Use
it to give the x86 vvar area a name, which should make CRIU's life
easier.
As a side effect, the vvar area will show up in core dumps. This
could be considered weird and is fixable.
[hpa: I say we accept this as-is but be prepared to deal with knocking
out the vvars from core dumps if this becomes a problem.]
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/276b39b6b645fb11e345457b503f17b83c2c6fd0.1400538962.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
arch/x86/include/asm/vdso.h | 6 ++-
arch/x86/mm/init_64.c | 3 --
arch/x86/vdso/vdso2c.h | 5 ++-
arch/x86/vdso/vdso32-setup.c | 7 ----
arch/x86/vdso/vma.c | 25 ++++++++-----
include/linux/mm.h | 4 +-
include/linux/mm_types.h | 6 +++
mm/mmap.c | 89 +++++++++++++++++++++++++++++---------------
8 files changed, 94 insertions(+), 51 deletions(-)
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index d0a2c90..30be253 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -7,10 +7,14 @@
#ifndef __ASSEMBLER__
+#include <linux/mm_types.h>
+
struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
- struct page **pages; /* Big enough for data/size page pointers */
+
+ /* text_mapping.pages is big enough for data/size page pointers */
+ struct vm_special_mapping text_mapping;
unsigned long alt, alt_len;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6f88184..9deb59b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1223,9 +1223,6 @@ int in_gate_area_no_mm(unsigned long addr)
const char *arch_vma_name(struct vm_area_struct *vma)
{
- if (vma->vm_mm && vma->vm_start ==
- (long __force)vma->vm_mm->context.vdso)
- return "[vdso]";
if (vma == &gate_vma)
return "[vsyscall]";
return NULL;
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index ed2e894..3dcc61e 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -136,7 +136,10 @@ static int GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", data_size);
- fprintf(outfile, "\t.pages = pages,\n");
+ fprintf(outfile, "\t.text_mapping = {\n");
+ fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
+ fprintf(outfile, "\t\t.pages = pages,\n");
+ fprintf(outfile, "\t},\n");
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
(unsigned long)alt_sec->sh_offset);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index c3ed708..e4f7781 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -119,13 +119,6 @@ __initcall(ia32_binfmt_init);
#else /* CONFIG_X86_32 */
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
- return "[vdso]";
- return NULL;
-}
-
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 8ad0081..e1513c4 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -30,7 +30,8 @@ void __init init_vdso_image(const struct vdso_image *image)
BUG_ON(image->size % PAGE_SIZE != 0);
for (i = 0; i < npages; i++)
- image->pages[i] = virt_to_page(image->data + i*PAGE_SIZE);
+ image->text_mapping.pages[i] =
+ virt_to_page(image->data + i*PAGE_SIZE);
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
@@ -91,6 +92,10 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
unsigned long addr;
int ret = 0;
static struct page *no_pages[] = {NULL};
+ static struct vm_special_mapping vvar_mapping = {
+ .name = "[vvar]",
+ .pages = no_pages,
+ };
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -112,21 +117,23 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
- ret = install_special_mapping(mm,
- addr,
- image->size,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- image->pages);
+ vma = _install_special_mapping(mm,
+ addr,
+ image->size,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ &image->text_mapping);
- if (ret)
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
goto up_fail;
+ }
vma = _install_special_mapping(mm,
addr + image->size,
image->sym_end_mapping - image->size,
VM_READ,
- no_pages);
+ &vvar_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63f8d4e..05aab09 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1782,7 +1782,9 @@ extern struct file *get_mm_exe_file(struct mm_struct *mm);
extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
- unsigned long flags, struct page **pages);
+ unsigned long flags,
+ const struct vm_special_mapping *spec);
+/* This is an obsolete alternative to _install_special_mapping. */
extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags, struct page **pages);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8967e20..22c6f4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -510,4 +510,10 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
}
#endif
+struct vm_special_mapping
+{
+ const char *name;
+ struct page **pages;
+};
+
#endif /* _LINUX_MM_TYPES_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index b1202cf..52bbc95 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2872,6 +2872,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
return 1;
}
+static int special_mapping_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf);
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static const char *special_mapping_name(struct vm_area_struct *vma)
+{
+ return ((struct vm_special_mapping *)vma->vm_private_data)->name;
+}
+
+static const struct vm_operations_struct special_mapping_vmops = {
+ .close = special_mapping_close,
+ .fault = special_mapping_fault,
+ .name = special_mapping_name,
+};
+
+static const struct vm_operations_struct legacy_special_mapping_vmops = {
+ .close = special_mapping_close,
+ .fault = special_mapping_fault,
+};
static int special_mapping_fault(struct vm_area_struct *vma,
struct vm_fault *vmf)
@@ -2887,7 +2912,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
*/
pgoff = vmf->pgoff - vma->vm_pgoff;
- for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
+ if (vma->vm_ops == &legacy_special_mapping_vmops)
+ pages = vma->vm_private_data;
+ else
+ pages = ((struct vm_special_mapping *)vma->vm_private_data)->
+ pages;
+
+ for (; pgoff && *pages; ++pages)
pgoff--;
if (*pages) {
@@ -2900,30 +2931,11 @@ static int special_mapping_fault(struct vm_area_struct *vma,
return VM_FAULT_SIGBUS;
}
-/*
- * Having a close hook prevents vma merging regardless of flags.
- */
-static void special_mapping_close(struct vm_area_struct *vma)
-{
-}
-
-static const struct vm_operations_struct special_mapping_vmops = {
- .close = special_mapping_close,
- .fault = special_mapping_fault,
-};
-
-/*
- * Called with mm->mmap_sem held for writing.
- * Insert a new vma covering the given region, with the given flags.
- * Its pages are supplied by the given array of struct page *.
- * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
- * The region past the last page supplied will always produce SIGBUS.
- * The array pointer and the pages it points to are assumed to stay alive
- * for as long as this mapping might exist.
- */
-struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
- unsigned long addr, unsigned long len,
- unsigned long vm_flags, struct page **pages)
+static struct vm_area_struct *__install_special_mapping(
+ struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, const struct vm_operations_struct *ops,
+ void *priv)
{
int ret;
struct vm_area_struct *vma;
@@ -2940,8 +2952,8 @@ struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- vma->vm_ops = &special_mapping_vmops;
- vma->vm_private_data = pages;
+ vma->vm_ops = ops;
+ vma->vm_private_data = priv;
ret = insert_vm_struct(mm, vma);
if (ret)
@@ -2958,12 +2970,31 @@ out:
return ERR_PTR(ret);
}
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+struct vm_area_struct *_install_special_mapping(
+ struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, const struct vm_special_mapping *spec)
+{
+ return __install_special_mapping(mm, addr, len, vm_flags,
+ &special_mapping_vmops, (void *)spec);
+}
+
int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long vm_flags, struct page **pages)
{
- struct vm_area_struct *vma = _install_special_mapping(mm,
- addr, len, vm_flags, pages);
+ struct vm_area_struct *vma = __install_special_mapping(
+ mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
+ (void *)pages);
if (IS_ERR(vma))
return PTR_ERR(vma);
^ permalink raw reply related [flat|nested] 36+ messages in thread