linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy
@ 2022-11-29 19:18 Jann Horn
  2022-11-29 19:18 ` [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm() Jann Horn
  2022-11-30 10:15 ` [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy Jann Horn
  0 siblings, 2 replies; 11+ messages in thread
From: Jann Horn @ 2022-11-29 19:18 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andrei Vagin, linux-kernel

find_timens_vvar_page() is not arch-specific, as can be seen from how all
five per-architecture versions of it are the same.
(arm64, powerpc and riscv are exactly the same; x86 and s390 have two
characters difference inside a comment, less blank lines, and mark the
!CONFIG_TIME_NS version as inline.)

Refactor the five copies into a central copy in kernel/time/namespace.c.

Marked for stable backporting because it is a prerequisite for the
following patch.

Cc: stable@vger.kernel.org
Signed-off-by: Jann Horn <jannh@google.com>
---
 arch/arm64/kernel/vdso.c       | 22 ----------------------
 arch/powerpc/kernel/vdso.c     | 22 ----------------------
 arch/riscv/kernel/vdso.c       | 22 ----------------------
 arch/s390/kernel/vdso.c        | 20 --------------------
 arch/x86/entry/vdso/vma.c      | 23 -----------------------
 include/linux/time_namespace.h |  6 ++++++
 kernel/time/namespace.c        | 20 ++++++++++++++++++++
 7 files changed, 26 insertions(+), 109 deletions(-)

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 99ae81ab91a74..e59a32aa0c49d 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -151,28 +151,6 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 	mmap_read_unlock(mm);
 	return 0;
 }
-
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	if (likely(vma->vm_mm == current->mm))
-		return current->nsproxy->time_ns->vvar_page;
-
-	/*
-	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
-	 * through interfaces like /proc/$pid/mem or
-	 * process_vm_{readv,writev}() as long as there's no .access()
-	 * in special_mapping_vmops.
-	 * For more details check_vma_flags() and __access_remote_vm()
-	 */
-	WARN(1, "vvar_page accessed remotely");
-
-	return NULL;
-}
-#else
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	return NULL;
-}
 #endif
 
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 4abc019497020..507f8228f983b 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -129,28 +129,6 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 
 	return 0;
 }
-
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	if (likely(vma->vm_mm == current->mm))
-		return current->nsproxy->time_ns->vvar_page;
-
-	/*
-	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
-	 * through interfaces like /proc/$pid/mem or
-	 * process_vm_{readv,writev}() as long as there's no .access()
-	 * in special_mapping_vmops.
-	 * For more details check_vma_flags() and __access_remote_vm()
-	 */
-	WARN(1, "vvar_page accessed remotely");
-
-	return NULL;
-}
-#else
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	return NULL;
-}
 #endif
 
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 123d05255fcfa..e410275918ac4 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -137,28 +137,6 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 	mmap_read_unlock(mm);
 	return 0;
 }
-
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	if (likely(vma->vm_mm == current->mm))
-		return current->nsproxy->time_ns->vvar_page;
-
-	/*
-	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
-	 * through interfaces like /proc/$pid/mem or
-	 * process_vm_{readv,writev}() as long as there's no .access()
-	 * in special_mapping_vmops.
-	 * For more details check_vma_flags() and __access_remote_vm()
-	 */
-	WARN(1, "vvar_page accessed remotely");
-
-	return NULL;
-}
-#else
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	return NULL;
-}
 #endif
 
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 3105ca5bd4701..d6df7169c01f2 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -44,21 +44,6 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page)
 	return (struct vdso_data *)(vvar_page);
 }
 
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	if (likely(vma->vm_mm == current->mm))
-		return current->nsproxy->time_ns->vvar_page;
-	/*
-	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
-	 * through interfaces like /proc/$pid/mem or
-	 * process_vm_{readv,writev}() as long as there's no .access()
-	 * in special_mapping_vmops().
-	 * For more details check_vma_flags() and __access_remote_vm()
-	 */
-	WARN(1, "vvar_page accessed remotely");
-	return NULL;
-}
-
 /*
  * The VVAR page layout depends on whether a task belongs to the root or
  * non-root time namespace. Whenever a task changes its namespace, the VVAR
@@ -84,11 +69,6 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 	mmap_read_unlock(mm);
 	return 0;
 }
-#else
-static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	return NULL;
-}
 #endif
 
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 311eae30e0894..6b36485054e8a 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -98,24 +98,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 }
 
 #ifdef CONFIG_TIME_NS
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	if (likely(vma->vm_mm == current->mm))
-		return current->nsproxy->time_ns->vvar_page;
-
-	/*
-	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
-	 * through interfaces like /proc/$pid/mem or
-	 * process_vm_{readv,writev}() as long as there's no .access()
-	 * in special_mapping_vmops().
-	 * For more details check_vma_flags() and __access_remote_vm()
-	 */
-
-	WARN(1, "vvar_page accessed remotely");
-
-	return NULL;
-}
-
 /*
  * The vvar page layout depends on whether a task belongs to the root or
  * non-root time namespace. Whenever a task changes its namespace, the VVAR
@@ -140,11 +122,6 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 
 	return 0;
 }
-#else
-static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	return NULL;
-}
 #endif
 
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 3146f1c056c98..bb9d3f5542f8e 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -45,6 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags,
 void free_time_ns(struct time_namespace *ns);
 void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
 struct vdso_data *arch_get_vdso_data(void *vvar_page);
+struct page *find_timens_vvar_page(struct vm_area_struct *vma);
 
 static inline void put_time_ns(struct time_namespace *ns)
 {
@@ -141,6 +142,11 @@ static inline void timens_on_fork(struct nsproxy *nsproxy,
 	return;
 }
 
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+
 static inline void timens_add_monotonic(struct timespec64 *ts) { }
 static inline void timens_add_boottime(struct timespec64 *ts) { }
 
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index aec832801c26c..761c0ada5142a 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -192,6 +192,26 @@ static void timens_setup_vdso_data(struct vdso_data *vdata,
 	offset[CLOCK_BOOTTIME_ALARM]	= boottime;
 }
 
+struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops().
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+
+
+
 /*
  * Protects possibly multiple offsets writers racing each other
  * and tasks entering the namespace.

base-commit: ca57f02295f188d6c65ec02202402979880fa6d8
-- 
2.38.1.584.g0f3c55d4c2-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-29 19:18 [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy Jann Horn
@ 2022-11-29 19:18 ` Jann Horn
  2022-11-29 21:18   ` Thomas Gleixner
  2022-11-30 10:15 ` [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy Jann Horn
  1 sibling, 1 reply; 11+ messages in thread
From: Jann Horn @ 2022-11-29 19:18 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andrei Vagin, linux-kernel

find_timens_vvar_page() doesn't work when current's timens does not match
the timens associated with current->mm.
v6 of the series adding this code [1] had some complicated code to deal
with this case, but v7 [2] removed that.

Since the vvar region is designed to only be accessed by vDSO code, and
vDSO code can't run in kthread context, it should be fine to error out in
this case.

Backporting note: This commit depends on the preceding refactoring patch.

[1] https://lore.kernel.org/lkml/20190815163836.2927-24-dima@arista.com/
[2] https://lore.kernel.org/lkml/20191011012341.846266-24-dima@arista.com/

Fixes: ee3cda8e4606 ("arm64/vdso: Handle faults on timens page")
Fixes: 74205b3fc2ef ("powerpc/vdso: Add support for time namespaces")
Fixes: dffe11e280a4 ("riscv/vdso: Add support for time namespaces")
Fixes: eeab78b05d20 ("s390/vdso: implement generic vdso time namespace support")
Fixes: af34ebeb866f ("x86/vdso: Handle faults on timens page")
Cc: stable@vger.kernel.org
Signed-off-by: Jann Horn <jannh@google.com>
---
 kernel/time/namespace.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 761c0ada5142a..7315d0aeb1d21 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -194,6 +194,17 @@ static void timens_setup_vdso_data(struct vdso_data *vdata,
 
 struct page *find_timens_vvar_page(struct vm_area_struct *vma)
 {
+	/*
+	 * We can't handle faults where current's timens does not match the
+	 * timens associated with the mm_struct. This can happen if a page fault
+	 * occurs in a kthread that is using kthread_use_mm().
+	 */
+	if (current->flags & PF_KTHREAD) {
+		pr_warn("%s: kthread %s/%d tried to fault in timens page\n",
+			__func__, current->comm, current->pid);
+		return NULL;
+	}
+
 	if (likely(vma->vm_mm == current->mm))
 		return current->nsproxy->time_ns->vvar_page;
 
-- 
2.38.1.584.g0f3c55d4c2-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-29 19:18 ` [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm() Jann Horn
@ 2022-11-29 21:18   ` Thomas Gleixner
  2022-11-29 22:28     ` Jann Horn
  0 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2022-11-29 21:18 UTC (permalink / raw)
  To: Jann Horn; +Cc: Andrei Vagin, linux-kernel

On Tue, Nov 29 2022 at 20:18, Jann Horn wrote:

> find_timens_vvar_page() doesn't work when current's timens does not match
> the timens associated with current->mm.
> v6 of the series adding this code [1] had some complicated code to deal
> with this case, but v7 [2] removed that.
>
> Since the vvar region is designed to only be accessed by vDSO code, and
> vDSO code can't run in kthread context, it should be fine to error out in
> this case.

Should? Either it is correct or not.

But the way more interesting question is:
  
>  struct page *find_timens_vvar_page(struct vm_area_struct *vma)
>  {
> +	/*
> +	 * We can't handle faults where current's timens does not match the
> +	 * timens associated with the mm_struct. This can happen if a page fault
> +	 * occurs in a kthread that is using kthread_use_mm().
> +	 */

How does a kthread, which obviously did kthread_use_mm(), end up trying to
fault in the time namespace vvar page?

It's probably something nasty, but the changelog has a big information
void.

It neither answers the obvious question why this is a problem of the
time namespace vvar page and not a general issue versus a kthread, which
borrowed a user mm, ending up in vdso_fault() in the first place?

None of those VDSO (user space) addresses are subject to be faulted in
by anything else than the associated user space task(s).

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-29 21:18   ` Thomas Gleixner
@ 2022-11-29 22:28     ` Jann Horn
  2022-11-29 22:34       ` Jann Horn
  2022-11-30  0:07       ` Thomas Gleixner
  0 siblings, 2 replies; 11+ messages in thread
From: Jann Horn @ 2022-11-29 22:28 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andrei Vagin, linux-kernel

On Tue, Nov 29, 2022 at 10:18 PM Thomas Gleixner <tglx@linutronix.de> wrote:
> On Tue, Nov 29 2022 at 20:18, Jann Horn wrote:
>
> > find_timens_vvar_page() doesn't work when current's timens does not match
> > the timens associated with current->mm.
> > v6 of the series adding this code [1] had some complicated code to deal
> > with this case, but v7 [2] removed that.
> >
> > Since the vvar region is designed to only be accessed by vDSO code, and
> > vDSO code can't run in kthread context, it should be fine to error out in
> > this case.
>
> Should? Either it is correct or not.
>
> But the way more interesting question is:
>
> >  struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> >  {
> > +     /*
> > +      * We can't handle faults where current's timens does not match the
> > +      * timens associated with the mm_struct. This can happen if a page fault
> > +      * occurs in a kthread that is using kthread_use_mm().
> > +      */
>
> How does a kthread, which obviously did kthread_use_mm(), end up trying to
> fault in the time namespace vvar page?

By doing copy_from_user()? That's what kthread_use_mm() is for, right?
If you look through the users of kthread_use_mm(), most of them use it
to be able to use the normal usercopy functions. See the users in usb
gadget code, and the VFIO code, and the AMD GPU code. And if you're
doing usercopy on userspace addresses, then you can basically always
hit a vvar page - even if you had somehow checked beforehand what the
address points to, userspace could have moved a vvar region in that
spot in the meantime.

That said, I haven't actually tried it. But I don't think there's
anything in the page fault handling path that distinguishes between
copy_from_user() faults in kthread context and other userspace faults
in a relevant way?

> It's probably something nasty, but the changelog has a big information
> void.
>
> It neither answers the obvious question why this is a problem of the
> time namespace vvar page and not a general issue versus a kthread, which
> borrowed a user mm, ending up in vdso_fault() in the first place?

Is it a problem if a kthread ends up in the other parts of
vdso_fault() or vvar_fault()? From what I can tell, nothing in there
except for the timens stuff is going to care whether it's hit from a
userspace fault or from a kthread.

Though, looking at it again now, I guess the `sym_offset ==
image->sym_vvar_page` path is also going to misbehave, so I guess we
could try to instead make the vdso/vvar fault handlers bail out in
kthread context for all the architectures, since we're only going to
hit that if userspace is deliberately doing something bad...

> None of those VDSO (user space) addresses are subject to be faulted in
> by anything else than the associated user space task(s).

Are you saying that it's not possible or that it doesn't happen when
userspace is well-behaved?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-29 22:28     ` Jann Horn
@ 2022-11-29 22:34       ` Jann Horn
  2022-11-30  0:09         ` Thomas Gleixner
  2022-11-30  0:07       ` Thomas Gleixner
  1 sibling, 1 reply; 11+ messages in thread
From: Jann Horn @ 2022-11-29 22:34 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andrei Vagin, linux-kernel

On Tue, Nov 29, 2022 at 11:28 PM Jann Horn <jannh@google.com> wrote:
>
> On Tue, Nov 29, 2022 at 10:18 PM Thomas Gleixner <tglx@linutronix.de> wrote:
> > On Tue, Nov 29 2022 at 20:18, Jann Horn wrote:
> >
> > > find_timens_vvar_page() doesn't work when current's timens does not match
> > > the timens associated with current->mm.
> > > v6 of the series adding this code [1] had some complicated code to deal
> > > with this case, but v7 [2] removed that.
> > >
> > > Since the vvar region is designed to only be accessed by vDSO code, and
> > > vDSO code can't run in kthread context, it should be fine to error out in
> > > this case.
> >
> > Should? Either it is correct or not.
> >
> > But the way more interesting question is:
> >
> > >  struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> > >  {
> > > +     /*
> > > +      * We can't handle faults where current's timens does not match the
> > > +      * timens associated with the mm_struct. This can happen if a page fault
> > > +      * occurs in a kthread that is using kthread_use_mm().
> > > +      */
> >
> > How does a kthread, which obviously did kthread_use_mm(), end up trying to
> > fault in the time namespace vvar page?
>
> By doing copy_from_user()? That's what kthread_use_mm() is for, right?
> If you look through the users of kthread_use_mm(), most of them use it
> to be able to use the normal usercopy functions. See the users in usb
> gadget code, and the VFIO code, and the AMD GPU code. And if you're
> doing usercopy on userspace addresses, then you can basically always
> hit a vvar page - even if you had somehow checked beforehand what the
> address points to, userspace could have moved a vvar region in that
> spot in the meantime.
>
> That said, I haven't actually tried it. But I don't think there's
> anything in the page fault handling path that distinguishes between
> copy_from_user() faults in kthread context and other userspace faults
> in a relevant way?

Ah, but I guess even if this can happen, it's not actually as bad as I
thought, since kthreads are in init_time_ns, and init_time_ns doesn't
have a ->vvar_page, so this isn't going to lead to anything terrible
like page UAF, and it's just a garbage-in-garbage-out scenario.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-29 22:28     ` Jann Horn
  2022-11-29 22:34       ` Jann Horn
@ 2022-11-30  0:07       ` Thomas Gleixner
  2022-11-30 22:48         ` David Laight
  1 sibling, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2022-11-30  0:07 UTC (permalink / raw)
  To: Jann Horn; +Cc: Andrei Vagin, linux-kernel

On Tue, Nov 29 2022 at 23:28, Jann Horn wrote:
> On Tue, Nov 29, 2022 at 10:18 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>> But the way more interesting question is:
>>
>> >  struct page *find_timens_vvar_page(struct vm_area_struct *vma)
>> >  {
>> > +     /*
>> > +      * We can't handle faults where current's timens does not match the
>> > +      * timens associated with the mm_struct. This can happen if a page fault
>> > +      * occurs in a kthread that is using kthread_use_mm().
>> > +      */
>>
>> How does a kthread, which obviously did kthread_use_mm(), end up trying to
>> fault in the time namespace vvar page?
>
> By doing copy_from_user()? That's what kthread_use_mm() is for, right?
> If you look through the users of kthread_use_mm(), most of them use it
> to be able to use the normal usercopy functions. See the users in usb
> gadget code, and the VFIO code, and the AMD GPU code. And if you're
> doing usercopy on userspace addresses, then you can basically always
> hit a vvar page - even if you had somehow checked beforehand what the
> address points to, userspace could have moved a vvar region in that
> spot in the meantime.
>
> That said, I haven't actually tried it. But I don't think there's
> anything in the page fault handling path that distinguishes between
> copy_from_user() faults in kthread context and other userspace faults
> in a relevant way?

True.

>> It neither answers the obvious question why this is a problem of the
>> time namespace vvar page and not a general issue versus a kthread, which
>> borrowed a user mm, ending up in vdso_fault() in the first place?
>
> Is it a problem if a kthread ends up in the other parts of
> vdso_fault() or vvar_fault()? From what I can tell, nothing in there
> except for the timens stuff is going to care whether it's hit from a
> userspace fault or from a kthread.
>
> Though, looking at it again now, I guess the `sym_offset ==
> image->sym_vvar_page` path is also going to misbehave, so I guess we
> could try to instead make the vdso/vvar fault handlers bail out in
> kthread context for all the architectures, since we're only going to
> hit that if userspace is deliberately doing something bad...

Deliberately or stupidly, does not matter. But squashing the problem
right at the entry point is definitely better than making it a
special case of timens.

>> None of those VDSO (user space) addresses are subject to be faulted in
>> by anything else than the associated user space task(s).
>
> Are you saying that it's not possible or that it doesn't happen when
> userspace is well-behaved?

My subconscious self told me that a kthread won't do that unless it's
buggered which makes the vdso fault path the least of our problems, but
thinking more about it: You are right, that there are ways that the
kthread ends up with a vdso page address.... Bah!

Still my point stands that this is not a timens VDSO issue, but an issue
of: kthread tries to fault in a VDSO page of whatever nature.

Thanks,

        tglx



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-29 22:34       ` Jann Horn
@ 2022-11-30  0:09         ` Thomas Gleixner
  0 siblings, 0 replies; 11+ messages in thread
From: Thomas Gleixner @ 2022-11-30  0:09 UTC (permalink / raw)
  To: Jann Horn; +Cc: Andrei Vagin, linux-kernel

On Tue, Nov 29 2022 at 23:34, Jann Horn wrote:
> On Tue, Nov 29, 2022 at 11:28 PM Jann Horn <jannh@google.com> wrote:
>> That said, I haven't actually tried it. But I don't think there's
>> anything in the page fault handling path that distinguishes between
>> copy_from_user() faults in kthread context and other userspace faults
>> in a relevant way?
>
> Ah, but I guess even if this can happen, it's not actually as bad as I
> thought, since kthreads are in init_time_ns, and init_time_ns doesn't
> have a ->vvar_page, so this isn't going to lead to anything terrible
> like page UAF, and it's just a garbage-in-garbage-out scenario.

True, but catching the kthread -> fault (vvar/vdso page) scenario
definitely has a value.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy
  2022-11-29 19:18 [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy Jann Horn
  2022-11-29 19:18 ` [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm() Jann Horn
@ 2022-11-30 10:15 ` Jann Horn
  2022-11-30 10:57   ` Thomas Gleixner
  1 sibling, 1 reply; 11+ messages in thread
From: Jann Horn @ 2022-11-30 10:15 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andrei Vagin, linux-kernel

On Tue, Nov 29, 2022 at 8:18 PM Jann Horn <jannh@google.com> wrote:
> find_timens_vvar_page() is not arch-specific, as can be seen from how all
> five per-architecture versions of it are the same.
> (arm64, powerpc and riscv are exactly the same; x86 and s390 have two
> characters difference inside a comment, less blank lines, and mark the
> !CONFIG_TIME_NS version as inline.)
>
> Refactor the five copies into a central copy in kernel/time/namespace.c.
>
> Marked for stable backporting because it is a prerequisite for the
> following patch.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Jann Horn <jannh@google.com>

Should I resend this one without the Cc stable as a standalone cleanup patch?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy
  2022-11-30 10:15 ` [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy Jann Horn
@ 2022-11-30 10:57   ` Thomas Gleixner
  0 siblings, 0 replies; 11+ messages in thread
From: Thomas Gleixner @ 2022-11-30 10:57 UTC (permalink / raw)
  To: Jann Horn; +Cc: Andrei Vagin, linux-kernel

On Wed, Nov 30 2022 at 11:15, Jann Horn wrote:
> On Tue, Nov 29, 2022 at 8:18 PM Jann Horn <jannh@google.com> wrote:
>> find_timens_vvar_page() is not arch-specific, as can be seen from how all
>> five per-architecture versions of it are the same.
>> (arm64, powerpc and riscv are exactly the same; x86 and s390 have two
>> characters difference inside a comment, less blank lines, and mark the
>> !CONFIG_TIME_NS version as inline.)
>>
>> Refactor the five copies into a central copy in kernel/time/namespace.c.
>>
>> Marked for stable backporting because it is a prerequisite for the
>> following patch.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Jann Horn <jannh@google.com>
>
> Should I resend this one without the Cc stable as a standalone cleanup patch?

Yes, please.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-30  0:07       ` Thomas Gleixner
@ 2022-11-30 22:48         ` David Laight
  2022-12-01  9:31           ` Jann Horn
  0 siblings, 1 reply; 11+ messages in thread
From: David Laight @ 2022-11-30 22:48 UTC (permalink / raw)
  To: 'Thomas Gleixner', Jann Horn; +Cc: Andrei Vagin, linux-kernel

From: Thomas Gleixner
> Sent: 30 November 2022 00:08
....
> >> None of those VDSO (user space) addresses are subject to be faulted in
> >> by anything else than the associated user space task(s).
> >
> > Are you saying that it's not possible or that it doesn't happen when
> > userspace is well-behaved?
> 
> > My subconscious self told me that a kthread won't do that unless it's
> > buggered which makes the vdso fault path the least of our problems, but
> thinking more about it: You are right, that there are ways that the
> kthread ends up with a vdso page address.... Bah!
> 
> Still my point stands that this is not a timens VDSO issue, but an issue
> of: kthread tries to fault in a VDSO page of whatever nature.

Isn't there also the kernel code path where one user thread
reads data from another processes address space.
(It does some unusual calls to the iov_import() functions.)
I can't remember whether it is used by strace or gdb.
But there is certainly the option of getting to access
an 'invalid' address in the other process and then faulting.

ISTR not being convinced that there was a correct check
for user/kernel addresses in it either.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm()
  2022-11-30 22:48         ` David Laight
@ 2022-12-01  9:31           ` Jann Horn
  0 siblings, 0 replies; 11+ messages in thread
From: Jann Horn @ 2022-12-01  9:31 UTC (permalink / raw)
  To: David Laight; +Cc: Thomas Gleixner, Andrei Vagin, linux-kernel

On Wed, Nov 30, 2022 at 11:48 PM David Laight <David.Laight@aculab.com> wrote:
> From: Thomas Gleixner
> > Sent: 30 November 2022 00:08
> ....
> > >> None of those VDSO (user space) addresses are subject to be faulted in
> > >> by anything else than the associated user space task(s).
> > >
> > > Are you saying that it's not possible or that it doesn't happen when
> > > userspace is well-behaved?
> >
> > > My subconscious self told me that a kthread won't do that unless it's
> > > buggered which makes the vdso fault path the least of our problems, but
> > thinking more about it: You are right, that there are ways that the
> > kthread ends up with a vdso page address.... Bah!
> >
> > Still my point stands that this is not a timens VDSO issue, but an issue
> > of: kthread tries to fault in a VDSO page of whatever nature.
>
> Isn't there also the kernel code path where one user thread
> reads data from another processes address space.
> (It does some unusual calls to the iov_import() functions.)
> I can't remember whether it is used by strace or gdb.
> But there is certainly the option of getting to access
> an 'invalid' address in the other process and then faulting.

That's a different mechanism. /proc/$pid/mem and process_vm_readv()
and PTRACE_PEEKDATA and so on go through get_user_pages_remote() or
pin_user_pages_remote(), which bail out on VMAs with VM_IO or
VM_PFNMAP. The ptrace-based access can also fall back to using
vma->vm_ops->access(), but the special_mapping_vmops used by the vvar
VMA explicitly don't have such a handler:

static const struct vm_operations_struct special_mapping_vmops = {
  .close = special_mapping_close,
  .fault = special_mapping_fault,
  .mremap = special_mapping_mremap,
  .name = special_mapping_name,
  /* vDSO code relies that VVAR can't be accessed remotely */
  .access = NULL,
  .may_split = special_mapping_split,
};

One path that I'm not sure about is the Intel i915 GPU virtualization
codepath ppgtt_populate_shadow_entry -> intel_gvt_dma_map_guest_page
-> gvt_dma_map_page -> gvt_pin_guest_page -> vfio_pin_pages ->
vfio_iommu_type1_pin_pages -> vfio_pin_page_external -> vaddr_get_pfns
-> follow_fault_pfn -> fixup_user_fault -> handle_mm_fault. That looks
like it might actually be able to trigger pagefault handling on the
vvar mapping from another process.

> ISTR not being convinced that there was a correct check
> for user/kernel addresses in it either.

The get_user_pages_remote() machinery only works on areas that are
mapped by VMAs (__get_user_pages() bails out if find_extend_vma()
fails and the address is not located in the gate area). There are no
VMAs for kernel memory.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-12-01  9:32 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-29 19:18 [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy Jann Horn
2022-11-29 19:18 ` [PATCH 2/2] time/namespace: Forbid timens page faults under kthread_use_mm() Jann Horn
2022-11-29 21:18   ` Thomas Gleixner
2022-11-29 22:28     ` Jann Horn
2022-11-29 22:34       ` Jann Horn
2022-11-30  0:09         ` Thomas Gleixner
2022-11-30  0:07       ` Thomas Gleixner
2022-11-30 22:48         ` David Laight
2022-12-01  9:31           ` Jann Horn
2022-11-30 10:15 ` [PATCH 1/2] time/namespace: Refactor copy-pasted helper into one copy Jann Horn
2022-11-30 10:57   ` Thomas Gleixner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).