All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH -next] riscv/vdso: Add support for time namespaces
@ 2021-09-01  3:20 ` Tong Tiangen
  0 siblings, 0 replies; 6+ messages in thread
From: Tong Tiangen @ 2021-09-01  3:20 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Palmer Dabbelt, Albert Ou
  Cc: linux-riscv, linux-kernel, Tong Tiangen

Implement generic vdso time namespace support which also enables time
namespaces for riscv. This is quite similar to what arm64 does.

selftest/timens test result:
  1..10
  ok 1 Passed for CLOCK_BOOTTIME (syscall)
  ok 2 Passed for CLOCK_BOOTTIME (vdso)
  ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
  ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
  ok 5 Passed for CLOCK_MONOTONIC (syscall)
  ok 6 Passed for CLOCK_MONOTONIC (vdso)
  ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
  ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
  ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
  ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
  # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0

Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
---
This patch is based on patchset:
  https://lore.kernel.org/lkml/20210901024621.2528797-1-tongtiangen@huawei.com/

 arch/riscv/Kconfig                         |   1 +
 arch/riscv/include/asm/page.h              |   2 +
 arch/riscv/include/asm/vdso.h              |   2 +-
 arch/riscv/include/asm/vdso/gettimeofday.h |   7 +
 arch/riscv/kernel/vdso.c                   | 250 ++++++++++++++++-----
 arch/riscv/kernel/vdso/vdso.lds.S          |   3 +
 6 files changed, 211 insertions(+), 54 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index aac669a6c3d8..7b0eff8a7eef 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -61,6 +61,7 @@ config RISCV
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 109c97e991a6..b3e5ff0125fe 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
 #define page_to_bus(page)	(page_to_phys(page))
 #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
 
+#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn) \
 	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 34210b22ba91..bee9514104f7 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -14,7 +14,7 @@
  */
 #ifdef CONFIG_MMU
 
-#define __VVAR_PAGES    1
+#define __VVAR_PAGES    2
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index f839f16e0d2a..77d9c2f721c4 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
 	return _vdso_data;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+	return _timens_data;
+}
+#endif
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index b70956d80408..a9436a65161a 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <linux/time_namespace.h>
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 #include <vdso/datapage.h>
@@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
 
 enum vvar_pages {
 	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
 	VVAR_NR_PAGES,
 };
 
 #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
 
-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
-
 /*
  * The vDSO data page.
  */
@@ -42,83 +41,228 @@ static union {
 } vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
-static int __init vdso_init(void)
+struct __vdso_info {
+	const char *name;
+	const char *vdso_code_start;
+	const char *vdso_code_end;
+	unsigned long vdso_pages;
+	/* Data Mapping */
+	struct vm_special_mapping *dm;
+	/* Code Mapping */
+	struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+	.name = "vdso",
+	.vdso_code_start = vdso_start,
+	.vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *new_vma)
+{
+	current->mm->context.vdso = (void *)new_vma->vm_start;
+
+	return 0;
+}
+
+static int __init __vdso_init(void)
 {
 	unsigned int i;
+	struct page **vdso_pagelist;
+	unsigned long pfn;
 
-	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-	vdso_pagelist =
-		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
-	if (unlikely(vdso_pagelist == NULL)) {
-		pr_err("vdso: pagelist allocation failed\n");
-		return -ENOMEM;
+	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
 	}
 
-	for (i = 0; i < vdso_pages; i++) {
-		struct page *pg;
+	vdso_info.vdso_pages = (
+		vdso_info.vdso_code_end -
+		vdso_info.vdso_code_start) >>
+		PAGE_SHIFT;
+
+	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+				sizeof(struct page *),
+				GFP_KERNEL);
+	if (vdso_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the vDSO code pages. */
+	pfn = sym_to_pfn(vdso_info.vdso_code_start);
+
+	for (i = 0; i < vdso_info.vdso_pages; i++)
+		vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+	vdso_info.cm->pages = vdso_pagelist;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);
 
-		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
-		vdso_pagelist[i] = pg;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, vdso_info.dm))
+			zap_page_range(vma, vma->vm_start, size);
 	}
-	vdso_pagelist[i] = virt_to_page(vdso_data);
 
+	mmap_read_unlock(mm);
 	return 0;
 }
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops.
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+		else
+			pfn = sym_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = sym_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+	RV_VDSO_MAP_VVAR,
+	RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+	[RV_VDSO_MAP_VVAR] = {
+		.name   = "[vvar]",
+		.fault = vvar_fault,
+	},
+	[RV_VDSO_MAP_VDSO] = {
+		.name   = "[vdso]",
+		.mremap = vdso_mremap,
+	},
+};
+
+static int __init vdso_init(void)
+{
+	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+	return __vdso_init();
+}
 arch_initcall(vdso_init);
 
-int arch_setup_additional_pages(struct linux_binprm *bprm,
-	int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+				    struct linux_binprm *bprm,
+				    int uses_interp)
 {
-	struct mm_struct *mm = current->mm;
-	unsigned long vdso_base, vdso_len;
-	int ret;
+	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	void *ret;
 
 	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
 
-	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
+	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+	/* Be sure to map the data page */
+	vdso_mapping_len = vdso_text_len + VVAR_SIZE;
 
-	if (mmap_write_lock_killable(mm))
-		return -EINTR;
-
-	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
 	if (IS_ERR_VALUE(vdso_base)) {
-		ret = vdso_base;
-		goto end;
+		ret = ERR_PTR(vdso_base);
+		goto up_fail;
 	}
 
-	mm->context.vdso = NULL;
-	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
-		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
-	if (unlikely(ret))
-		goto end;
+	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+	if (IS_ERR(ret))
+		goto up_fail;
 
+	vdso_base += VVAR_SIZE;
+	mm->context.vdso = (void *)vdso_base;
 	ret =
-	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
-		vdso_pages << PAGE_SHIFT,
+	   _install_special_mapping(mm, vdso_base, vdso_text_len,
 		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
-		vdso_pagelist);
+		vdso_info.cm);
 
-	if (unlikely(ret))
-		goto end;
+	if (IS_ERR(ret))
+		goto up_fail;
 
-	/*
-	 * Put vDSO base into mm struct. We need to do this before calling
-	 * install_special_mapping or the perf counter mmap tracking code
-	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
-	 */
-	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
+	return 0;
 
-end:
-	mmap_write_unlock(mm);
-	return ret;
+up_fail:
+	mm->context.vdso = NULL;
+	return PTR_ERR(ret);
 }
 
-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
-		return "[vdso]";
-	if (vma->vm_mm && (vma->vm_start ==
-			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
-		return "[vdso_data]";
-	return NULL;
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	ret = __setup_additional_pages(mm, bprm, uses_interp);
+	mmap_write_unlock(mm);
+
+	return ret;
 }
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index e9111f700af0..01d94aae5bf5 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
 SECTIONS
 {
 	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
 	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
-- 
2.18.0.huawei.25


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH -next] riscv/vdso: Add support for time namespaces
@ 2021-09-01  3:20 ` Tong Tiangen
  0 siblings, 0 replies; 6+ messages in thread
From: Tong Tiangen @ 2021-09-01  3:20 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Palmer Dabbelt, Albert Ou
  Cc: linux-riscv, linux-kernel, Tong Tiangen

Implement generic vdso time namespace support which also enables time
namespaces for riscv. This is quite similar to what arm64 does.

selftest/timens test result:
  1..10
  ok 1 Passed for CLOCK_BOOTTIME (syscall)
  ok 2 Passed for CLOCK_BOOTTIME (vdso)
  ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
  ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
  ok 5 Passed for CLOCK_MONOTONIC (syscall)
  ok 6 Passed for CLOCK_MONOTONIC (vdso)
  ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
  ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
  ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
  ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
  # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0

Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
---
This patch is based on patchset:
  https://lore.kernel.org/lkml/20210901024621.2528797-1-tongtiangen@huawei.com/

 arch/riscv/Kconfig                         |   1 +
 arch/riscv/include/asm/page.h              |   2 +
 arch/riscv/include/asm/vdso.h              |   2 +-
 arch/riscv/include/asm/vdso/gettimeofday.h |   7 +
 arch/riscv/kernel/vdso.c                   | 250 ++++++++++++++++-----
 arch/riscv/kernel/vdso/vdso.lds.S          |   3 +
 6 files changed, 211 insertions(+), 54 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index aac669a6c3d8..7b0eff8a7eef 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -61,6 +61,7 @@ config RISCV
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 109c97e991a6..b3e5ff0125fe 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
 #define page_to_bus(page)	(page_to_phys(page))
 #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
 
+#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn) \
 	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 34210b22ba91..bee9514104f7 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -14,7 +14,7 @@
  */
 #ifdef CONFIG_MMU
 
-#define __VVAR_PAGES    1
+#define __VVAR_PAGES    2
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index f839f16e0d2a..77d9c2f721c4 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
 	return _vdso_data;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+	return _timens_data;
+}
+#endif
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index b70956d80408..a9436a65161a 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <linux/time_namespace.h>
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 #include <vdso/datapage.h>
@@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
 
 enum vvar_pages {
 	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
 	VVAR_NR_PAGES,
 };
 
 #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
 
-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
-
 /*
  * The vDSO data page.
  */
@@ -42,83 +41,228 @@ static union {
 } vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
-static int __init vdso_init(void)
+struct __vdso_info {
+	const char *name;
+	const char *vdso_code_start;
+	const char *vdso_code_end;
+	unsigned long vdso_pages;
+	/* Data Mapping */
+	struct vm_special_mapping *dm;
+	/* Code Mapping */
+	struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+	.name = "vdso",
+	.vdso_code_start = vdso_start,
+	.vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *new_vma)
+{
+	current->mm->context.vdso = (void *)new_vma->vm_start;
+
+	return 0;
+}
+
+static int __init __vdso_init(void)
 {
 	unsigned int i;
+	struct page **vdso_pagelist;
+	unsigned long pfn;
 
-	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-	vdso_pagelist =
-		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
-	if (unlikely(vdso_pagelist == NULL)) {
-		pr_err("vdso: pagelist allocation failed\n");
-		return -ENOMEM;
+	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
 	}
 
-	for (i = 0; i < vdso_pages; i++) {
-		struct page *pg;
+	vdso_info.vdso_pages = (
+		vdso_info.vdso_code_end -
+		vdso_info.vdso_code_start) >>
+		PAGE_SHIFT;
+
+	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+				sizeof(struct page *),
+				GFP_KERNEL);
+	if (vdso_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the vDSO code pages. */
+	pfn = sym_to_pfn(vdso_info.vdso_code_start);
+
+	for (i = 0; i < vdso_info.vdso_pages; i++)
+		vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+	vdso_info.cm->pages = vdso_pagelist;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);
 
-		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
-		vdso_pagelist[i] = pg;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, vdso_info.dm))
+			zap_page_range(vma, vma->vm_start, size);
 	}
-	vdso_pagelist[i] = virt_to_page(vdso_data);
 
+	mmap_read_unlock(mm);
 	return 0;
 }
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops.
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+		else
+			pfn = sym_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = sym_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+	RV_VDSO_MAP_VVAR,
+	RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+	[RV_VDSO_MAP_VVAR] = {
+		.name   = "[vvar]",
+		.fault = vvar_fault,
+	},
+	[RV_VDSO_MAP_VDSO] = {
+		.name   = "[vdso]",
+		.mremap = vdso_mremap,
+	},
+};
+
+static int __init vdso_init(void)
+{
+	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+	return __vdso_init();
+}
 arch_initcall(vdso_init);
 
-int arch_setup_additional_pages(struct linux_binprm *bprm,
-	int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+				    struct linux_binprm *bprm,
+				    int uses_interp)
 {
-	struct mm_struct *mm = current->mm;
-	unsigned long vdso_base, vdso_len;
-	int ret;
+	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	void *ret;
 
 	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
 
-	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
+	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+	/* Be sure to map the data page */
+	vdso_mapping_len = vdso_text_len + VVAR_SIZE;
 
-	if (mmap_write_lock_killable(mm))
-		return -EINTR;
-
-	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
 	if (IS_ERR_VALUE(vdso_base)) {
-		ret = vdso_base;
-		goto end;
+		ret = ERR_PTR(vdso_base);
+		goto up_fail;
 	}
 
-	mm->context.vdso = NULL;
-	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
-		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
-	if (unlikely(ret))
-		goto end;
+	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+	if (IS_ERR(ret))
+		goto up_fail;
 
+	vdso_base += VVAR_SIZE;
+	mm->context.vdso = (void *)vdso_base;
 	ret =
-	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
-		vdso_pages << PAGE_SHIFT,
+	   _install_special_mapping(mm, vdso_base, vdso_text_len,
 		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
-		vdso_pagelist);
+		vdso_info.cm);
 
-	if (unlikely(ret))
-		goto end;
+	if (IS_ERR(ret))
+		goto up_fail;
 
-	/*
-	 * Put vDSO base into mm struct. We need to do this before calling
-	 * install_special_mapping or the perf counter mmap tracking code
-	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
-	 */
-	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
+	return 0;
 
-end:
-	mmap_write_unlock(mm);
-	return ret;
+up_fail:
+	mm->context.vdso = NULL;
+	return PTR_ERR(ret);
 }
 
-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
-		return "[vdso]";
-	if (vma->vm_mm && (vma->vm_start ==
-			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
-		return "[vdso_data]";
-	return NULL;
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	ret = __setup_additional_pages(mm, bprm, uses_interp);
+	mmap_write_unlock(mm);
+
+	return ret;
 }
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index e9111f700af0..01d94aae5bf5 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
 SECTIONS
 {
 	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
 	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
-- 
2.18.0.huawei.25


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH -next] riscv/vdso: Add support for time namespaces
  2021-09-01  3:20 ` Tong Tiangen
@ 2021-09-28  2:35   ` tongtiangen
  -1 siblings, 0 replies; 6+ messages in thread
From: tongtiangen @ 2021-09-28  2:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Palmer Dabbelt, Albert Ou
  Cc: linux-riscv, linux-kernel

kindly ping.

On 2021/9/1 11:20, Tong Tiangen wrote:
> Implement generic vdso time namespace support which also enables time
> namespaces for riscv. This is quite similar to what arm64 does.
>
> selftest/timens test result:
>   1..10
>   ok 1 Passed for CLOCK_BOOTTIME (syscall)
>   ok 2 Passed for CLOCK_BOOTTIME (vdso)
>   ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 5 Passed for CLOCK_MONOTONIC (syscall)
>   ok 6 Passed for CLOCK_MONOTONIC (vdso)
>   ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
>   ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
>   ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
>   ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
>   # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0
>
> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
> ---
> This patch is based on patchset:
>   https://lore.kernel.org/lkml/20210901024621.2528797-1-tongtiangen@huawei.com/
>
>  arch/riscv/Kconfig                         |   1 +
>  arch/riscv/include/asm/page.h              |   2 +
>  arch/riscv/include/asm/vdso.h              |   2 +-
>  arch/riscv/include/asm/vdso/gettimeofday.h |   7 +
>  arch/riscv/kernel/vdso.c                   | 250 ++++++++++++++++-----
>  arch/riscv/kernel/vdso/vdso.lds.S          |   3 +
>  6 files changed, 211 insertions(+), 54 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index aac669a6c3d8..7b0eff8a7eef 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -61,6 +61,7 @@ config RISCV
>  	select GENERIC_SCHED_CLOCK
>  	select GENERIC_SMP_IDLE_THREAD
>  	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
> +	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
>  	select HANDLE_DOMAIN_IRQ
>  	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 109c97e991a6..b3e5ff0125fe 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
>  #define page_to_bus(page)	(page_to_phys(page))
>  #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
>
> +#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
> +
>  #ifdef CONFIG_FLATMEM
>  #define pfn_valid(pfn) \
>  	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
> diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
> index 34210b22ba91..bee9514104f7 100644
> --- a/arch/riscv/include/asm/vdso.h
> +++ b/arch/riscv/include/asm/vdso.h
> @@ -14,7 +14,7 @@
>   */
>  #ifdef CONFIG_MMU
>
> -#define __VVAR_PAGES    1
> +#define __VVAR_PAGES    2
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
> index f839f16e0d2a..77d9c2f721c4 100644
> --- a/arch/riscv/include/asm/vdso/gettimeofday.h
> +++ b/arch/riscv/include/asm/vdso/gettimeofday.h
> @@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
>  	return _vdso_data;
>  }
>
> +#ifdef CONFIG_TIME_NS
> +static __always_inline
> +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
> +{
> +	return _timens_data;
> +}
> +#endif
>  #endif /* !__ASSEMBLY__ */
>
>  #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
> diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
> index b70956d80408..a9436a65161a 100644
> --- a/arch/riscv/kernel/vdso.c
> +++ b/arch/riscv/kernel/vdso.c
> @@ -13,6 +13,7 @@
>  #include <linux/err.h>
>  #include <asm/page.h>
>  #include <asm/vdso.h>
> +#include <linux/time_namespace.h>
>
>  #ifdef CONFIG_GENERIC_TIME_VSYSCALL
>  #include <vdso/datapage.h>
> @@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
>
>  enum vvar_pages {
>  	VVAR_DATA_PAGE_OFFSET,
> +	VVAR_TIMENS_PAGE_OFFSET,
>  	VVAR_NR_PAGES,
>  };
>
>  #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
>
> -static unsigned int vdso_pages __ro_after_init;
> -static struct page **vdso_pagelist __ro_after_init;
> -
>  /*
>   * The vDSO data page.
>   */
> @@ -42,83 +41,228 @@ static union {
>  } vdso_data_store __page_aligned_data;
>  struct vdso_data *vdso_data = &vdso_data_store.data;
>
> -static int __init vdso_init(void)
> +struct __vdso_info {
> +	const char *name;
> +	const char *vdso_code_start;
> +	const char *vdso_code_end;
> +	unsigned long vdso_pages;
> +	/* Data Mapping */
> +	struct vm_special_mapping *dm;
> +	/* Code Mapping */
> +	struct vm_special_mapping *cm;
> +};
> +
> +static struct __vdso_info vdso_info __ro_after_init = {
> +	.name = "vdso",
> +	.vdso_code_start = vdso_start,
> +	.vdso_code_end = vdso_end,
> +};
> +
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +		       struct vm_area_struct *new_vma)
> +{
> +	current->mm->context.vdso = (void *)new_vma->vm_start;
> +
> +	return 0;
> +}
> +
> +static int __init __vdso_init(void)
>  {
>  	unsigned int i;
> +	struct page **vdso_pagelist;
> +	unsigned long pfn;
>
> -	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
> -	vdso_pagelist =
> -		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
> -	if (unlikely(vdso_pagelist == NULL)) {
> -		pr_err("vdso: pagelist allocation failed\n");
> -		return -ENOMEM;
> +	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
> +		pr_err("vDSO is not a valid ELF object!\n");
> +		return -EINVAL;
>  	}
>
> -	for (i = 0; i < vdso_pages; i++) {
> -		struct page *pg;
> +	vdso_info.vdso_pages = (
> +		vdso_info.vdso_code_end -
> +		vdso_info.vdso_code_start) >>
> +		PAGE_SHIFT;
> +
> +	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
> +				sizeof(struct page *),
> +				GFP_KERNEL);
> +	if (vdso_pagelist == NULL)
> +		return -ENOMEM;
> +
> +	/* Grab the vDSO code pages. */
> +	pfn = sym_to_pfn(vdso_info.vdso_code_start);
> +
> +	for (i = 0; i < vdso_info.vdso_pages; i++)
> +		vdso_pagelist[i] = pfn_to_page(pfn + i);
> +
> +	vdso_info.cm->pages = vdso_pagelist;
> +
> +	return 0;
> +}
> +
> +#ifdef CONFIG_TIME_NS
> +struct vdso_data *arch_get_vdso_data(void *vvar_page)
> +{
> +	return (struct vdso_data *)(vvar_page);
> +}
> +
> +/*
> + * The vvar mapping contains data for a specific time namespace, so when a task
> + * changes namespace we must unmap its vvar data for the old namespace.
> + * Subsequent faults will map in data for the new namespace.
> + *
> + * For more details see timens_setup_vdso_data().
> + */
> +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
> +{
> +	struct mm_struct *mm = task->mm;
> +	struct vm_area_struct *vma;
> +
> +	mmap_read_lock(mm);
>
> -		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
> -		vdso_pagelist[i] = pg;
> +	for (vma = mm->mmap; vma; vma = vma->vm_next) {
> +		unsigned long size = vma->vm_end - vma->vm_start;
> +
> +		if (vma_is_special_mapping(vma, vdso_info.dm))
> +			zap_page_range(vma, vma->vm_start, size);
>  	}
> -	vdso_pagelist[i] = virt_to_page(vdso_data);
>
> +	mmap_read_unlock(mm);
>  	return 0;
>  }
> +
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	if (likely(vma->vm_mm == current->mm))
> +		return current->nsproxy->time_ns->vvar_page;
> +
> +	/*
> +	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
> +	 * through interfaces like /proc/$pid/mem or
> +	 * process_vm_{readv,writev}() as long as there's no .access()
> +	 * in special_mapping_vmops.
> +	 * For more details check_vma_flags() and __access_remote_vm()
> +	 */
> +	WARN(1, "vvar_page accessed remotely");
> +
> +	return NULL;
> +}
> +#else
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	return NULL;
> +}
> +#endif
> +
> +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> +			     struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	struct page *timens_page = find_timens_vvar_page(vma);
> +	unsigned long pfn;
> +
> +	switch (vmf->pgoff) {
> +	case VVAR_DATA_PAGE_OFFSET:
> +		if (timens_page)
> +			pfn = page_to_pfn(timens_page);
> +		else
> +			pfn = sym_to_pfn(vdso_data);
> +		break;
> +#ifdef CONFIG_TIME_NS
> +	case VVAR_TIMENS_PAGE_OFFSET:
> +		/*
> +		 * If a task belongs to a time namespace then a namespace
> +		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
> +		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
> +		 * offset.
> +		 * See also the comment near timens_setup_vdso_data().
> +		 */
> +		if (!timens_page)
> +			return VM_FAULT_SIGBUS;
> +		pfn = sym_to_pfn(vdso_data);
> +		break;
> +#endif /* CONFIG_TIME_NS */
> +	default:
> +		return VM_FAULT_SIGBUS;
> +	}
> +
> +	return vmf_insert_pfn(vma, vmf->address, pfn);
> +}
> +
> +enum rv_vdso_map {
> +	RV_VDSO_MAP_VVAR,
> +	RV_VDSO_MAP_VDSO,
> +};
> +
> +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
> +	[RV_VDSO_MAP_VVAR] = {
> +		.name   = "[vvar]",
> +		.fault = vvar_fault,
> +	},
> +	[RV_VDSO_MAP_VDSO] = {
> +		.name   = "[vdso]",
> +		.mremap = vdso_mremap,
> +	},
> +};
> +
> +static int __init vdso_init(void)
> +{
> +	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
> +	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
> +
> +	return __vdso_init();
> +}
>  arch_initcall(vdso_init);
>
> -int arch_setup_additional_pages(struct linux_binprm *bprm,
> -	int uses_interp)
> +static int __setup_additional_pages(struct mm_struct *mm,
> +				    struct linux_binprm *bprm,
> +				    int uses_interp)
>  {
> -	struct mm_struct *mm = current->mm;
> -	unsigned long vdso_base, vdso_len;
> -	int ret;
> +	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
> +	void *ret;
>
>  	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
>
> -	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
> +	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
> +	/* Be sure to map the data page */
> +	vdso_mapping_len = vdso_text_len + VVAR_SIZE;
>
> -	if (mmap_write_lock_killable(mm))
> -		return -EINTR;
> -
> -	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
> +	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
>  	if (IS_ERR_VALUE(vdso_base)) {
> -		ret = vdso_base;
> -		goto end;
> +		ret = ERR_PTR(vdso_base);
> +		goto up_fail;
>  	}
>
> -	mm->context.vdso = NULL;
> -	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
> -		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
> -	if (unlikely(ret))
> -		goto end;
> +	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
> +		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> +	vdso_base += VVAR_SIZE;
> +	mm->context.vdso = (void *)vdso_base;
>  	ret =
> -	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
> -		vdso_pages << PAGE_SHIFT,
> +	   _install_special_mapping(mm, vdso_base, vdso_text_len,
>  		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> -		vdso_pagelist);
> +		vdso_info.cm);
>
> -	if (unlikely(ret))
> -		goto end;
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> -	/*
> -	 * Put vDSO base into mm struct. We need to do this before calling
> -	 * install_special_mapping or the perf counter mmap tracking code
> -	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
> -	 */
> -	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
> +	return 0;
>
> -end:
> -	mmap_write_unlock(mm);
> -	return ret;
> +up_fail:
> +	mm->context.vdso = NULL;
> +	return PTR_ERR(ret);
>  }
>
> -const char *arch_vma_name(struct vm_area_struct *vma)
> +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>  {
> -	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
> -		return "[vdso]";
> -	if (vma->vm_mm && (vma->vm_start ==
> -			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
> -		return "[vdso_data]";
> -	return NULL;
> +	struct mm_struct *mm = current->mm;
> +	int ret;
> +
> +	if (mmap_write_lock_killable(mm))
> +		return -EINTR;
> +
> +	ret = __setup_additional_pages(mm, bprm, uses_interp);
> +	mmap_write_unlock(mm);
> +
> +	return ret;
>  }
> diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
> index e9111f700af0..01d94aae5bf5 100644
> --- a/arch/riscv/kernel/vdso/vdso.lds.S
> +++ b/arch/riscv/kernel/vdso/vdso.lds.S
> @@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
>  SECTIONS
>  {
>  	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
> +#ifdef CONFIG_TIME_NS
> +	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
> +#endif
>  	. = SIZEOF_HEADERS;
>
>  	.hash		: { *(.hash) }			:text
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH -next] riscv/vdso: Add support for time namespaces
@ 2021-09-28  2:35   ` tongtiangen
  0 siblings, 0 replies; 6+ messages in thread
From: tongtiangen @ 2021-09-28  2:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Palmer Dabbelt, Albert Ou
  Cc: linux-riscv, linux-kernel

kindly ping.

On 2021/9/1 11:20, Tong Tiangen wrote:
> Implement generic vdso time namespace support which also enables time
> namespaces for riscv. This is quite similar to what arm64 does.
>
> selftest/timens test result:
>   1..10
>   ok 1 Passed for CLOCK_BOOTTIME (syscall)
>   ok 2 Passed for CLOCK_BOOTTIME (vdso)
>   ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 5 Passed for CLOCK_MONOTONIC (syscall)
>   ok 6 Passed for CLOCK_MONOTONIC (vdso)
>   ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
>   ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
>   ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
>   ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
>   # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0
>
> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
> ---
> This patch is based on patchset:
>   https://lore.kernel.org/lkml/20210901024621.2528797-1-tongtiangen@huawei.com/
>
>  arch/riscv/Kconfig                         |   1 +
>  arch/riscv/include/asm/page.h              |   2 +
>  arch/riscv/include/asm/vdso.h              |   2 +-
>  arch/riscv/include/asm/vdso/gettimeofday.h |   7 +
>  arch/riscv/kernel/vdso.c                   | 250 ++++++++++++++++-----
>  arch/riscv/kernel/vdso/vdso.lds.S          |   3 +
>  6 files changed, 211 insertions(+), 54 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index aac669a6c3d8..7b0eff8a7eef 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -61,6 +61,7 @@ config RISCV
>  	select GENERIC_SCHED_CLOCK
>  	select GENERIC_SMP_IDLE_THREAD
>  	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
> +	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
>  	select HANDLE_DOMAIN_IRQ
>  	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 109c97e991a6..b3e5ff0125fe 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
>  #define page_to_bus(page)	(page_to_phys(page))
>  #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
>
> +#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
> +
>  #ifdef CONFIG_FLATMEM
>  #define pfn_valid(pfn) \
>  	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
> diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
> index 34210b22ba91..bee9514104f7 100644
> --- a/arch/riscv/include/asm/vdso.h
> +++ b/arch/riscv/include/asm/vdso.h
> @@ -14,7 +14,7 @@
>   */
>  #ifdef CONFIG_MMU
>
> -#define __VVAR_PAGES    1
> +#define __VVAR_PAGES    2
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
> index f839f16e0d2a..77d9c2f721c4 100644
> --- a/arch/riscv/include/asm/vdso/gettimeofday.h
> +++ b/arch/riscv/include/asm/vdso/gettimeofday.h
> @@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
>  	return _vdso_data;
>  }
>
> +#ifdef CONFIG_TIME_NS
> +static __always_inline
> +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
> +{
> +	return _timens_data;
> +}
> +#endif
>  #endif /* !__ASSEMBLY__ */
>
>  #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
> diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
> index b70956d80408..a9436a65161a 100644
> --- a/arch/riscv/kernel/vdso.c
> +++ b/arch/riscv/kernel/vdso.c
> @@ -13,6 +13,7 @@
>  #include <linux/err.h>
>  #include <asm/page.h>
>  #include <asm/vdso.h>
> +#include <linux/time_namespace.h>
>
>  #ifdef CONFIG_GENERIC_TIME_VSYSCALL
>  #include <vdso/datapage.h>
> @@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
>
>  enum vvar_pages {
>  	VVAR_DATA_PAGE_OFFSET,
> +	VVAR_TIMENS_PAGE_OFFSET,
>  	VVAR_NR_PAGES,
>  };
>
>  #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
>
> -static unsigned int vdso_pages __ro_after_init;
> -static struct page **vdso_pagelist __ro_after_init;
> -
>  /*
>   * The vDSO data page.
>   */
> @@ -42,83 +41,228 @@ static union {
>  } vdso_data_store __page_aligned_data;
>  struct vdso_data *vdso_data = &vdso_data_store.data;
>
> -static int __init vdso_init(void)
> +struct __vdso_info {
> +	const char *name;
> +	const char *vdso_code_start;
> +	const char *vdso_code_end;
> +	unsigned long vdso_pages;
> +	/* Data Mapping */
> +	struct vm_special_mapping *dm;
> +	/* Code Mapping */
> +	struct vm_special_mapping *cm;
> +};
> +
> +static struct __vdso_info vdso_info __ro_after_init = {
> +	.name = "vdso",
> +	.vdso_code_start = vdso_start,
> +	.vdso_code_end = vdso_end,
> +};
> +
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +		       struct vm_area_struct *new_vma)
> +{
> +	current->mm->context.vdso = (void *)new_vma->vm_start;
> +
> +	return 0;
> +}
> +
> +static int __init __vdso_init(void)
>  {
>  	unsigned int i;
> +	struct page **vdso_pagelist;
> +	unsigned long pfn;
>
> -	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
> -	vdso_pagelist =
> -		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
> -	if (unlikely(vdso_pagelist == NULL)) {
> -		pr_err("vdso: pagelist allocation failed\n");
> -		return -ENOMEM;
> +	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
> +		pr_err("vDSO is not a valid ELF object!\n");
> +		return -EINVAL;
>  	}
>
> -	for (i = 0; i < vdso_pages; i++) {
> -		struct page *pg;
> +	vdso_info.vdso_pages = (
> +		vdso_info.vdso_code_end -
> +		vdso_info.vdso_code_start) >>
> +		PAGE_SHIFT;
> +
> +	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
> +				sizeof(struct page *),
> +				GFP_KERNEL);
> +	if (vdso_pagelist == NULL)
> +		return -ENOMEM;
> +
> +	/* Grab the vDSO code pages. */
> +	pfn = sym_to_pfn(vdso_info.vdso_code_start);
> +
> +	for (i = 0; i < vdso_info.vdso_pages; i++)
> +		vdso_pagelist[i] = pfn_to_page(pfn + i);
> +
> +	vdso_info.cm->pages = vdso_pagelist;
> +
> +	return 0;
> +}
> +
> +#ifdef CONFIG_TIME_NS
> +struct vdso_data *arch_get_vdso_data(void *vvar_page)
> +{
> +	return (struct vdso_data *)(vvar_page);
> +}
> +
> +/*
> + * The vvar mapping contains data for a specific time namespace, so when a task
> + * changes namespace we must unmap its vvar data for the old namespace.
> + * Subsequent faults will map in data for the new namespace.
> + *
> + * For more details see timens_setup_vdso_data().
> + */
> +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
> +{
> +	struct mm_struct *mm = task->mm;
> +	struct vm_area_struct *vma;
> +
> +	mmap_read_lock(mm);
>
> -		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
> -		vdso_pagelist[i] = pg;
> +	for (vma = mm->mmap; vma; vma = vma->vm_next) {
> +		unsigned long size = vma->vm_end - vma->vm_start;
> +
> +		if (vma_is_special_mapping(vma, vdso_info.dm))
> +			zap_page_range(vma, vma->vm_start, size);
>  	}
> -	vdso_pagelist[i] = virt_to_page(vdso_data);
>
> +	mmap_read_unlock(mm);
>  	return 0;
>  }
> +
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	if (likely(vma->vm_mm == current->mm))
> +		return current->nsproxy->time_ns->vvar_page;
> +
> +	/*
> +	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
> +	 * through interfaces like /proc/$pid/mem or
> +	 * process_vm_{readv,writev}() as long as there's no .access()
> +	 * in special_mapping_vmops.
> +	 * For more details check_vma_flags() and __access_remote_vm()
> +	 */
> +	WARN(1, "vvar_page accessed remotely");
> +
> +	return NULL;
> +}
> +#else
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	return NULL;
> +}
> +#endif
> +
> +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> +			     struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	struct page *timens_page = find_timens_vvar_page(vma);
> +	unsigned long pfn;
> +
> +	switch (vmf->pgoff) {
> +	case VVAR_DATA_PAGE_OFFSET:
> +		if (timens_page)
> +			pfn = page_to_pfn(timens_page);
> +		else
> +			pfn = sym_to_pfn(vdso_data);
> +		break;
> +#ifdef CONFIG_TIME_NS
> +	case VVAR_TIMENS_PAGE_OFFSET:
> +		/*
> +		 * If a task belongs to a time namespace then a namespace
> +		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
> +		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
> +		 * offset.
> +		 * See also the comment near timens_setup_vdso_data().
> +		 */
> +		if (!timens_page)
> +			return VM_FAULT_SIGBUS;
> +		pfn = sym_to_pfn(vdso_data);
> +		break;
> +#endif /* CONFIG_TIME_NS */
> +	default:
> +		return VM_FAULT_SIGBUS;
> +	}
> +
> +	return vmf_insert_pfn(vma, vmf->address, pfn);
> +}
> +
> +enum rv_vdso_map {
> +	RV_VDSO_MAP_VVAR,
> +	RV_VDSO_MAP_VDSO,
> +};
> +
> +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
> +	[RV_VDSO_MAP_VVAR] = {
> +		.name   = "[vvar]",
> +		.fault = vvar_fault,
> +	},
> +	[RV_VDSO_MAP_VDSO] = {
> +		.name   = "[vdso]",
> +		.mremap = vdso_mremap,
> +	},
> +};
> +
> +static int __init vdso_init(void)
> +{
> +	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
> +	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
> +
> +	return __vdso_init();
> +}
>  arch_initcall(vdso_init);
>
> -int arch_setup_additional_pages(struct linux_binprm *bprm,
> -	int uses_interp)
> +static int __setup_additional_pages(struct mm_struct *mm,
> +				    struct linux_binprm *bprm,
> +				    int uses_interp)
>  {
> -	struct mm_struct *mm = current->mm;
> -	unsigned long vdso_base, vdso_len;
> -	int ret;
> +	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
> +	void *ret;
>
>  	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
>
> -	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
> +	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
> +	/* Be sure to map the data page */
> +	vdso_mapping_len = vdso_text_len + VVAR_SIZE;
>
> -	if (mmap_write_lock_killable(mm))
> -		return -EINTR;
> -
> -	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
> +	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
>  	if (IS_ERR_VALUE(vdso_base)) {
> -		ret = vdso_base;
> -		goto end;
> +		ret = ERR_PTR(vdso_base);
> +		goto up_fail;
>  	}
>
> -	mm->context.vdso = NULL;
> -	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
> -		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
> -	if (unlikely(ret))
> -		goto end;
> +	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
> +		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> +	vdso_base += VVAR_SIZE;
> +	mm->context.vdso = (void *)vdso_base;
>  	ret =
> -	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
> -		vdso_pages << PAGE_SHIFT,
> +	   _install_special_mapping(mm, vdso_base, vdso_text_len,
>  		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> -		vdso_pagelist);
> +		vdso_info.cm);
>
> -	if (unlikely(ret))
> -		goto end;
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> -	/*
> -	 * Put vDSO base into mm struct. We need to do this before calling
> -	 * install_special_mapping or the perf counter mmap tracking code
> -	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
> -	 */
> -	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
> +	return 0;
>
> -end:
> -	mmap_write_unlock(mm);
> -	return ret;
> +up_fail:
> +	mm->context.vdso = NULL;
> +	return PTR_ERR(ret);
>  }
>
> -const char *arch_vma_name(struct vm_area_struct *vma)
> +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>  {
> -	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
> -		return "[vdso]";
> -	if (vma->vm_mm && (vma->vm_start ==
> -			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
> -		return "[vdso_data]";
> -	return NULL;
> +	struct mm_struct *mm = current->mm;
> +	int ret;
> +
> +	if (mmap_write_lock_killable(mm))
> +		return -EINTR;
> +
> +	ret = __setup_additional_pages(mm, bprm, uses_interp);
> +	mmap_write_unlock(mm);
> +
> +	return ret;
>  }
> diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
> index e9111f700af0..01d94aae5bf5 100644
> --- a/arch/riscv/kernel/vdso/vdso.lds.S
> +++ b/arch/riscv/kernel/vdso/vdso.lds.S
> @@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
>  SECTIONS
>  {
>  	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
> +#ifdef CONFIG_TIME_NS
> +	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
> +#endif
>  	. = SIZEOF_HEADERS;
>
>  	.hash		: { *(.hash) }			:text
>

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH -next] riscv/vdso: Add support for time namespaces
  2021-09-01  3:20 ` Tong Tiangen
@ 2021-10-02 23:18   ` Palmer Dabbelt
  -1 siblings, 0 replies; 6+ messages in thread
From: Palmer Dabbelt @ 2021-10-02 23:18 UTC (permalink / raw)
  To: tongtiangen; +Cc: Paul Walmsley, aou, linux-riscv, linux-kernel, tongtiangen

On Tue, 31 Aug 2021 20:20:25 PDT (-0700), tongtiangen@huawei.com wrote:
> Implement generic vdso time namespace support which also enables time
> namespaces for riscv. This is quite similar to what arm64 does.
>
> selftest/timens test result:
>   1..10
>   ok 1 Passed for CLOCK_BOOTTIME (syscall)
>   ok 2 Passed for CLOCK_BOOTTIME (vdso)
>   ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 5 Passed for CLOCK_MONOTONIC (syscall)
>   ok 6 Passed for CLOCK_MONOTONIC (vdso)
>   ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
>   ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
>   ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
>   ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
>   # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0
>
> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
> ---
> This patch is based on patchset:
>   https://lore.kernel.org/lkml/20210901024621.2528797-1-tongtiangen@huawei.com/

Thanks.  This is on for next.  I just re-juggled that one on fixes so I 
could pull it in to both.

>
>  arch/riscv/Kconfig                         |   1 +
>  arch/riscv/include/asm/page.h              |   2 +
>  arch/riscv/include/asm/vdso.h              |   2 +-
>  arch/riscv/include/asm/vdso/gettimeofday.h |   7 +
>  arch/riscv/kernel/vdso.c                   | 250 ++++++++++++++++-----
>  arch/riscv/kernel/vdso/vdso.lds.S          |   3 +
>  6 files changed, 211 insertions(+), 54 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index aac669a6c3d8..7b0eff8a7eef 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -61,6 +61,7 @@ config RISCV
>  	select GENERIC_SCHED_CLOCK
>  	select GENERIC_SMP_IDLE_THREAD
>  	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
> +	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
>  	select HANDLE_DOMAIN_IRQ
>  	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 109c97e991a6..b3e5ff0125fe 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
>  #define page_to_bus(page)	(page_to_phys(page))
>  #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
>
> +#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
> +
>  #ifdef CONFIG_FLATMEM
>  #define pfn_valid(pfn) \
>  	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
> diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
> index 34210b22ba91..bee9514104f7 100644
> --- a/arch/riscv/include/asm/vdso.h
> +++ b/arch/riscv/include/asm/vdso.h
> @@ -14,7 +14,7 @@
>   */
>  #ifdef CONFIG_MMU
>
> -#define __VVAR_PAGES    1
> +#define __VVAR_PAGES    2
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
> index f839f16e0d2a..77d9c2f721c4 100644
> --- a/arch/riscv/include/asm/vdso/gettimeofday.h
> +++ b/arch/riscv/include/asm/vdso/gettimeofday.h
> @@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
>  	return _vdso_data;
>  }
>
> +#ifdef CONFIG_TIME_NS
> +static __always_inline
> +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
> +{
> +	return _timens_data;
> +}
> +#endif
>  #endif /* !__ASSEMBLY__ */
>
>  #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
> diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
> index b70956d80408..a9436a65161a 100644
> --- a/arch/riscv/kernel/vdso.c
> +++ b/arch/riscv/kernel/vdso.c
> @@ -13,6 +13,7 @@
>  #include <linux/err.h>
>  #include <asm/page.h>
>  #include <asm/vdso.h>
> +#include <linux/time_namespace.h>
>
>  #ifdef CONFIG_GENERIC_TIME_VSYSCALL
>  #include <vdso/datapage.h>
> @@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
>
>  enum vvar_pages {
>  	VVAR_DATA_PAGE_OFFSET,
> +	VVAR_TIMENS_PAGE_OFFSET,
>  	VVAR_NR_PAGES,
>  };
>
>  #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
>
> -static unsigned int vdso_pages __ro_after_init;
> -static struct page **vdso_pagelist __ro_after_init;
> -
>  /*
>   * The vDSO data page.
>   */
> @@ -42,83 +41,228 @@ static union {
>  } vdso_data_store __page_aligned_data;
>  struct vdso_data *vdso_data = &vdso_data_store.data;
>
> -static int __init vdso_init(void)
> +struct __vdso_info {
> +	const char *name;
> +	const char *vdso_code_start;
> +	const char *vdso_code_end;
> +	unsigned long vdso_pages;
> +	/* Data Mapping */
> +	struct vm_special_mapping *dm;
> +	/* Code Mapping */
> +	struct vm_special_mapping *cm;
> +};
> +
> +static struct __vdso_info vdso_info __ro_after_init = {
> +	.name = "vdso",
> +	.vdso_code_start = vdso_start,
> +	.vdso_code_end = vdso_end,
> +};
> +
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +		       struct vm_area_struct *new_vma)
> +{
> +	current->mm->context.vdso = (void *)new_vma->vm_start;
> +
> +	return 0;
> +}
> +
> +static int __init __vdso_init(void)
>  {
>  	unsigned int i;
> +	struct page **vdso_pagelist;
> +	unsigned long pfn;
>
> -	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
> -	vdso_pagelist =
> -		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
> -	if (unlikely(vdso_pagelist == NULL)) {
> -		pr_err("vdso: pagelist allocation failed\n");
> -		return -ENOMEM;
> +	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
> +		pr_err("vDSO is not a valid ELF object!\n");
> +		return -EINVAL;
>  	}
>
> -	for (i = 0; i < vdso_pages; i++) {
> -		struct page *pg;
> +	vdso_info.vdso_pages = (
> +		vdso_info.vdso_code_end -
> +		vdso_info.vdso_code_start) >>
> +		PAGE_SHIFT;
> +
> +	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
> +				sizeof(struct page *),
> +				GFP_KERNEL);
> +	if (vdso_pagelist == NULL)
> +		return -ENOMEM;
> +
> +	/* Grab the vDSO code pages. */
> +	pfn = sym_to_pfn(vdso_info.vdso_code_start);
> +
> +	for (i = 0; i < vdso_info.vdso_pages; i++)
> +		vdso_pagelist[i] = pfn_to_page(pfn + i);
> +
> +	vdso_info.cm->pages = vdso_pagelist;
> +
> +	return 0;
> +}
> +
> +#ifdef CONFIG_TIME_NS
> +struct vdso_data *arch_get_vdso_data(void *vvar_page)
> +{
> +	return (struct vdso_data *)(vvar_page);
> +}
> +
> +/*
> + * The vvar mapping contains data for a specific time namespace, so when a task
> + * changes namespace we must unmap its vvar data for the old namespace.
> + * Subsequent faults will map in data for the new namespace.
> + *
> + * For more details see timens_setup_vdso_data().
> + */
> +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
> +{
> +	struct mm_struct *mm = task->mm;
> +	struct vm_area_struct *vma;
> +
> +	mmap_read_lock(mm);
>
> -		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
> -		vdso_pagelist[i] = pg;
> +	for (vma = mm->mmap; vma; vma = vma->vm_next) {
> +		unsigned long size = vma->vm_end - vma->vm_start;
> +
> +		if (vma_is_special_mapping(vma, vdso_info.dm))
> +			zap_page_range(vma, vma->vm_start, size);
>  	}
> -	vdso_pagelist[i] = virt_to_page(vdso_data);
>
> +	mmap_read_unlock(mm);
>  	return 0;
>  }
> +
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	if (likely(vma->vm_mm == current->mm))
> +		return current->nsproxy->time_ns->vvar_page;
> +
> +	/*
> +	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
> +	 * through interfaces like /proc/$pid/mem or
> +	 * process_vm_{readv,writev}() as long as there's no .access()
> +	 * in special_mapping_vmops.
> +	 * For more details check_vma_flags() and __access_remote_vm()
> +	 */
> +	WARN(1, "vvar_page accessed remotely");
> +
> +	return NULL;
> +}
> +#else
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	return NULL;
> +}
> +#endif
> +
> +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> +			     struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	struct page *timens_page = find_timens_vvar_page(vma);
> +	unsigned long pfn;
> +
> +	switch (vmf->pgoff) {
> +	case VVAR_DATA_PAGE_OFFSET:
> +		if (timens_page)
> +			pfn = page_to_pfn(timens_page);
> +		else
> +			pfn = sym_to_pfn(vdso_data);
> +		break;
> +#ifdef CONFIG_TIME_NS
> +	case VVAR_TIMENS_PAGE_OFFSET:
> +		/*
> +		 * If a task belongs to a time namespace then a namespace
> +		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
> +		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
> +		 * offset.
> +		 * See also the comment near timens_setup_vdso_data().
> +		 */
> +		if (!timens_page)
> +			return VM_FAULT_SIGBUS;
> +		pfn = sym_to_pfn(vdso_data);
> +		break;
> +#endif /* CONFIG_TIME_NS */
> +	default:
> +		return VM_FAULT_SIGBUS;
> +	}
> +
> +	return vmf_insert_pfn(vma, vmf->address, pfn);
> +}
> +
> +enum rv_vdso_map {
> +	RV_VDSO_MAP_VVAR,
> +	RV_VDSO_MAP_VDSO,
> +};
> +
> +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
> +	[RV_VDSO_MAP_VVAR] = {
> +		.name   = "[vvar]",
> +		.fault = vvar_fault,
> +	},
> +	[RV_VDSO_MAP_VDSO] = {
> +		.name   = "[vdso]",
> +		.mremap = vdso_mremap,
> +	},
> +};
> +
> +static int __init vdso_init(void)
> +{
> +	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
> +	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
> +
> +	return __vdso_init();
> +}
>  arch_initcall(vdso_init);
>
> -int arch_setup_additional_pages(struct linux_binprm *bprm,
> -	int uses_interp)
> +static int __setup_additional_pages(struct mm_struct *mm,
> +				    struct linux_binprm *bprm,
> +				    int uses_interp)
>  {
> -	struct mm_struct *mm = current->mm;
> -	unsigned long vdso_base, vdso_len;
> -	int ret;
> +	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
> +	void *ret;
>
>  	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
>
> -	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
> +	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
> +	/* Be sure to map the data page */
> +	vdso_mapping_len = vdso_text_len + VVAR_SIZE;
>
> -	if (mmap_write_lock_killable(mm))
> -		return -EINTR;
> -
> -	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
> +	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
>  	if (IS_ERR_VALUE(vdso_base)) {
> -		ret = vdso_base;
> -		goto end;
> +		ret = ERR_PTR(vdso_base);
> +		goto up_fail;
>  	}
>
> -	mm->context.vdso = NULL;
> -	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
> -		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
> -	if (unlikely(ret))
> -		goto end;
> +	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
> +		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> +	vdso_base += VVAR_SIZE;
> +	mm->context.vdso = (void *)vdso_base;
>  	ret =
> -	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
> -		vdso_pages << PAGE_SHIFT,
> +	   _install_special_mapping(mm, vdso_base, vdso_text_len,
>  		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> -		vdso_pagelist);
> +		vdso_info.cm);
>
> -	if (unlikely(ret))
> -		goto end;
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> -	/*
> -	 * Put vDSO base into mm struct. We need to do this before calling
> -	 * install_special_mapping or the perf counter mmap tracking code
> -	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
> -	 */
> -	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
> +	return 0;
>
> -end:
> -	mmap_write_unlock(mm);
> -	return ret;
> +up_fail:
> +	mm->context.vdso = NULL;
> +	return PTR_ERR(ret);
>  }
>
> -const char *arch_vma_name(struct vm_area_struct *vma)
> +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>  {
> -	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
> -		return "[vdso]";
> -	if (vma->vm_mm && (vma->vm_start ==
> -			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
> -		return "[vdso_data]";
> -	return NULL;
> +	struct mm_struct *mm = current->mm;
> +	int ret;
> +
> +	if (mmap_write_lock_killable(mm))
> +		return -EINTR;
> +
> +	ret = __setup_additional_pages(mm, bprm, uses_interp);
> +	mmap_write_unlock(mm);
> +
> +	return ret;
>  }
> diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
> index e9111f700af0..01d94aae5bf5 100644
> --- a/arch/riscv/kernel/vdso/vdso.lds.S
> +++ b/arch/riscv/kernel/vdso/vdso.lds.S
> @@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
>  SECTIONS
>  {
>  	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
> +#ifdef CONFIG_TIME_NS
> +	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
> +#endif
>  	. = SIZEOF_HEADERS;
>
>  	.hash		: { *(.hash) }			:text

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH -next] riscv/vdso: Add support for time namespaces
@ 2021-10-02 23:18   ` Palmer Dabbelt
  0 siblings, 0 replies; 6+ messages in thread
From: Palmer Dabbelt @ 2021-10-02 23:18 UTC (permalink / raw)
  To: tongtiangen; +Cc: Paul Walmsley, aou, linux-riscv, linux-kernel, tongtiangen

On Tue, 31 Aug 2021 20:20:25 PDT (-0700), tongtiangen@huawei.com wrote:
> Implement generic vdso time namespace support which also enables time
> namespaces for riscv. This is quite similar to what arm64 does.
>
> selftest/timens test result:
>   1..10
>   ok 1 Passed for CLOCK_BOOTTIME (syscall)
>   ok 2 Passed for CLOCK_BOOTTIME (vdso)
>   ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
>   ok 5 Passed for CLOCK_MONOTONIC (syscall)
>   ok 6 Passed for CLOCK_MONOTONIC (vdso)
>   ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
>   ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
>   ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
>   ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
>   # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0
>
> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
> ---
> This patch is based on patchset:
>   https://lore.kernel.org/lkml/20210901024621.2528797-1-tongtiangen@huawei.com/

Thanks.  This is on for next.  I just re-juggled that one on fixes so I 
could pull it in to both.

>
>  arch/riscv/Kconfig                         |   1 +
>  arch/riscv/include/asm/page.h              |   2 +
>  arch/riscv/include/asm/vdso.h              |   2 +-
>  arch/riscv/include/asm/vdso/gettimeofday.h |   7 +
>  arch/riscv/kernel/vdso.c                   | 250 ++++++++++++++++-----
>  arch/riscv/kernel/vdso/vdso.lds.S          |   3 +
>  6 files changed, 211 insertions(+), 54 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index aac669a6c3d8..7b0eff8a7eef 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -61,6 +61,7 @@ config RISCV
>  	select GENERIC_SCHED_CLOCK
>  	select GENERIC_SMP_IDLE_THREAD
>  	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
> +	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
>  	select HANDLE_DOMAIN_IRQ
>  	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 109c97e991a6..b3e5ff0125fe 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
>  #define page_to_bus(page)	(page_to_phys(page))
>  #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
>
> +#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
> +
>  #ifdef CONFIG_FLATMEM
>  #define pfn_valid(pfn) \
>  	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
> diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
> index 34210b22ba91..bee9514104f7 100644
> --- a/arch/riscv/include/asm/vdso.h
> +++ b/arch/riscv/include/asm/vdso.h
> @@ -14,7 +14,7 @@
>   */
>  #ifdef CONFIG_MMU
>
> -#define __VVAR_PAGES    1
> +#define __VVAR_PAGES    2
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
> index f839f16e0d2a..77d9c2f721c4 100644
> --- a/arch/riscv/include/asm/vdso/gettimeofday.h
> +++ b/arch/riscv/include/asm/vdso/gettimeofday.h
> @@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
>  	return _vdso_data;
>  }
>
> +#ifdef CONFIG_TIME_NS
> +static __always_inline
> +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
> +{
> +	return _timens_data;
> +}
> +#endif
>  #endif /* !__ASSEMBLY__ */
>
>  #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
> diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
> index b70956d80408..a9436a65161a 100644
> --- a/arch/riscv/kernel/vdso.c
> +++ b/arch/riscv/kernel/vdso.c
> @@ -13,6 +13,7 @@
>  #include <linux/err.h>
>  #include <asm/page.h>
>  #include <asm/vdso.h>
> +#include <linux/time_namespace.h>
>
>  #ifdef CONFIG_GENERIC_TIME_VSYSCALL
>  #include <vdso/datapage.h>
> @@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
>
>  enum vvar_pages {
>  	VVAR_DATA_PAGE_OFFSET,
> +	VVAR_TIMENS_PAGE_OFFSET,
>  	VVAR_NR_PAGES,
>  };
>
>  #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
>
> -static unsigned int vdso_pages __ro_after_init;
> -static struct page **vdso_pagelist __ro_after_init;
> -
>  /*
>   * The vDSO data page.
>   */
> @@ -42,83 +41,228 @@ static union {
>  } vdso_data_store __page_aligned_data;
>  struct vdso_data *vdso_data = &vdso_data_store.data;
>
> -static int __init vdso_init(void)
> +struct __vdso_info {
> +	const char *name;
> +	const char *vdso_code_start;
> +	const char *vdso_code_end;
> +	unsigned long vdso_pages;
> +	/* Data Mapping */
> +	struct vm_special_mapping *dm;
> +	/* Code Mapping */
> +	struct vm_special_mapping *cm;
> +};
> +
> +static struct __vdso_info vdso_info __ro_after_init = {
> +	.name = "vdso",
> +	.vdso_code_start = vdso_start,
> +	.vdso_code_end = vdso_end,
> +};
> +
> +static int vdso_mremap(const struct vm_special_mapping *sm,
> +		       struct vm_area_struct *new_vma)
> +{
> +	current->mm->context.vdso = (void *)new_vma->vm_start;
> +
> +	return 0;
> +}
> +
> +static int __init __vdso_init(void)
>  {
>  	unsigned int i;
> +	struct page **vdso_pagelist;
> +	unsigned long pfn;
>
> -	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
> -	vdso_pagelist =
> -		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
> -	if (unlikely(vdso_pagelist == NULL)) {
> -		pr_err("vdso: pagelist allocation failed\n");
> -		return -ENOMEM;
> +	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
> +		pr_err("vDSO is not a valid ELF object!\n");
> +		return -EINVAL;
>  	}
>
> -	for (i = 0; i < vdso_pages; i++) {
> -		struct page *pg;
> +	vdso_info.vdso_pages = (
> +		vdso_info.vdso_code_end -
> +		vdso_info.vdso_code_start) >>
> +		PAGE_SHIFT;
> +
> +	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
> +				sizeof(struct page *),
> +				GFP_KERNEL);
> +	if (vdso_pagelist == NULL)
> +		return -ENOMEM;
> +
> +	/* Grab the vDSO code pages. */
> +	pfn = sym_to_pfn(vdso_info.vdso_code_start);
> +
> +	for (i = 0; i < vdso_info.vdso_pages; i++)
> +		vdso_pagelist[i] = pfn_to_page(pfn + i);
> +
> +	vdso_info.cm->pages = vdso_pagelist;
> +
> +	return 0;
> +}
> +
> +#ifdef CONFIG_TIME_NS
> +struct vdso_data *arch_get_vdso_data(void *vvar_page)
> +{
> +	return (struct vdso_data *)(vvar_page);
> +}
> +
> +/*
> + * The vvar mapping contains data for a specific time namespace, so when a task
> + * changes namespace we must unmap its vvar data for the old namespace.
> + * Subsequent faults will map in data for the new namespace.
> + *
> + * For more details see timens_setup_vdso_data().
> + */
> +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
> +{
> +	struct mm_struct *mm = task->mm;
> +	struct vm_area_struct *vma;
> +
> +	mmap_read_lock(mm);
>
> -		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
> -		vdso_pagelist[i] = pg;
> +	for (vma = mm->mmap; vma; vma = vma->vm_next) {
> +		unsigned long size = vma->vm_end - vma->vm_start;
> +
> +		if (vma_is_special_mapping(vma, vdso_info.dm))
> +			zap_page_range(vma, vma->vm_start, size);
>  	}
> -	vdso_pagelist[i] = virt_to_page(vdso_data);
>
> +	mmap_read_unlock(mm);
>  	return 0;
>  }
> +
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	if (likely(vma->vm_mm == current->mm))
> +		return current->nsproxy->time_ns->vvar_page;
> +
> +	/*
> +	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
> +	 * through interfaces like /proc/$pid/mem or
> +	 * process_vm_{readv,writev}() as long as there's no .access()
> +	 * in special_mapping_vmops.
> +	 * For more details check_vma_flags() and __access_remote_vm()
> +	 */
> +	WARN(1, "vvar_page accessed remotely");
> +
> +	return NULL;
> +}
> +#else
> +static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
> +{
> +	return NULL;
> +}
> +#endif
> +
> +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> +			     struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	struct page *timens_page = find_timens_vvar_page(vma);
> +	unsigned long pfn;
> +
> +	switch (vmf->pgoff) {
> +	case VVAR_DATA_PAGE_OFFSET:
> +		if (timens_page)
> +			pfn = page_to_pfn(timens_page);
> +		else
> +			pfn = sym_to_pfn(vdso_data);
> +		break;
> +#ifdef CONFIG_TIME_NS
> +	case VVAR_TIMENS_PAGE_OFFSET:
> +		/*
> +		 * If a task belongs to a time namespace then a namespace
> +		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
> +		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
> +		 * offset.
> +		 * See also the comment near timens_setup_vdso_data().
> +		 */
> +		if (!timens_page)
> +			return VM_FAULT_SIGBUS;
> +		pfn = sym_to_pfn(vdso_data);
> +		break;
> +#endif /* CONFIG_TIME_NS */
> +	default:
> +		return VM_FAULT_SIGBUS;
> +	}
> +
> +	return vmf_insert_pfn(vma, vmf->address, pfn);
> +}
> +
> +enum rv_vdso_map {
> +	RV_VDSO_MAP_VVAR,
> +	RV_VDSO_MAP_VDSO,
> +};
> +
> +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
> +	[RV_VDSO_MAP_VVAR] = {
> +		.name   = "[vvar]",
> +		.fault = vvar_fault,
> +	},
> +	[RV_VDSO_MAP_VDSO] = {
> +		.name   = "[vdso]",
> +		.mremap = vdso_mremap,
> +	},
> +};
> +
> +static int __init vdso_init(void)
> +{
> +	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
> +	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
> +
> +	return __vdso_init();
> +}
>  arch_initcall(vdso_init);
>
> -int arch_setup_additional_pages(struct linux_binprm *bprm,
> -	int uses_interp)
> +static int __setup_additional_pages(struct mm_struct *mm,
> +				    struct linux_binprm *bprm,
> +				    int uses_interp)
>  {
> -	struct mm_struct *mm = current->mm;
> -	unsigned long vdso_base, vdso_len;
> -	int ret;
> +	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
> +	void *ret;
>
>  	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
>
> -	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
> +	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
> +	/* Be sure to map the data page */
> +	vdso_mapping_len = vdso_text_len + VVAR_SIZE;
>
> -	if (mmap_write_lock_killable(mm))
> -		return -EINTR;
> -
> -	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
> +	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
>  	if (IS_ERR_VALUE(vdso_base)) {
> -		ret = vdso_base;
> -		goto end;
> +		ret = ERR_PTR(vdso_base);
> +		goto up_fail;
>  	}
>
> -	mm->context.vdso = NULL;
> -	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
> -		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
> -	if (unlikely(ret))
> -		goto end;
> +	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
> +		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> +	vdso_base += VVAR_SIZE;
> +	mm->context.vdso = (void *)vdso_base;
>  	ret =
> -	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
> -		vdso_pages << PAGE_SHIFT,
> +	   _install_special_mapping(mm, vdso_base, vdso_text_len,
>  		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> -		vdso_pagelist);
> +		vdso_info.cm);
>
> -	if (unlikely(ret))
> -		goto end;
> +	if (IS_ERR(ret))
> +		goto up_fail;
>
> -	/*
> -	 * Put vDSO base into mm struct. We need to do this before calling
> -	 * install_special_mapping or the perf counter mmap tracking code
> -	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
> -	 */
> -	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
> +	return 0;
>
> -end:
> -	mmap_write_unlock(mm);
> -	return ret;
> +up_fail:
> +	mm->context.vdso = NULL;
> +	return PTR_ERR(ret);
>  }
>
> -const char *arch_vma_name(struct vm_area_struct *vma)
> +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>  {
> -	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
> -		return "[vdso]";
> -	if (vma->vm_mm && (vma->vm_start ==
> -			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
> -		return "[vdso_data]";
> -	return NULL;
> +	struct mm_struct *mm = current->mm;
> +	int ret;
> +
> +	if (mmap_write_lock_killable(mm))
> +		return -EINTR;
> +
> +	ret = __setup_additional_pages(mm, bprm, uses_interp);
> +	mmap_write_unlock(mm);
> +
> +	return ret;
>  }
> diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
> index e9111f700af0..01d94aae5bf5 100644
> --- a/arch/riscv/kernel/vdso/vdso.lds.S
> +++ b/arch/riscv/kernel/vdso/vdso.lds.S
> @@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
>  SECTIONS
>  {
>  	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
> +#ifdef CONFIG_TIME_NS
> +	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
> +#endif
>  	. = SIZEOF_HEADERS;
>
>  	.hash		: { *(.hash) }			:text

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-10-02 23:18 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-01  3:20 [PATCH -next] riscv/vdso: Add support for time namespaces Tong Tiangen
2021-09-01  3:20 ` Tong Tiangen
2021-09-28  2:35 ` tongtiangen
2021-09-28  2:35   ` tongtiangen
2021-10-02 23:18 ` Palmer Dabbelt
2021-10-02 23:18   ` Palmer Dabbelt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.