* [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu()
@ 2022-06-17 14:58 Huacai Chen
  2022-06-17 15:34 ` hev
  0 siblings, 1 reply; 5+ messages in thread
From: Huacai Chen @ 2022-06-17 14:58 UTC
  To: Arnd Bergmann, Huacai Chen
  Cc: loongarch, linux-arch, Xuefeng Li, Guo Ren, Xuerui Wang,
	Jiaxun Yang, Huacai Chen

We tested 20 million getcpu() calls: the real syscall version takes
25 seconds, while the vsyscall version takes only 2.4 seconds.
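
The benchmark is essentially a tight loop along these lines (an
illustrative sketch, not the exact test program used; it assumes the
glibc getcpu() wrapper, which goes through the vDSO when one is
available, while the real-syscall case can be forced with
syscall(__NR_getcpu, ...)):

  /* rough benchmark sketch -- hypothetical, not the actual test program */
  #define _GNU_SOURCE
  #include <sched.h>                      /* getcpu(), glibc >= 2.29 */

  int main(void)
  {
          unsigned int cpu, node;

          for (int i = 0; i < 20000000; i++)
                  getcpu(&cpu, &node);    /* vDSO fast path when available */

          return 0;
  }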

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/vdso.h      |  4 +++
 arch/loongarch/include/asm/vdso/vdso.h | 10 +++++-
 arch/loongarch/kernel/vdso.c           | 23 +++++++++-----
 arch/loongarch/vdso/Makefile           |  3 +-
 arch/loongarch/vdso/vdso.lds.S         |  1 +
 arch/loongarch/vdso/vgetcpu.c          | 43 ++++++++++++++++++++++++++
 6 files changed, 74 insertions(+), 10 deletions(-)
 create mode 100644 arch/loongarch/vdso/vgetcpu.c

diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
index 8f8a0f9a4953..e76d5e37480d 100644
--- a/arch/loongarch/include/asm/vdso.h
+++ b/arch/loongarch/include/asm/vdso.h
@@ -12,6 +12,10 @@
 
 #include <asm/barrier.h>
 
+typedef struct vdso_pcpu_data {
+	u32 node;
+} ____cacheline_aligned_in_smp vdso_pcpu_data;
+
 /*
  * struct loongarch_vdso_info - Details of a VDSO image.
  * @vdso: Pointer to VDSO image (page-aligned).
diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
index 5a01643a65b3..94055f7c54b7 100644
--- a/arch/loongarch/include/asm/vdso/vdso.h
+++ b/arch/loongarch/include/asm/vdso/vdso.h
@@ -8,6 +8,13 @@
 
 #include <asm/asm.h>
 #include <asm/page.h>
+#include <asm/vdso.h>
+
+#if PAGE_SIZE < SZ_16K
+#define VDSO_DATA_SIZE SZ_16K
+#else
+#define VDSO_DATA_SIZE PAGE_SIZE
+#endif
 
 static inline unsigned long get_vdso_base(void)
 {
@@ -24,7 +31,8 @@ static inline unsigned long get_vdso_base(void)
 
 static inline const struct vdso_data *get_vdso_data(void)
 {
-	return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE);
+	return (const struct vdso_data *)(get_vdso_base()
+			- VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS);
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
index e20c8ca87473..6ce322a1bf8b 100644
--- a/arch/loongarch/kernel/vdso.c
+++ b/arch/loongarch/kernel/vdso.c
@@ -26,11 +26,15 @@ extern char vdso_start[], vdso_end[];
 
 /* Kernel-provided data used by the VDSO. */
 static union loongarch_vdso_data {
-	u8 page[PAGE_SIZE];
-	struct vdso_data data[CS_BASES];
+	u8 page[VDSO_DATA_SIZE];
+	struct {
+		vdso_pcpu_data pdata[NR_CPUS];
+		struct vdso_data data[CS_BASES];
+	};
 } loongarch_vdso_data __page_aligned_data;
-struct vdso_data *vdso_data = loongarch_vdso_data.data;
+
 static struct page *vdso_pages[] = { NULL };
+struct vdso_data *vdso_data = loongarch_vdso_data.data;
 
 static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
 {
@@ -55,11 +59,14 @@ struct loongarch_vdso_info vdso_info = {
 
 static int __init init_vdso(void)
 {
-	unsigned long i, pfn;
+	unsigned long i, cpu, pfn;
 
 	BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
 	BUG_ON(!PAGE_ALIGNED(vdso_info.size));
 
+	for_each_possible_cpu(cpu)
+		loongarch_vdso_data.pdata[cpu].node = cpu_to_node(cpu);
+
 	pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
 	for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
 		vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
@@ -93,9 +100,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	/*
 	 * Determine total area size. This includes the VDSO data itself
-	 * and the data page.
+	 * and the data pages.
 	 */
-	vvar_size = PAGE_SIZE;
+	vvar_size = VDSO_DATA_SIZE;
 	size = vvar_size + info->size;
 
 	data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
@@ -115,8 +122,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	/* Map VDSO data page. */
 	ret = remap_pfn_range(vma, data_addr,
-			      virt_to_phys(vdso_data) >> PAGE_SHIFT,
-			      PAGE_SIZE, PAGE_READONLY);
+			      virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT,
+			      vvar_size, PAGE_READONLY);
 	if (ret)
 		goto out;
 
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index 6b6e16732c60..d89e2ac75f7b 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -6,7 +6,7 @@
 ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
 include $(srctree)/lib/vdso/Makefile
 
-obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
+obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
 
 # Common compiler flags between ABIs.
 ccflags-vdso := \
@@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
 endif
 
 cflags-vdso := $(ccflags-vdso) \
+	-isystem $(shell $(CC) -print-file-name=include) \
 	$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
 	-O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \
 	-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S
index 955f02de4a2d..56ad855896de 100644
--- a/arch/loongarch/vdso/vdso.lds.S
+++ b/arch/loongarch/vdso/vdso.lds.S
@@ -58,6 +58,7 @@ VERSION
 {
 	LINUX_5.10 {
 	global:
+		__vdso_getcpu;
 		__vdso_clock_getres;
 		__vdso_clock_gettime;
 		__vdso_gettimeofday;
diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c
new file mode 100644
index 000000000000..23fe2362f4e0
--- /dev/null
+++ b/arch/loongarch/vdso/vgetcpu.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Fast user context implementation of getcpu()
+ */
+
+#include <asm/vdso.h>
+#include <linux/getcpu.h>
+
+static __always_inline int read_cpu_id(void)
+{
+	int cpu_id;
+
+	__asm__ __volatile__(
+	"	rdtime.d $zero, %0\n"
+	: "=r" (cpu_id)
+	:
+	: "memory");
+
+	return cpu_id;
+}
+
+static __always_inline const vdso_pcpu_data *get_pcpu_data(void)
+{
+	return (vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE);
+}
+
+int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused)
+{
+	int cpu_id;
+	const vdso_pcpu_data *data;
+
+	cpu_id = read_cpu_id();
+
+	if (cpu)
+		*cpu = cpu_id;
+
+	if (node) {
+		data = get_pcpu_data();
+		*node = data[cpu_id].node;
+	}
+
+	return 0;
+}
-- 
2.27.0



* Re: [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu()
  2022-06-17 14:58 [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu() Huacai Chen
@ 2022-06-17 15:34 ` hev
  2022-06-18  9:10   ` Huacai Chen
  0 siblings, 1 reply; 5+ messages in thread
From: hev @ 2022-06-17 15:34 UTC
  To: Huacai Chen
  Cc: Arnd Bergmann, Huacai Chen, loongarch, linux-arch, Xuefeng Li,
	Guo Ren, Xuerui Wang, Jiaxun Yang

Hello,

On Fri, Jun 17, 2022 at 10:57 PM Huacai Chen <chenhuacai@loongson.cn> wrote:
>
> We tested 20 million getcpu() calls: the real syscall version takes
> 25 seconds, while the vsyscall version takes only 2.4 seconds.
>
> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> ---
>  arch/loongarch/include/asm/vdso.h      |  4 +++
>  arch/loongarch/include/asm/vdso/vdso.h | 10 +++++-
>  arch/loongarch/kernel/vdso.c           | 23 +++++++++-----
>  arch/loongarch/vdso/Makefile           |  3 +-
>  arch/loongarch/vdso/vdso.lds.S         |  1 +
>  arch/loongarch/vdso/vgetcpu.c          | 43 ++++++++++++++++++++++++++
>  6 files changed, 74 insertions(+), 10 deletions(-)
>  create mode 100644 arch/loongarch/vdso/vgetcpu.c
>
> diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
> index 8f8a0f9a4953..e76d5e37480d 100644
> --- a/arch/loongarch/include/asm/vdso.h
> +++ b/arch/loongarch/include/asm/vdso.h
> @@ -12,6 +12,10 @@
>
>  #include <asm/barrier.h>
>
> +typedef struct vdso_pcpu_data {
> +       u32 node;
> +} ____cacheline_aligned_in_smp vdso_pcpu_data;
> +
>  /*
>   * struct loongarch_vdso_info - Details of a VDSO image.
>   * @vdso: Pointer to VDSO image (page-aligned).
> diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
> index 5a01643a65b3..94055f7c54b7 100644
> --- a/arch/loongarch/include/asm/vdso/vdso.h
> +++ b/arch/loongarch/include/asm/vdso/vdso.h
> @@ -8,6 +8,13 @@
>
>  #include <asm/asm.h>
>  #include <asm/page.h>
> +#include <asm/vdso.h>
> +
> +#if PAGE_SIZE < SZ_16K
> +#define VDSO_DATA_SIZE SZ_16K

Whether we add members to the vdso data structure or extend
SMP_CACHE_BYTES/NR_CPUS, the static VDSO_DATA_SIZE may not match, and
there is no assertion checking to help us catch bugs early. So I
suggest defining VDSO_DATA_SIZE as ALIGN_UP(sizeof (struct vdso_data),
PAGE_SIZE).
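
For illustration, something along these lines could work (a rough,
untested sketch with a made-up struct name; it sizes the whole layout,
per-CPU entries plus vdso_data, and uses the kernel's ALIGN() helper to
round up to a multiple of PAGE_SIZE):

  /* sketch only -- struct name is hypothetical, not from the patch */
  struct loongarch_vdso_layout {
          vdso_pcpu_data pdata[NR_CPUS];
          struct vdso_data data[CS_BASES];
  };

  #define VDSO_DATA_SIZE ALIGN(sizeof(struct loongarch_vdso_layout), PAGE_SIZE)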

hev

> +#else
> +#define VDSO_DATA_SIZE PAGE_SIZE
> +#endif
>
>  static inline unsigned long get_vdso_base(void)
>  {
> @@ -24,7 +31,8 @@ static inline unsigned long get_vdso_base(void)
>
>  static inline const struct vdso_data *get_vdso_data(void)
>  {
> -       return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE);
> +       return (const struct vdso_data *)(get_vdso_base()
> +                       - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS);
>  }
>
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
> index e20c8ca87473..6ce322a1bf8b 100644
> --- a/arch/loongarch/kernel/vdso.c
> +++ b/arch/loongarch/kernel/vdso.c
> @@ -26,11 +26,15 @@ extern char vdso_start[], vdso_end[];
>
>  /* Kernel-provided data used by the VDSO. */
>  static union loongarch_vdso_data {
> -       u8 page[PAGE_SIZE];
> -       struct vdso_data data[CS_BASES];
> +       u8 page[VDSO_DATA_SIZE];
> +       struct {
> +               vdso_pcpu_data pdata[NR_CPUS];
> +               struct vdso_data data[CS_BASES];
> +       };
>  } loongarch_vdso_data __page_aligned_data;
> -struct vdso_data *vdso_data = loongarch_vdso_data.data;
> +
>  static struct page *vdso_pages[] = { NULL };
> +struct vdso_data *vdso_data = loongarch_vdso_data.data;
>
>  static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
>  {
> @@ -55,11 +59,14 @@ struct loongarch_vdso_info vdso_info = {
>
>  static int __init init_vdso(void)
>  {
> -       unsigned long i, pfn;
> +       unsigned long i, cpu, pfn;
>
>         BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
>         BUG_ON(!PAGE_ALIGNED(vdso_info.size));
>
> +       for_each_possible_cpu(cpu)
> +               loongarch_vdso_data.pdata[cpu].node = cpu_to_node(cpu);
> +
>         pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
>         for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
>                 vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
> @@ -93,9 +100,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>
>         /*
>          * Determine total area size. This includes the VDSO data itself
> -        * and the data page.
> +        * and the data pages.
>          */
> -       vvar_size = PAGE_SIZE;
> +       vvar_size = VDSO_DATA_SIZE;
>         size = vvar_size + info->size;
>
>         data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
> @@ -115,8 +122,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>
>         /* Map VDSO data page. */
>         ret = remap_pfn_range(vma, data_addr,
> -                             virt_to_phys(vdso_data) >> PAGE_SHIFT,
> -                             PAGE_SIZE, PAGE_READONLY);
> +                             virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT,
> +                             vvar_size, PAGE_READONLY);
>         if (ret)
>                 goto out;
>
> diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
> index 6b6e16732c60..d89e2ac75f7b 100644
> --- a/arch/loongarch/vdso/Makefile
> +++ b/arch/loongarch/vdso/Makefile
> @@ -6,7 +6,7 @@
>  ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
>  include $(srctree)/lib/vdso/Makefile
>
> -obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
> +obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
>
>  # Common compiler flags between ABIs.
>  ccflags-vdso := \
> @@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
>  endif
>
>  cflags-vdso := $(ccflags-vdso) \
> +       -isystem $(shell $(CC) -print-file-name=include) \
>         $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
>         -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \
>         -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
> diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S
> index 955f02de4a2d..56ad855896de 100644
> --- a/arch/loongarch/vdso/vdso.lds.S
> +++ b/arch/loongarch/vdso/vdso.lds.S
> @@ -58,6 +58,7 @@ VERSION
>  {
>         LINUX_5.10 {
>         global:
> +               __vdso_getcpu;
>                 __vdso_clock_getres;
>                 __vdso_clock_gettime;
>                 __vdso_gettimeofday;
> diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c
> new file mode 100644
> index 000000000000..23fe2362f4e0
> --- /dev/null
> +++ b/arch/loongarch/vdso/vgetcpu.c
> @@ -0,0 +1,43 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Fast user context implementation of getcpu()
> + */
> +
> +#include <asm/vdso.h>
> +#include <linux/getcpu.h>
> +
> +static __always_inline int read_cpu_id(void)
> +{
> +       int cpu_id;
> +
> +       __asm__ __volatile__(
> +       "       rdtime.d $zero, %0\n"
> +       : "=r" (cpu_id)
> +       :
> +       : "memory");
> +
> +       return cpu_id;
> +}
> +
> +static __always_inline const vdso_pcpu_data *get_pcpu_data(void)
> +{
> +       return (vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE);
> +}
> +
> +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused)
> +{
> +       int cpu_id;
> +       const vdso_pcpu_data *data;
> +
> +       cpu_id = read_cpu_id();
> +
> +       if (cpu)
> +               *cpu = cpu_id;
> +
> +       if (node) {
> +               data = get_pcpu_data();
> +               *node = data[cpu_id].node;
> +       }
> +
> +       return 0;
> +}
> --
> 2.27.0
>
>


* Re: [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu()
  2022-06-17 15:34 ` hev
@ 2022-06-18  9:10   ` Huacai Chen
  2022-06-18 12:56     ` WANG Xuerui
  0 siblings, 1 reply; 5+ messages in thread
From: Huacai Chen @ 2022-06-18  9:10 UTC
  To: hev
  Cc: Huacai Chen, Arnd Bergmann, loongarch, linux-arch, Xuefeng Li,
	Guo Ren, Xuerui Wang, Jiaxun Yang

Hi,

On Fri, Jun 17, 2022 at 11:35 PM hev <r@hev.cc> wrote:
>
> Hello,
>
> On Fri, Jun 17, 2022 at 10:57 PM Huacai Chen <chenhuacai@loongson.cn> wrote:
> >
> > We tested 20 million getcpu() calls: the real syscall version takes
> > 25 seconds, while the vsyscall version takes only 2.4 seconds.
> >
> > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> > ---
> >  arch/loongarch/include/asm/vdso.h      |  4 +++
> >  arch/loongarch/include/asm/vdso/vdso.h | 10 +++++-
> >  arch/loongarch/kernel/vdso.c           | 23 +++++++++-----
> >  arch/loongarch/vdso/Makefile           |  3 +-
> >  arch/loongarch/vdso/vdso.lds.S         |  1 +
> >  arch/loongarch/vdso/vgetcpu.c          | 43 ++++++++++++++++++++++++++
> >  6 files changed, 74 insertions(+), 10 deletions(-)
> >  create mode 100644 arch/loongarch/vdso/vgetcpu.c
> >
> > diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
> > index 8f8a0f9a4953..e76d5e37480d 100644
> > --- a/arch/loongarch/include/asm/vdso.h
> > +++ b/arch/loongarch/include/asm/vdso.h
> > @@ -12,6 +12,10 @@
> >
> >  #include <asm/barrier.h>
> >
> > +typedef struct vdso_pcpu_data {
> > +       u32 node;
> > +} ____cacheline_aligned_in_smp vdso_pcpu_data;
> > +
> >  /*
> >   * struct loongarch_vdso_info - Details of a VDSO image.
> >   * @vdso: Pointer to VDSO image (page-aligned).
> > diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
> > index 5a01643a65b3..94055f7c54b7 100644
> > --- a/arch/loongarch/include/asm/vdso/vdso.h
> > +++ b/arch/loongarch/include/asm/vdso/vdso.h
> > @@ -8,6 +8,13 @@
> >
> >  #include <asm/asm.h>
> >  #include <asm/page.h>
> > +#include <asm/vdso.h>
> > +
> > +#if PAGE_SIZE < SZ_16K
> > +#define VDSO_DATA_SIZE SZ_16K
>
> Whether we add members to the vdso data structure or extend
> SMP_CACHE_BYTES/NR_CPUS, the static VDSO_DATA_SIZE may not match, and
> there is no assertion checking to help us catch bugs early. So I
> suggest defining VDSO_DATA_SIZE as ALIGN_UP(sizeof (struct vdso_data),
> PAGE_SIZE).
VSYSCALL usage is very limited (you know, VSYSCALL has been around for
so many years, yet the number of entries has barely grown), so I think
16KB will be enough for the future.

Huacai
>
> hev
>
> > +#else
> > +#define VDSO_DATA_SIZE PAGE_SIZE
> > +#endif
> >
> >  static inline unsigned long get_vdso_base(void)
> >  {
> > @@ -24,7 +31,8 @@ static inline unsigned long get_vdso_base(void)
> >
> >  static inline const struct vdso_data *get_vdso_data(void)
> >  {
> > -       return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE);
> > +       return (const struct vdso_data *)(get_vdso_base()
> > +                       - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS);
> >  }
> >
> >  #endif /* __ASSEMBLY__ */
> > diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
> > index e20c8ca87473..6ce322a1bf8b 100644
> > --- a/arch/loongarch/kernel/vdso.c
> > +++ b/arch/loongarch/kernel/vdso.c
> > @@ -26,11 +26,15 @@ extern char vdso_start[], vdso_end[];
> >
> >  /* Kernel-provided data used by the VDSO. */
> >  static union loongarch_vdso_data {
> > -       u8 page[PAGE_SIZE];
> > -       struct vdso_data data[CS_BASES];
> > +       u8 page[VDSO_DATA_SIZE];
> > +       struct {
> > +               vdso_pcpu_data pdata[NR_CPUS];
> > +               struct vdso_data data[CS_BASES];
> > +       };
> >  } loongarch_vdso_data __page_aligned_data;
> > -struct vdso_data *vdso_data = loongarch_vdso_data.data;
> > +
> >  static struct page *vdso_pages[] = { NULL };
> > +struct vdso_data *vdso_data = loongarch_vdso_data.data;
> >
> >  static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
> >  {
> > @@ -55,11 +59,14 @@ struct loongarch_vdso_info vdso_info = {
> >
> >  static int __init init_vdso(void)
> >  {
> > -       unsigned long i, pfn;
> > +       unsigned long i, cpu, pfn;
> >
> >         BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
> >         BUG_ON(!PAGE_ALIGNED(vdso_info.size));
> >
> > +       for_each_possible_cpu(cpu)
> > +               loongarch_vdso_data.pdata[cpu].node = cpu_to_node(cpu);
> > +
> >         pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
> >         for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
> >                 vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
> > @@ -93,9 +100,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
> >
> >         /*
> >          * Determine total area size. This includes the VDSO data itself
> > -        * and the data page.
> > +        * and the data pages.
> >          */
> > -       vvar_size = PAGE_SIZE;
> > +       vvar_size = VDSO_DATA_SIZE;
> >         size = vvar_size + info->size;
> >
> >         data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
> > @@ -115,8 +122,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
> >
> >         /* Map VDSO data page. */
> >         ret = remap_pfn_range(vma, data_addr,
> > -                             virt_to_phys(vdso_data) >> PAGE_SHIFT,
> > -                             PAGE_SIZE, PAGE_READONLY);
> > +                             virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT,
> > +                             vvar_size, PAGE_READONLY);
> >         if (ret)
> >                 goto out;
> >
> > diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
> > index 6b6e16732c60..d89e2ac75f7b 100644
> > --- a/arch/loongarch/vdso/Makefile
> > +++ b/arch/loongarch/vdso/Makefile
> > @@ -6,7 +6,7 @@
> >  ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
> >  include $(srctree)/lib/vdso/Makefile
> >
> > -obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
> > +obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
> >
> >  # Common compiler flags between ABIs.
> >  ccflags-vdso := \
> > @@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
> >  endif
> >
> >  cflags-vdso := $(ccflags-vdso) \
> > +       -isystem $(shell $(CC) -print-file-name=include) \
> >         $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
> >         -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \
> >         -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
> > diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S
> > index 955f02de4a2d..56ad855896de 100644
> > --- a/arch/loongarch/vdso/vdso.lds.S
> > +++ b/arch/loongarch/vdso/vdso.lds.S
> > @@ -58,6 +58,7 @@ VERSION
> >  {
> >         LINUX_5.10 {
> >         global:
> > +               __vdso_getcpu;
> >                 __vdso_clock_getres;
> >                 __vdso_clock_gettime;
> >                 __vdso_gettimeofday;
> > diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c
> > new file mode 100644
> > index 000000000000..23fe2362f4e0
> > --- /dev/null
> > +++ b/arch/loongarch/vdso/vgetcpu.c
> > @@ -0,0 +1,43 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Fast user context implementation of getcpu()
> > + */
> > +
> > +#include <asm/vdso.h>
> > +#include <linux/getcpu.h>
> > +
> > +static __always_inline int read_cpu_id(void)
> > +{
> > +       int cpu_id;
> > +
> > +       __asm__ __volatile__(
> > +       "       rdtime.d $zero, %0\n"
> > +       : "=r" (cpu_id)
> > +       :
> > +       : "memory");
> > +
> > +       return cpu_id;
> > +}
> > +
> > +static __always_inline const vdso_pcpu_data *get_pcpu_data(void)
> > +{
> > +       return (vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE);
> > +}
> > +
> > +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused)
> > +{
> > +       int cpu_id;
> > +       const vdso_pcpu_data *data;
> > +
> > +       cpu_id = read_cpu_id();
> > +
> > +       if (cpu)
> > +               *cpu = cpu_id;
> > +
> > +       if (node) {
> > +               data = get_pcpu_data();
> > +               *node = data[cpu_id].node;
> > +       }
> > +
> > +       return 0;
> > +}
> > --
> > 2.27.0
> >
> >


* Re: [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu()
  2022-06-18  9:10   ` Huacai Chen
@ 2022-06-18 12:56     ` WANG Xuerui
  2022-06-20  4:26       ` Huacai Chen
  0 siblings, 1 reply; 5+ messages in thread
From: WANG Xuerui @ 2022-06-18 12:56 UTC
  To: Huacai Chen, hev
  Cc: Huacai Chen, Arnd Bergmann, loongarch, linux-arch, Xuefeng Li,
	Guo Ren, Xuerui Wang, Jiaxun Yang

On 6/18/22 17:10, Huacai Chen wrote:
> Hi,
>
> On Fri, Jun 17, 2022 at 11:35 PM hev <r@hev.cc> wrote:
>> Hello,
>>
>> On Fri, Jun 17, 2022 at 10:57 PM Huacai Chen <chenhuacai@loongson.cn> wrote:
>>> We tested 20 million getcpu() calls: the real syscall version takes
>>> 25 seconds, while the vsyscall version takes only 2.4 seconds.
>>>
>>> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
>>> ---
>>>   arch/loongarch/include/asm/vdso.h      |  4 +++
>>>   arch/loongarch/include/asm/vdso/vdso.h | 10 +++++-
>>>   arch/loongarch/kernel/vdso.c           | 23 +++++++++-----
>>>   arch/loongarch/vdso/Makefile           |  3 +-
>>>   arch/loongarch/vdso/vdso.lds.S         |  1 +
>>>   arch/loongarch/vdso/vgetcpu.c          | 43 ++++++++++++++++++++++++++
>>>   6 files changed, 74 insertions(+), 10 deletions(-)
>>>   create mode 100644 arch/loongarch/vdso/vgetcpu.c
>>>
>>> diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
>>> index 8f8a0f9a4953..e76d5e37480d 100644
>>> --- a/arch/loongarch/include/asm/vdso.h
>>> +++ b/arch/loongarch/include/asm/vdso.h
>>> @@ -12,6 +12,10 @@
>>>
>>>   #include <asm/barrier.h>
>>>
>>> +typedef struct vdso_pcpu_data {
>>> +       u32 node;
>>> +} ____cacheline_aligned_in_smp vdso_pcpu_data;
>>> +
>>>   /*
>>>    * struct loongarch_vdso_info - Details of a VDSO image.
>>>    * @vdso: Pointer to VDSO image (page-aligned).
>>> diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
>>> index 5a01643a65b3..94055f7c54b7 100644
>>> --- a/arch/loongarch/include/asm/vdso/vdso.h
>>> +++ b/arch/loongarch/include/asm/vdso/vdso.h
>>> @@ -8,6 +8,13 @@
>>>
>>>   #include <asm/asm.h>
>>>   #include <asm/page.h>
>>> +#include <asm/vdso.h>
>>> +
>>> +#if PAGE_SIZE < SZ_16K
>>> +#define VDSO_DATA_SIZE SZ_16K
>> Whether we add members to the vdso data structure or extend
>> SMP_CACHE_BYTES/NR_CPUS, the static VDSO_DATA_SIZE may not match, and
>> there is no assertion checking to help us catch bugs early. So I
>> suggest defining VDSO_DATA_SIZE as ALIGN_UP(sizeof (struct vdso_data),
>> PAGE_SIZE).
> VSYSCALL usage is very limited (you know, VSYSCALL has been around for
> so many years, yet the number of entries has barely grown), so I think
> 16KB will be enough for the future.

I don't think omitting compile-time assertions for *correctness* is
worth the negligible improvement in brevity and ease of maintenance. In
fact, static checks for correctness actually *lighten* the maintenance
burden, by explicitly calling out the assumptions so that newcomers
(i.e. me or some other random linux/arch developer refactoring the
code) would find them very helpful.

So I'm in support of declaring VDSO_DATA_SIZE explicitly in terms of
sizeof(struct vdso_data) and PAGE_SIZE.
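
A compile-time check along these lines would capture that assumption (a
hypothetical sketch reusing the names from the patch above, not a
concrete proposal):

  /* e.g. next to the union definition in arch/loongarch/kernel/vdso.c */
  static_assert(sizeof(vdso_pcpu_data) * NR_CPUS +
                sizeof(struct vdso_data) * CS_BASES <= VDSO_DATA_SIZE,
                "vDSO data does not fit in VDSO_DATA_SIZE");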


* Re: [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu()
  2022-06-18 12:56     ` WANG Xuerui
@ 2022-06-20  4:26       ` Huacai Chen
  0 siblings, 0 replies; 5+ messages in thread
From: Huacai Chen @ 2022-06-20  4:26 UTC
  To: WANG Xuerui
  Cc: hev, Huacai Chen, Arnd Bergmann, loongarch, linux-arch,
	Xuefeng Li, Guo Ren, Jiaxun Yang

Hi, Xuerui,

On Sat, Jun 18, 2022 at 8:56 PM WANG Xuerui <kernel@xen0n.name> wrote:
>
> On 6/18/22 17:10, Huacai Chen wrote:
> > Hi,
> >
> > On Fri, Jun 17, 2022 at 11:35 PM hev <r@hev.cc> wrote:
> >> Hello,
> >>
> >> On Fri, Jun 17, 2022 at 10:57 PM Huacai Chen <chenhuacai@loongson.cn> wrote:
> >>> We tested 20 million getcpu() calls: the real syscall version takes
> >>> 25 seconds, while the vsyscall version takes only 2.4 seconds.
> >>>
> >>> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> >>> ---
> >>>   arch/loongarch/include/asm/vdso.h      |  4 +++
> >>>   arch/loongarch/include/asm/vdso/vdso.h | 10 +++++-
> >>>   arch/loongarch/kernel/vdso.c           | 23 +++++++++-----
> >>>   arch/loongarch/vdso/Makefile           |  3 +-
> >>>   arch/loongarch/vdso/vdso.lds.S         |  1 +
> >>>   arch/loongarch/vdso/vgetcpu.c          | 43 ++++++++++++++++++++++++++
> >>>   6 files changed, 74 insertions(+), 10 deletions(-)
> >>>   create mode 100644 arch/loongarch/vdso/vgetcpu.c
> >>>
> >>> diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
> >>> index 8f8a0f9a4953..e76d5e37480d 100644
> >>> --- a/arch/loongarch/include/asm/vdso.h
> >>> +++ b/arch/loongarch/include/asm/vdso.h
> >>> @@ -12,6 +12,10 @@
> >>>
> >>>   #include <asm/barrier.h>
> >>>
> >>> +typedef struct vdso_pcpu_data {
> >>> +       u32 node;
> >>> +} ____cacheline_aligned_in_smp vdso_pcpu_data;
> >>> +
> >>>   /*
> >>>    * struct loongarch_vdso_info - Details of a VDSO image.
> >>>    * @vdso: Pointer to VDSO image (page-aligned).
> >>> diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
> >>> index 5a01643a65b3..94055f7c54b7 100644
> >>> --- a/arch/loongarch/include/asm/vdso/vdso.h
> >>> +++ b/arch/loongarch/include/asm/vdso/vdso.h
> >>> @@ -8,6 +8,13 @@
> >>>
> >>>   #include <asm/asm.h>
> >>>   #include <asm/page.h>
> >>> +#include <asm/vdso.h>
> >>> +
> >>> +#if PAGE_SIZE < SZ_16K
> >>> +#define VDSO_DATA_SIZE SZ_16K
> >> Whether we add members to the vdso data structure or extend
> >> SMP_CACHE_BYTES/NR_CPUS, the static VDSO_DATA_SIZE may not match, and
> >> there is no assertion checking to help us catch bugs early. So I
> >> suggest defining VDSO_DATA_SIZE as ALIGN_UP(sizeof (struct vdso_data),
> >> PAGE_SIZE).
> > VSYSCALL usage is very limited (you know, VSYSCALL has been around for
> > so many years, yet the number of entries has barely grown), so I think
> > 16KB will be enough for the future.
>
> I don't think omitting compile-time assertions for *correctness* is
> worth the negligible improvement in brevity and ease of maintenance. In
> fact, static checks for correctness actually *lighten* the maintenance
> burden, by explicitly calling out the assumptions so that newcomers
> (i.e. me or some other random linux/arch developer refactoring the
> code) would find them very helpful.
>
> So I'm in support of declaring VDSO_DATA_SIZE explicitly in terms of
> sizeof(struct vdso_data) and PAGE_SIZE.
I'll use hev's method, thank you.

Huacai


