All of lore.kernel.org
 help / color / mirror / Atom feed
From: Huacai Chen <chenhuacai@kernel.org>
To: hev <r@hev.cc>
Cc: Huacai Chen <chenhuacai@loongson.cn>,
	Arnd Bergmann <arnd@arndb.de>,
	loongarch@lists.linux.dev,
	 linux-arch <linux-arch@vger.kernel.org>,
	Xuefeng Li <lixuefeng@loongson.cn>,  Guo Ren <guoren@kernel.org>,
	Xuerui Wang <kernel@xen0n.name>,
	 Jiaxun Yang <jiaxun.yang@flygoat.com>
Subject: Re: [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu()
Date: Sat, 18 Jun 2022 17:10:09 +0800	[thread overview]
Message-ID: <CAAhV-H4=04qygAFqm36RBM-ktXhO7M8HMBeCPBOnB8xYz268Zw@mail.gmail.com> (raw)
In-Reply-To: <CAHirt9hRs_iTvAZ=UxBBK448j7p+pYxKsMVise=Jj2qCtNky2Q@mail.gmail.com>

Hi,

On Fri, Jun 17, 2022 at 11:35 PM hev <r@hev.cc> wrote:
>
> Hello,
>
> On Fri, Jun 17, 2022 at 10:57 PM Huacai Chen <chenhuacai@loongson.cn> wrote:
> >
> > We tested 20 million calls of getcpu(): the real syscall version takes 25
> > seconds, while the vsyscall version takes only 2.4 seconds.
> >
> > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> > ---
> >  arch/loongarch/include/asm/vdso.h      |  4 +++
> >  arch/loongarch/include/asm/vdso/vdso.h | 10 +++++-
> >  arch/loongarch/kernel/vdso.c           | 23 +++++++++-----
> >  arch/loongarch/vdso/Makefile           |  3 +-
> >  arch/loongarch/vdso/vdso.lds.S         |  1 +
> >  arch/loongarch/vdso/vgetcpu.c          | 43 ++++++++++++++++++++++++++
> >  6 files changed, 74 insertions(+), 10 deletions(-)
> >  create mode 100644 arch/loongarch/vdso/vgetcpu.c
> >
> > diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
> > index 8f8a0f9a4953..e76d5e37480d 100644
> > --- a/arch/loongarch/include/asm/vdso.h
> > +++ b/arch/loongarch/include/asm/vdso.h
> > @@ -12,6 +12,10 @@
> >
> >  #include <asm/barrier.h>
> >
> > +typedef struct vdso_pcpu_data {
> > +       u32 node;
> > +} ____cacheline_aligned_in_smp vdso_pcpu_data;
> > +
> >  /*
> >   * struct loongarch_vdso_info - Details of a VDSO image.
> >   * @vdso: Pointer to VDSO image (page-aligned).
> > diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
> > index 5a01643a65b3..94055f7c54b7 100644
> > --- a/arch/loongarch/include/asm/vdso/vdso.h
> > +++ b/arch/loongarch/include/asm/vdso/vdso.h
> > @@ -8,6 +8,13 @@
> >
> >  #include <asm/asm.h>
> >  #include <asm/page.h>
> > +#include <asm/vdso.h>
> > +
> > +#if PAGE_SIZE < SZ_16K
> > +#define VDSO_DATA_SIZE SZ_16K
>
> Whether we add members to the vdso data structure or extend
> SMP_CACHE_BYTES/NR_CPUS, the static VDSO_DATA_SIZE may not match, and
> there is no assertion checking to help us catch bugs early. So I
> suggest defining VDSO_DATA_SIZE as ALIGN_UP(sizeof (struct vdso_data),
> PAGE_SIZE).
VSYSCALL usage is very limited (you know, VSYSCALL has existed for many
years, but the number of such calls has hardly increased so far), so I
think 16KB will be enough for the future.

Huacai
>
> hev
>
> > +#else
> > +#define VDSO_DATA_SIZE PAGE_SIZE
> > +#endif
> >
> >  static inline unsigned long get_vdso_base(void)
> >  {
> > @@ -24,7 +31,8 @@ static inline unsigned long get_vdso_base(void)
> >
> >  static inline const struct vdso_data *get_vdso_data(void)
> >  {
> > -       return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE);
> > +       return (const struct vdso_data *)(get_vdso_base()
> > +                       - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS);
> >  }
> >
> >  #endif /* __ASSEMBLY__ */
> > diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
> > index e20c8ca87473..6ce322a1bf8b 100644
> > --- a/arch/loongarch/kernel/vdso.c
> > +++ b/arch/loongarch/kernel/vdso.c
> > @@ -26,11 +26,15 @@ extern char vdso_start[], vdso_end[];
> >
> >  /* Kernel-provided data used by the VDSO. */
> >  static union loongarch_vdso_data {
> > -       u8 page[PAGE_SIZE];
> > -       struct vdso_data data[CS_BASES];
> > +       u8 page[VDSO_DATA_SIZE];
> > +       struct {
> > +               vdso_pcpu_data pdata[NR_CPUS];
> > +               struct vdso_data data[CS_BASES];
> > +       };
> >  } loongarch_vdso_data __page_aligned_data;
> > -struct vdso_data *vdso_data = loongarch_vdso_data.data;
> > +
> >  static struct page *vdso_pages[] = { NULL };
> > +struct vdso_data *vdso_data = loongarch_vdso_data.data;
> >
> >  static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
> >  {
> > @@ -55,11 +59,14 @@ struct loongarch_vdso_info vdso_info = {
> >
> >  static int __init init_vdso(void)
> >  {
> > -       unsigned long i, pfn;
> > +       unsigned long i, cpu, pfn;
> >
> >         BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
> >         BUG_ON(!PAGE_ALIGNED(vdso_info.size));
> >
> > +       for_each_possible_cpu(cpu)
> > +               loongarch_vdso_data.pdata[cpu].node = cpu_to_node(cpu);
> > +
> >         pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
> >         for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
> >                 vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
> > @@ -93,9 +100,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
> >
> >         /*
> >          * Determine total area size. This includes the VDSO data itself
> > -        * and the data page.
> > +        * and the data pages.
> >          */
> > -       vvar_size = PAGE_SIZE;
> > +       vvar_size = VDSO_DATA_SIZE;
> >         size = vvar_size + info->size;
> >
> >         data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
> > @@ -115,8 +122,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
> >
> >         /* Map VDSO data page. */
> >         ret = remap_pfn_range(vma, data_addr,
> > -                             virt_to_phys(vdso_data) >> PAGE_SHIFT,
> > -                             PAGE_SIZE, PAGE_READONLY);
> > +                             virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT,
> > +                             vvar_size, PAGE_READONLY);
> >         if (ret)
> >                 goto out;
> >
> > diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
> > index 6b6e16732c60..d89e2ac75f7b 100644
> > --- a/arch/loongarch/vdso/Makefile
> > +++ b/arch/loongarch/vdso/Makefile
> > @@ -6,7 +6,7 @@
> >  ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
> >  include $(srctree)/lib/vdso/Makefile
> >
> > -obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
> > +obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
> >
> >  # Common compiler flags between ABIs.
> >  ccflags-vdso := \
> > @@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
> >  endif
> >
> >  cflags-vdso := $(ccflags-vdso) \
> > +       -isystem $(shell $(CC) -print-file-name=include) \
> >         $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
> >         -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \
> >         -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
> > diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S
> > index 955f02de4a2d..56ad855896de 100644
> > --- a/arch/loongarch/vdso/vdso.lds.S
> > +++ b/arch/loongarch/vdso/vdso.lds.S
> > @@ -58,6 +58,7 @@ VERSION
> >  {
> >         LINUX_5.10 {
> >         global:
> > +               __vdso_getcpu;
> >                 __vdso_clock_getres;
> >                 __vdso_clock_gettime;
> >                 __vdso_gettimeofday;
> > diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c
> > new file mode 100644
> > index 000000000000..23fe2362f4e0
> > --- /dev/null
> > +++ b/arch/loongarch/vdso/vgetcpu.c
> > @@ -0,0 +1,43 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Fast user context implementation of getcpu()
> > + */
> > +
> > +#include <asm/vdso.h>
> > +#include <linux/getcpu.h>
> > +
> > +static __always_inline int read_cpu_id(void)
> > +{
> > +       int cpu_id;
> > +
> > +       __asm__ __volatile__(
> > +       "       rdtime.d $zero, %0\n"
> > +       : "=r" (cpu_id)
> > +       :
> > +       : "memory");
> > +
> > +       return cpu_id;
> > +}
> > +
> > +static __always_inline const vdso_pcpu_data *get_pcpu_data(void)
> > +{
> > +       return (vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE);
> > +}
> > +
> > +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused)
> > +{
> > +       int cpu_id;
> > +       const vdso_pcpu_data *data;
> > +
> > +       cpu_id = read_cpu_id();
> > +
> > +       if (cpu)
> > +               *cpu = cpu_id;
> > +
> > +       if (node) {
> > +               data = get_pcpu_data();
> > +               *node = data[cpu_id].node;
> > +       }
> > +
> > +       return 0;
> > +}
> > --
> > 2.27.0
> >
> >

  reply	other threads:[~2022-06-18  9:10 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-17 14:58 [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu() Huacai Chen
2022-06-17 15:34 ` hev
2022-06-18  9:10   ` Huacai Chen [this message]
2022-06-18 12:56     ` WANG Xuerui
2022-06-20  4:26       ` Huacai Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAAhV-H4=04qygAFqm36RBM-ktXhO7M8HMBeCPBOnB8xYz268Zw@mail.gmail.com' \
    --to=chenhuacai@kernel.org \
    --cc=arnd@arndb.de \
    --cc=chenhuacai@loongson.cn \
    --cc=guoren@kernel.org \
    --cc=jiaxun.yang@flygoat.com \
    --cc=kernel@xen0n.name \
    --cc=linux-arch@vger.kernel.org \
    --cc=lixuefeng@loongson.cn \
    --cc=loongarch@lists.linux.dev \
    --cc=r@hev.cc \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.