All of lore.kernel.org
 help / color / mirror / Atom feed
From: Huacai Chen <chenhuacai@loongson.cn>
To: Arnd Bergmann <arnd@arndb.de>, Huacai Chen <chenhuacai@kernel.org>
Cc: loongarch@lists.linux.dev, linux-arch@vger.kernel.org,
	Xuefeng Li <lixuefeng@loongson.cn>, Guo Ren <guoren@kernel.org>,
	Xuerui Wang <kernel@xen0n.name>,
	Jiaxun Yang <jiaxun.yang@flygoat.com>,
	Huacai Chen <chenhuacai@loongson.cn>
Subject: [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu()
Date: Fri, 17 Jun 2022 22:58:28 +0800	[thread overview]
Message-ID: <20220617145828.582117-1-chenhuacai@loongson.cn> (raw)

We test 20 million times of getcpu(), the real syscall version take 25
seconds, while the vsyscall version take only 2.4 seconds.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/vdso.h      |  4 +++
 arch/loongarch/include/asm/vdso/vdso.h | 10 +++++-
 arch/loongarch/kernel/vdso.c           | 23 +++++++++-----
 arch/loongarch/vdso/Makefile           |  3 +-
 arch/loongarch/vdso/vdso.lds.S         |  1 +
 arch/loongarch/vdso/vgetcpu.c          | 43 ++++++++++++++++++++++++++
 6 files changed, 74 insertions(+), 10 deletions(-)
 create mode 100644 arch/loongarch/vdso/vgetcpu.c

diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
index 8f8a0f9a4953..e76d5e37480d 100644
--- a/arch/loongarch/include/asm/vdso.h
+++ b/arch/loongarch/include/asm/vdso.h
@@ -12,6 +12,10 @@
 
 #include <asm/barrier.h>
 
+typedef struct vdso_pcpu_data {
+	u32 node;
+} ____cacheline_aligned_in_smp vdso_pcpu_data;
+
 /*
  * struct loongarch_vdso_info - Details of a VDSO image.
  * @vdso: Pointer to VDSO image (page-aligned).
diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
index 5a01643a65b3..94055f7c54b7 100644
--- a/arch/loongarch/include/asm/vdso/vdso.h
+++ b/arch/loongarch/include/asm/vdso/vdso.h
@@ -8,6 +8,13 @@
 
 #include <asm/asm.h>
 #include <asm/page.h>
+#include <asm/vdso.h>
+
+#if PAGE_SIZE < SZ_16K
+#define VDSO_DATA_SIZE SZ_16K
+#else
+#define VDSO_DATA_SIZE PAGE_SIZE
+#endif
 
 static inline unsigned long get_vdso_base(void)
 {
@@ -24,7 +31,8 @@ static inline unsigned long get_vdso_base(void)
 
 static inline const struct vdso_data *get_vdso_data(void)
 {
-	return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE);
+	return (const struct vdso_data *)(get_vdso_base()
+			- VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS);
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
index e20c8ca87473..6ce322a1bf8b 100644
--- a/arch/loongarch/kernel/vdso.c
+++ b/arch/loongarch/kernel/vdso.c
@@ -26,11 +26,15 @@ extern char vdso_start[], vdso_end[];
 
 /* Kernel-provided data used by the VDSO. */
 static union loongarch_vdso_data {
-	u8 page[PAGE_SIZE];
-	struct vdso_data data[CS_BASES];
+	u8 page[VDSO_DATA_SIZE];
+	struct {
+		vdso_pcpu_data pdata[NR_CPUS];
+		struct vdso_data data[CS_BASES];
+	};
 } loongarch_vdso_data __page_aligned_data;
-struct vdso_data *vdso_data = loongarch_vdso_data.data;
+
 static struct page *vdso_pages[] = { NULL };
+struct vdso_data *vdso_data = loongarch_vdso_data.data;
 
 static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
 {
@@ -55,11 +59,14 @@ struct loongarch_vdso_info vdso_info = {
 
 static int __init init_vdso(void)
 {
-	unsigned long i, pfn;
+	unsigned long i, cpu, pfn;
 
 	BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
 	BUG_ON(!PAGE_ALIGNED(vdso_info.size));
 
+	for_each_possible_cpu(cpu)
+		loongarch_vdso_data.pdata[cpu].node = cpu_to_node(cpu);
+
 	pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
 	for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
 		vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
@@ -93,9 +100,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	/*
 	 * Determine total area size. This includes the VDSO data itself
-	 * and the data page.
+	 * and the data pages.
 	 */
-	vvar_size = PAGE_SIZE;
+	vvar_size = VDSO_DATA_SIZE;
 	size = vvar_size + info->size;
 
 	data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
@@ -115,8 +122,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	/* Map VDSO data page. */
 	ret = remap_pfn_range(vma, data_addr,
-			      virt_to_phys(vdso_data) >> PAGE_SHIFT,
-			      PAGE_SIZE, PAGE_READONLY);
+			      virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT,
+			      vvar_size, PAGE_READONLY);
 	if (ret)
 		goto out;
 
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index 6b6e16732c60..d89e2ac75f7b 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -6,7 +6,7 @@
 ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
 include $(srctree)/lib/vdso/Makefile
 
-obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
+obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
 
 # Common compiler flags between ABIs.
 ccflags-vdso := \
@@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
 endif
 
 cflags-vdso := $(ccflags-vdso) \
+	-isystem $(shell $(CC) -print-file-name=include) \
 	$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
 	-O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \
 	-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S
index 955f02de4a2d..56ad855896de 100644
--- a/arch/loongarch/vdso/vdso.lds.S
+++ b/arch/loongarch/vdso/vdso.lds.S
@@ -58,6 +58,7 @@ VERSION
 {
 	LINUX_5.10 {
 	global:
+		__vdso_getcpu;
 		__vdso_clock_getres;
 		__vdso_clock_gettime;
 		__vdso_gettimeofday;
diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c
new file mode 100644
index 000000000000..23fe2362f4e0
--- /dev/null
+++ b/arch/loongarch/vdso/vgetcpu.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Fast user context implementation of getcpu()
+ */
+
+#include <asm/vdso.h>
+#include <linux/getcpu.h>
+
+static __always_inline int read_cpu_id(void)
+{
+	int cpu_id;
+
+	__asm__ __volatile__(
+	"	rdtime.d $zero, %0\n"
+	: "=r" (cpu_id)
+	:
+	: "memory");
+
+	return cpu_id;
+}
+
+static __always_inline const vdso_pcpu_data *get_pcpu_data(void)
+{
+	return (vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE);
+}
+
+int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused)
+{
+	int cpu_id;
+	const vdso_pcpu_data *data;
+
+	cpu_id = read_cpu_id();
+
+	if (cpu)
+		*cpu = cpu_id;
+
+	if (node) {
+		data = get_pcpu_data();
+		*node = data[cpu_id].node;
+	}
+
+	return 0;
+}
-- 
2.27.0


             reply	other threads:[~2022-06-17 14:57 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-17 14:58 Huacai Chen [this message]
2022-06-17 15:34 ` [PATCH] LoongArch: Add vDSO syscall __vdso_getcpu() hev
2022-06-18  9:10   ` Huacai Chen
2022-06-18 12:56     ` WANG Xuerui
2022-06-20  4:26       ` Huacai Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220617145828.582117-1-chenhuacai@loongson.cn \
    --to=chenhuacai@loongson.cn \
    --cc=arnd@arndb.de \
    --cc=chenhuacai@kernel.org \
    --cc=guoren@kernel.org \
    --cc=jiaxun.yang@flygoat.com \
    --cc=kernel@xen0n.name \
    --cc=linux-arch@vger.kernel.org \
    --cc=lixuefeng@loongson.cn \
    --cc=loongarch@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.