linux-mm.kvack.org archive mirror
* [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function
@ 2021-11-21  9:35 Kefeng Wang
  2021-11-21  9:35 ` [PATCH RFC 1/4] mm: percpu: Generalize percpu related config Kefeng Wang
                   ` (5 more replies)
  0 siblings, 6 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-21  9:35 UTC (permalink / raw)
  To: dennis, akpm, linux-kernel, linux-mm
  Cc: tj, gregkh, cl, catalin.marinas, will, tsbogend, mpe, benh,
	paulus, paul.walmsley, palmer, aou, davem, tglx, mingo, bp,
	dave.hansen, hpa, linux-arm-kernel, linux-ia64, linux-mips,
	linuxppc-dev, linux-riscv, sparclinux, x86, Kefeng Wang

While adding support for the page mapping percpu first chunk allocator
on arm64, we found a lot of duplicated code in the percpu embed/page
first chunk allocators. This patchset aims to clean that up; it should
introduce no functional change and has only been tested on arm64.
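
For illustration, the per-arch code an architecture keeps after this
series is reduced to a distance callback, a cpu-to-node callback and
the pcpu_embed_first_chunk()/pcpu_page_first_chunk() calls. A minimal
sketch, based on the mips conversion in patch 3:

	static int __init pcpu_cpu_to_node(int cpu)
	{
		return cpu_to_node(cpu);
	}

	void __init setup_per_cpu_areas(void)
	{
		int rc;

		/* generic alloc/free helpers now live in mm/percpu.c */
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
					    pcpu_cpu_distance,
					    pcpu_cpu_to_node);
		if (rc < 0)
			panic("Failed to initialize percpu areas.");
		/* ... per-cpu offset setup continues as before ... */
	}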

Kefeng Wang (4):
  mm: percpu: Generalize percpu related config
  mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
  mm: percpu: Add generic pcpu_fc_alloc/free function
  mm: percpu: Add generic pcpu_populate_pte() function

 arch/arm64/Kconfig             |  20 +----
 arch/ia64/Kconfig              |   9 +--
 arch/mips/Kconfig              |  10 +--
 arch/mips/mm/init.c            |  14 +---
 arch/powerpc/Kconfig           |  17 +---
 arch/powerpc/kernel/setup_64.c |  92 +--------------------
 arch/riscv/Kconfig             |  10 +--
 arch/sparc/Kconfig             |  12 +--
 arch/sparc/kernel/smp_64.c     | 105 +-----------------------
 arch/x86/Kconfig               |  17 +---
 arch/x86/kernel/setup_percpu.c |  66 ++-------------
 drivers/base/arch_numa.c       |  68 +---------------
 include/linux/percpu.h         |  13 +--
 mm/Kconfig                     |  12 +++
 mm/percpu.c                    | 143 +++++++++++++++++++++++++--------
 15 files changed, 165 insertions(+), 443 deletions(-)

-- 
2.26.2




* [PATCH RFC 1/4] mm: percpu: Generalize percpu related config
  2021-11-21  9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function Kefeng Wang
@ 2021-11-21  9:35 ` Kefeng Wang
  2021-11-29 22:36   ` Dennis Zhou
  2021-12-03 18:54   ` Catalin Marinas
  2021-11-21  9:35 ` [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef Kefeng Wang
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-21  9:35 UTC (permalink / raw)
  To: dennis, akpm, linux-kernel, linux-mm
  Cc: tj, gregkh, cl, catalin.marinas, will, tsbogend, mpe, benh,
	paulus, paul.walmsley, palmer, aou, davem, tglx, mingo, bp,
	dave.hansen, hpa, linux-arm-kernel, linux-ia64, linux-mips,
	linuxppc-dev, linux-riscv, sparclinux, x86, Kefeng Wang

The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/
NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs are
duplicated on every platform that subscribes to them.

Move them into mm/Kconfig, drop the redundant per-arch definitions
and instead just select the options on the applicable platforms.
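
A minimal sketch of the resulting pattern, mirroring the mm/Kconfig
and arch/arm64/Kconfig hunks below: each option is declared once as a
bare bool in mm/Kconfig and subscribed to via select in the arch
Kconfig.

	# mm/Kconfig: single, default-off definition
	config NEED_PER_CPU_EMBED_FIRST_CHUNK
		bool

	# arch/arm64/Kconfig: subscribe where applicable
	config NUMA
		...
		select NEED_PER_CPU_EMBED_FIRST_CHUNK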

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/arm64/Kconfig   | 20 ++++----------------
 arch/ia64/Kconfig    |  9 ++-------
 arch/mips/Kconfig    | 10 ++--------
 arch/powerpc/Kconfig | 17 ++++-------------
 arch/riscv/Kconfig   | 10 ++--------
 arch/sparc/Kconfig   | 12 +++---------
 arch/x86/Kconfig     | 17 ++++-------------
 mm/Kconfig           | 12 ++++++++++++
 8 files changed, 33 insertions(+), 74 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c4207cf9bb17..4ff73299f8a9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1135,6 +1135,10 @@ config NUMA
 	select GENERIC_ARCH_NUMA
 	select ACPI_NUMA if ACPI
 	select OF_NUMA
+	select HAVE_SETUP_PER_CPU_AREA
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK
+	select NEED_PER_CPU_PAGE_FIRST_CHUNK
+	select USE_PERCPU_NUMA_NODE_ID
 	help
 	  Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -1151,22 +1155,6 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
 
-config USE_PERCPU_NUMA_NODE_ID
-	def_bool y
-	depends on NUMA
-
-config HAVE_SETUP_PER_CPU_AREA
-	def_bool y
-	depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-	def_bool y
-	depends on NUMA
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-	def_bool y
-	depends on NUMA
-
 source "kernel/Kconfig.hz"
 
 config ARCH_SPARSEMEM_ENABLE
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 1e33666fa679..703952819e10 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -32,6 +32,7 @@ config IA64
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
 	select HAVE_FUNCTION_TRACER
+	select HAVE_SETUP_PER_CPU_AREA
 	select TTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_VIRT_CPU_ACCOUNTING
@@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config HAVE_SETUP_PER_CPU_AREA
-	def_bool y
-
 config DMI
 	bool
 	default y
@@ -292,6 +290,7 @@ config NUMA
 	bool "NUMA support"
 	depends on !FLATMEM
 	select SMP
+	select USE_PERCPU_NUMA_NODE_ID
 	help
 	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
 	  Access).  This option is for configuring high-end multiprocessor
@@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
 	def_bool y
 	depends on NUMA
 
-config USE_PERCPU_NUMA_NODE_ID
-	def_bool y
-	depends on NUMA
-
 config HAVE_MEMORYLESS_NODES
 	def_bool NUMA
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index de60ad190057..c106a2080877 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2666,6 +2666,8 @@ config NUMA
 	bool "NUMA Support"
 	depends on SYS_SUPPORTS_NUMA
 	select SMP
+	select HAVE_SETUP_PER_CPU_AREA
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK
 	help
 	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
 	  Access).  This option improves performance on systems with more
@@ -2676,14 +2678,6 @@ config NUMA
 config SYS_SUPPORTS_NUMA
 	bool
 
-config HAVE_SETUP_PER_CPU_AREA
-	def_bool y
-	depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-	def_bool y
-	depends on NUMA
-
 config RELOCATABLE
 	bool "Relocatable kernel"
 	depends on SYS_SUPPORTS_RELOCATABLE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index dea74d7717c0..8badd39854a0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
 	default 9 if PPC_16K_PAGES	#  9 = 23 (8MB) - 14 (16K)
 	default 11			# 11 = 23 (8MB) - 12 (4K)
 
-config HAVE_SETUP_PER_CPU_AREA
-	def_bool PPC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-	def_bool y if PPC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-	def_bool y if PPC64
-
 config NR_IRQS
 	int "Number of virtual interrupt numbers"
 	range 32 1048576
@@ -240,6 +231,7 @@ config PPC
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_RSEQ
+	select HAVE_SETUP_PER_CPU_AREA		if PPC64
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
@@ -254,6 +246,8 @@ config PPC
 	select MMU_GATHER_RCU_TABLE_FREE
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE		if PPC64 || NOT_COHERENT_CACHE
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK	if PPC64
+	select NEED_PER_CPU_PAGE_FIRST_CHUNK	if PPC64
 	select NEED_SG_DMA_LENGTH
 	select OF
 	select OF_DMA_DEFAULT_COHERENT		if !NOT_COHERENT_CACHE
@@ -659,6 +653,7 @@ config NUMA
 	bool "NUMA Memory Allocation and Scheduler Support"
 	depends on PPC64 && SMP
 	default y if PPC_PSERIES || PPC_POWERNV
+	select USE_PERCPU_NUMA_NODE_ID
 	help
 	  Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -672,10 +667,6 @@ config NODES_SHIFT
 	default "4"
 	depends on NUMA
 
-config USE_PERCPU_NUMA_NODE_ID
-	def_bool y
-	depends on NUMA
-
 config HAVE_MEMORYLESS_NODES
 	def_bool y
 	depends on NUMA
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 821252b65f89..bf66bcbc5a39 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -334,6 +334,8 @@ config NUMA
 	select GENERIC_ARCH_NUMA
 	select OF_NUMA
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select USE_PERCPU_NUMA_NODE_ID
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK
 	help
 	  Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -349,14 +351,6 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
 
-config USE_PERCPU_NUMA_NODE_ID
-	def_bool y
-	depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-	def_bool y
-	depends on NUMA
-
 config RISCV_ISA_C
 	bool "Emit compressed instructions when building Linux"
 	default y
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 66fc08646be5..a6765e0fe6a8 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -97,6 +97,9 @@ config SPARC64
 	select PCI_DOMAINS if PCI
 	select ARCH_HAS_GIGANTIC_PAGE
 	select HAVE_SOFTIRQ_ON_OWN_STACK
+	select HAVE_SETUP_PER_CPU_AREA
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK
+	select NEED_PER_CPU_PAGE_FIRST_CHUNK
 
 config ARCH_PROC_KCORE_TEXT
 	def_bool y
@@ -123,15 +126,6 @@ config AUDIT_ARCH
 	bool
 	default y
 
-config HAVE_SETUP_PER_CPU_AREA
-	def_bool y if SPARC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-	def_bool y if SPARC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-	def_bool y if SPARC64
-
 config MMU
 	bool
 	default y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7399327d1eff..ca120a1f5857 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -239,6 +239,7 @@ config X86
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
 	select HAVE_FUNCTION_ARG_ACCESS_API
+	select HAVE_SETUP_PER_CPU_AREA
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
 	select HAVE_STACK_VALIDATION		if X86_64
@@ -252,6 +253,8 @@ config X86
 	select HAVE_GENERIC_VDSO
 	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK
+	select NEED_PER_CPU_PAGE_FIRST_CHUNK
 	select NEED_SG_DMA_LENGTH
 	select PCI_DOMAINS			if PCI
 	select PCI_LOCKLESS_CONFIG		if PCI
@@ -331,15 +334,6 @@ config ARCH_HAS_CPU_RELAX
 config ARCH_HAS_FILTER_PGPROT
 	def_bool y
 
-config HAVE_SETUP_PER_CPU_AREA
-	def_bool y
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-	def_bool y
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-	def_bool y
-
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
 
@@ -1557,6 +1551,7 @@ config NUMA
 	depends on SMP
 	depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
 	default y if X86_BIGSMP
+	select USE_PERCPU_NUMA_NODE_ID
 	help
 	  Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -2430,10 +2425,6 @@ config ARCH_HAS_ADD_PAGES
 config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
 	def_bool y
 
-config USE_PERCPU_NUMA_NODE_ID
-	def_bool y
-	depends on NUMA
-
 menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
diff --git a/mm/Kconfig b/mm/Kconfig
index 28edafc820ad..6bc5d780c51b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -432,6 +432,18 @@ config NEED_PER_CPU_KM
 	bool
 	default y
 
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	bool
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+	bool
+
+config USE_PERCPU_NUMA_NODE_ID
+	bool
+
+config HAVE_SETUP_PER_CPU_AREA
+	bool
+
 config CLEANCACHE
 	bool "Enable cleancache driver to cache clean pages if tmem is present"
 	help
-- 
2.26.2




* [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
  2021-11-21  9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function Kefeng Wang
  2021-11-21  9:35 ` [PATCH RFC 1/4] mm: percpu: Generalize percpu related config Kefeng Wang
@ 2021-11-21  9:35 ` Kefeng Wang
  2021-11-29 22:40   ` Dennis Zhou
  2021-11-21  9:35 ` [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free function Kefeng Wang
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Kefeng Wang @ 2021-11-21  9:35 UTC (permalink / raw)
  To: dennis, akpm, linux-kernel, linux-mm
  Cc: tj, gregkh, cl, catalin.marinas, will, tsbogend, mpe, benh,
	paulus, paul.walmsley, palmer, aou, davem, tglx, mingo, bp,
	dave.hansen, hpa, linux-arm-kernel, linux-ia64, linux-mips,
	linuxppc-dev, linux-riscv, sparclinux, x86, Kefeng Wang

Add a pcpu_fc_cpu_to_node_fn_t typedef and pass the callback into
pcpu_fc_alloc_fn_t; the percpu first chunk allocators then use it to
allocate memblock memory on the node corresponding to each cpu.
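
Sketched from the include/linux/percpu.h hunk below, the new typedef
and the extended allocator hook look like this, together with a
typical trivial arch callback (mips):

	typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
	typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
					     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);

	/* arch callback: map a cpu to its memory node */
	static int __init pcpu_cpu_to_node(int cpu)
	{
		return cpu_to_node(cpu);
	}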

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/mips/mm/init.c            | 12 +++++++++---
 arch/powerpc/kernel/setup_64.c | 14 +++++++++++---
 arch/sparc/kernel/smp_64.c     |  8 +++++---
 arch/x86/kernel/setup_percpu.c | 18 +++++++++++++-----
 drivers/base/arch_numa.c       |  8 +++++---
 include/linux/percpu.h         |  7 +++++--
 mm/percpu.c                    | 14 +++++++++-----
 7 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 325e1552cbea..ebbf6923532c 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -519,12 +519,17 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 	return node_distance(cpu_to_node(from), cpu_to_node(to));
 }
 
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
-				       size_t align)
+static int __init pcpu_cpu_to_node(int cpu)
+{
+	return cpu_to_node(cpu);
+}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
 	return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
 				      MEMBLOCK_ALLOC_ACCESSIBLE,
-				      cpu_to_node(cpu));
+				      cpu_to_nd_fn(cpu));
 }
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
@@ -545,6 +550,7 @@ void __init setup_per_cpu_areas(void)
 	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
 				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
 				    pcpu_cpu_distance,
+				    pcpu_cpu_to_node,
 				    pcpu_fc_alloc, pcpu_fc_free);
 	if (rc < 0)
 		panic("Failed to initialize percpu areas.");
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6052f5d5ded3..9a5609c821df 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -771,6 +771,12 @@ void __init emergency_stack_init(void)
 }
 
 #ifdef CONFIG_SMP
+
+static __init int pcpu_cpu_to_node(int cpu)
+{
+	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
+}
+
 /**
  * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
  * @cpu: cpu to allocate for
@@ -784,12 +790,12 @@ void __init emergency_stack_init(void)
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
-					size_t align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
+					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
 #ifdef CONFIG_NUMA
-	int node = early_cpu_to_node(cpu);
+	int node = cpu_to_nd_fn(cpu);
 	void *ptr;
 
 	if (!node_online(node) || !NODE_DATA(node)) {
@@ -891,6 +897,7 @@ void __init setup_per_cpu_areas(void)
 
 	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
 		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+					    pcpu_cpu_to_node,
 					    pcpu_alloc_bootmem, pcpu_free_bootmem);
 		if (rc)
 			pr_warn("PERCPU: %s allocator failed (%d), "
@@ -900,6 +907,7 @@ void __init setup_per_cpu_areas(void)
 
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
+					   pcpu_cpu_to_node,
 					   pcpu_populate_pte);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b98a7bbe6728..026aa3ccbc30 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1539,12 +1539,12 @@ void smp_send_stop(void)
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
-					size_t align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
+					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
 #ifdef CONFIG_NUMA
-	int node = cpu_to_node(cpu);
+	int node = cpu_to_nd_fn(cpu);
 	void *ptr;
 
 	if (!node_online(node) || !NODE_DATA(node)) {
@@ -1641,6 +1641,7 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
 					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
 					    pcpu_cpu_distance,
+					    cpu_to_node,
 					    pcpu_alloc_bootmem,
 					    pcpu_free_bootmem);
 		if (rc)
@@ -1652,6 +1653,7 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
 					   pcpu_alloc_bootmem,
 					   pcpu_free_bootmem,
+					   cpu_to_node,
 					   pcpu_populate_pte);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7b65275544b2..bba4fa174a16 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -97,12 +97,12 @@ static bool __init pcpu_need_numa(void)
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
-					unsigned long align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
+					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
 #ifdef CONFIG_NUMA
-	int node = early_cpu_to_node(cpu);
+	int node = cpu_to_nd_fn(cpu);
 	void *ptr;
 
 	if (!node_online(node) || !NODE_DATA(node)) {
@@ -128,9 +128,10 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 /*
  * Helpers for first chunk memory allocation
  */
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
-	return pcpu_alloc_bootmem(cpu, size, align);
+	return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
 }
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
@@ -150,6 +151,11 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 #endif
 }
 
+static int __init pcpu_cpu_to_node(int cpu)
+{
+	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
+}
+
 static void __init pcpup_populate_pte(unsigned long addr)
 {
 	populate_extra_pte(addr);
@@ -205,6 +211,7 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
 					    dyn_size, atom_size,
 					    pcpu_cpu_distance,
+					    pcpu_cpu_to_node,
 					    pcpu_fc_alloc, pcpu_fc_free);
 		if (rc < 0)
 			pr_warn("%s allocator failed (%d), falling back to page size\n",
@@ -213,6 +220,7 @@ void __init setup_per_cpu_areas(void)
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
 					   pcpu_fc_alloc, pcpu_fc_free,
+					   pcpu_cpu_to_node,
 					   pcpup_populate_pte);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index bc1876915457..273543d9ff85 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -155,10 +155,10 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
 }
 
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
-				       size_t align)
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
-	int nid = early_cpu_to_node(cpu);
+	int nid = cpu_to_nd_fn(cpu);
 
 	return  memblock_alloc_try_nid(size, align,
 			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
@@ -229,6 +229,7 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
 					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
 					    pcpu_cpu_distance,
+					    early_cpu_to_node,
 					    pcpu_fc_alloc, pcpu_fc_free);
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 		if (rc < 0)
@@ -242,6 +243,7 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
 					   pcpu_fc_alloc,
 					   pcpu_fc_free,
+					   early_cpu_to_node,
 					   pcpu_populate_pte);
 #endif
 	if (rc < 0)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index ae4004e7957e..41bb54715b0c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
 
 extern enum pcpu_fc pcpu_chosen_fc;
 
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
-				     size_t align);
+typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
+typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
+				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
@@ -111,6 +112,7 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				pcpu_fc_alloc_fn_t alloc_fn,
 				pcpu_fc_free_fn_t free_fn);
 #endif
@@ -119,6 +121,7 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 extern int __init pcpu_page_first_chunk(size_t reserved_size,
 				pcpu_fc_alloc_fn_t alloc_fn,
 				pcpu_fc_free_fn_t free_fn,
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
 
diff --git a/mm/percpu.c b/mm/percpu.c
index f5b2c2ea5a54..3f6cf1ff0be2 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3001,6 +3001,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
  * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
  * @alloc_fn: function to allocate percpu page
  * @free_fn: function to free percpu page
  *
@@ -3030,6 +3031,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
 int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				  size_t atom_size,
 				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
+				  pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				  pcpu_fc_alloc_fn_t alloc_fn,
 				  pcpu_fc_free_fn_t free_fn)
 {
@@ -3066,7 +3068,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 		BUG_ON(cpu == NR_CPUS);
 
 		/* allocate space for the whole group */
-		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
+		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
 		if (!ptr) {
 			rc = -ENOMEM;
 			goto out_free_areas;
@@ -3145,6 +3147,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
  * @reserved_size: the size of reserved percpu area in bytes
  * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
  * @free_fn: function to free percpu page, always called with PAGE_SIZE
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
  * @populate_pte_fn: function to populate pte
  *
  * This is a helper to ease setting up page-remapped first percpu
@@ -3159,6 +3162,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 int __init pcpu_page_first_chunk(size_t reserved_size,
 				 pcpu_fc_alloc_fn_t alloc_fn,
 				 pcpu_fc_free_fn_t free_fn,
+				 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
 {
 	static struct vm_struct vm;
@@ -3201,7 +3205,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 		for (i = 0; i < unit_pages; i++) {
 			void *ptr;
 
-			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
+			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
 			if (!ptr) {
 				pr_warn("failed to allocate %s page for cpu%u\n",
 						psize_str, cpu);
@@ -3278,8 +3282,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
-				       size_t align)
+static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
+				       pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
 	return  memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
 }
@@ -3300,7 +3304,7 @@ void __init setup_per_cpu_areas(void)
 	 * what the legacy allocator did.
 	 */
 	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
+				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
 				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
 	if (rc < 0)
 		panic("Failed to initialize percpu areas.");
-- 
2.26.2




* [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free function
  2021-11-21  9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function Kefeng Wang
  2021-11-21  9:35 ` [PATCH RFC 1/4] mm: percpu: Generalize percpu related config Kefeng Wang
  2021-11-21  9:35 ` [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef Kefeng Wang
@ 2021-11-21  9:35 ` Kefeng Wang
  2021-11-29 22:45   ` Dennis Zhou
  2021-11-21  9:35 ` [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function Kefeng Wang
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Kefeng Wang @ 2021-11-21  9:35 UTC (permalink / raw)
  To: dennis, akpm, linux-kernel, linux-mm
  Cc: tj, gregkh, cl, catalin.marinas, will, tsbogend, mpe, benh,
	paulus, paul.walmsley, palmer, aou, davem, tglx, mingo, bp,
	dave.hansen, hpa, linux-arm-kernel, linux-ia64, linux-mips,
	linuxppc-dev, linux-riscv, sparclinux, x86, Kefeng Wang

With the previous patch in place, we can add generic percpu first
chunk allocation and free functions and remove the duplicated
definitions from each architecture.
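
The heart of the generic allocator, sketched from the mm/percpu.c hunk
below, preserves the NUMA-friendly fallback the architectures had: if
no cpu-to-node callback is given, or the node is not usable, it falls
back to a plain low-memory allocation:

	int node = NUMA_NO_NODE;

	if (cpu_to_nd_fn)
		node = cpu_to_nd_fn(cpu);

	if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node))
		ptr = memblock_alloc_from(size, align, goal);
	else
		ptr = memblock_alloc_try_nid(size, align, goal,
					     MEMBLOCK_ALLOC_ACCESSIBLE, node);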

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/mips/mm/init.c            | 16 +--------
 arch/powerpc/kernel/setup_64.c | 51 ++------------------------
 arch/sparc/kernel/smp_64.c     | 50 +-------------------------
 arch/x86/kernel/setup_percpu.c | 59 +-----------------------------
 drivers/base/arch_numa.c       | 19 +---------
 include/linux/percpu.h         |  9 +----
 mm/percpu.c                    | 66 ++++++++++++++++++----------------
 7 files changed, 42 insertions(+), 228 deletions(-)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index ebbf6923532c..5a8002839550 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -524,19 +524,6 @@ static int __init pcpu_cpu_to_node(int cpu)
 	return cpu_to_node(cpu);
 }
 
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
-				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
-	return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
-				      MEMBLOCK_ALLOC_ACCESSIBLE,
-				      cpu_to_nd_fn(cpu));
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
-	memblock_free(ptr, size);
-}
-
 void __init setup_per_cpu_areas(void)
 {
 	unsigned long delta;
@@ -550,8 +537,7 @@ void __init setup_per_cpu_areas(void)
 	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
 				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
 				    pcpu_cpu_distance,
-				    pcpu_cpu_to_node,
-				    pcpu_fc_alloc, pcpu_fc_free);
+				    pcpu_cpu_to_node);
 	if (rc < 0)
 		panic("Failed to initialize percpu areas.");
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9a5609c821df..364b1567f822 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -777,50 +777,6 @@ static __init int pcpu_cpu_to_node(int cpu)
 	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
 }
 
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
-					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
-	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
-	int node = cpu_to_nd_fn(cpu);
-	void *ptr;
-
-	if (!node_online(node) || !NODE_DATA(node)) {
-		ptr = memblock_alloc_from(size, align, goal);
-		pr_info("cpu %d has no node %d or node-local memory\n",
-			cpu, node);
-		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
-			 cpu, size, __pa(ptr));
-	} else {
-		ptr = memblock_alloc_try_nid(size, align, goal,
-					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
-		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
-			 "%016lx\n", cpu, size, node, __pa(ptr));
-	}
-	return ptr;
-#else
-	return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
-	memblock_free(ptr, size);
-}
-
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
 	if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -897,8 +853,7 @@ void __init setup_per_cpu_areas(void)
 
 	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
 		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
-					    pcpu_cpu_to_node,
-					    pcpu_alloc_bootmem, pcpu_free_bootmem);
+					    pcpu_cpu_to_node);
 		if (rc)
 			pr_warn("PERCPU: %s allocator failed (%d), "
 				"falling back to page size\n",
@@ -906,9 +861,7 @@ void __init setup_per_cpu_areas(void)
 	}
 
 	if (rc < 0)
-		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
-					   pcpu_cpu_to_node,
-					   pcpu_populate_pte);
+		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 026aa3ccbc30..198dadddb75d 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1526,50 +1526,6 @@ void smp_send_stop(void)
 		smp_call_function(stop_this_cpu, NULL, 0);
 }
 
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
-					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
-	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
-	int node = cpu_to_nd_fn(cpu);
-	void *ptr;
-
-	if (!node_online(node) || !NODE_DATA(node)) {
-		ptr = memblock_alloc_from(size, align, goal);
-		pr_info("cpu %d has no node %d or node-local memory\n",
-			cpu, node);
-		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
-			 cpu, size, __pa(ptr));
-	} else {
-		ptr = memblock_alloc_try_nid(size, align, goal,
-					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
-		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
-			 "%016lx\n", cpu, size, node, __pa(ptr));
-	}
-	return ptr;
-#else
-	return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
-	memblock_free(ptr, size);
-}
-
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
 	if (cpu_to_node(from) == cpu_to_node(to))
@@ -1641,9 +1597,7 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
 					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
 					    pcpu_cpu_distance,
-					    cpu_to_node,
-					    pcpu_alloc_bootmem,
-					    pcpu_free_bootmem);
+					    cpu_to_node);
 		if (rc)
 			pr_warn("PERCPU: %s allocator failed (%d), "
 				"falling back to page size\n",
@@ -1651,8 +1605,6 @@ void __init setup_per_cpu_areas(void)
 	}
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
-					   pcpu_alloc_bootmem,
-					   pcpu_free_bootmem,
 					   cpu_to_node,
 					   pcpu_populate_pte);
 	if (rc < 0)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bba4fa174a16..cd672bd46241 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -84,61 +84,6 @@ static bool __init pcpu_need_numa(void)
 }
 #endif
 
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
-					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
-	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
-	int node = cpu_to_nd_fn(cpu);
-	void *ptr;
-
-	if (!node_online(node) || !NODE_DATA(node)) {
-		ptr = memblock_alloc_from(size, align, goal);
-		pr_info("cpu %d has no node %d or node-local memory\n",
-			cpu, node);
-		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
-			 cpu, size, __pa(ptr));
-	} else {
-		ptr = memblock_alloc_try_nid(size, align, goal,
-					     MEMBLOCK_ALLOC_ACCESSIBLE,
-					     node);
-
-		pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
-			 cpu, size, node, __pa(ptr));
-	}
-	return ptr;
-#else
-	return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-/*
- * Helpers for first chunk memory allocation
- */
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
-				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
-	return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
-	memblock_free(ptr, size);
-}
-
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
 #ifdef CONFIG_NUMA
@@ -211,15 +156,13 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
 					    dyn_size, atom_size,
 					    pcpu_cpu_distance,
-					    pcpu_cpu_to_node,
-					    pcpu_fc_alloc, pcpu_fc_free);
+					    pcpu_cpu_to_node);
 		if (rc < 0)
 			pr_warn("%s allocator failed (%d), falling back to page size\n",
 				pcpu_fc_names[pcpu_chosen_fc], rc);
 	}
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
-					   pcpu_fc_alloc, pcpu_fc_free,
 					   pcpu_cpu_to_node,
 					   pcpup_populate_pte);
 	if (rc < 0)
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 273543d9ff85..23a10cc36165 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -155,20 +155,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
 }
 
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
-				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
-	int nid = cpu_to_nd_fn(cpu);
-
-	return  memblock_alloc_try_nid(size, align,
-			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
-	memblock_free(ptr, size);
-}
-
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 static void __init pcpu_populate_pte(unsigned long addr)
 {
@@ -229,8 +215,7 @@ void __init setup_per_cpu_areas(void)
 		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
 					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
 					    pcpu_cpu_distance,
-					    early_cpu_to_node,
-					    pcpu_fc_alloc, pcpu_fc_free);
+					    early_cpu_to_node);
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 		if (rc < 0)
 			pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
@@ -241,8 +226,6 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
-					   pcpu_fc_alloc,
-					   pcpu_fc_free,
 					   early_cpu_to_node,
 					   pcpu_populate_pte);
 #endif
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 41bb54715b0c..d73c97ef4ff4 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
 extern enum pcpu_fc pcpu_chosen_fc;
 
 typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
-				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
-typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
 
@@ -112,15 +109,11 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
-				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
-				pcpu_fc_alloc_fn_t alloc_fn,
-				pcpu_fc_free_fn_t free_fn);
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 #endif
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 extern int __init pcpu_page_first_chunk(size_t reserved_size,
-				pcpu_fc_alloc_fn_t alloc_fn,
-				pcpu_fc_free_fn_t free_fn,
 				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
diff --git a/mm/percpu.c b/mm/percpu.c
index 3f6cf1ff0be2..efaa1cbaf73d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2992,6 +2992,30 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
 
 	return ai;
 }
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
+{
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+	int node = NUMA_NO_NODE;
+	void *ptr;
+
+	if (cpu_to_nd_fn)
+		node = cpu_to_nd_fn(cpu);
+
+	if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
+		ptr = memblock_alloc_from(size, align, goal);
+	} else {
+		ptr = memblock_alloc_try_nid(size, align, goal,
+					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
+	}
+	return ptr;
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+	memblock_free(ptr, size);
+}
 #endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
 
 #if defined(BUILD_EMBED_FIRST_CHUNK)
@@ -3002,14 +3026,12 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
 * @cpu_to_nd_fn: callback to convert cpu to its node, optional
- * @alloc_fn: function to allocate percpu page
- * @free_fn: function to free percpu page
  *
  * This is a helper to ease setting up embedded first percpu chunk and
  * can be called where pcpu_setup_first_chunk() is expected.
  *
  * If this function is used to setup the first chunk, it is allocated
- * by calling @alloc_fn and used as-is without being mapped into
+ * by calling pcpu_fc_alloc and used as-is without being mapped into
  * vmalloc area.  Allocations are always whole multiples of @atom_size
  * aligned to @atom_size.
  *
@@ -3023,7 +3045,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
  * @dyn_size specifies the minimum dynamic area size.
  *
  * If the needed size is smaller than the minimum or specified unit
- * size, the leftover is returned using @free_fn.
+ * size, the leftover is returned using pcpu_fc_free.
  *
  * RETURNS:
  * 0 on success, -errno on failure.
@@ -3031,9 +3053,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
 int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				  size_t atom_size,
 				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
-				  pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
-				  pcpu_fc_alloc_fn_t alloc_fn,
-				  pcpu_fc_free_fn_t free_fn)
+				  pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
 	void *base = (void *)ULONG_MAX;
 	void **areas = NULL;
@@ -3068,7 +3088,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 		BUG_ON(cpu == NR_CPUS);
 
 		/* allocate space for the whole group */
-		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
+		ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
 		if (!ptr) {
 			rc = -ENOMEM;
 			goto out_free_areas;
@@ -3107,12 +3127,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
 			if (gi->cpu_map[i] == NR_CPUS) {
 				/* unused unit, free whole */
-				free_fn(ptr, ai->unit_size);
+				pcpu_fc_free(ptr, ai->unit_size);
 				continue;
 			}
 			/* copy and return the unused part */
 			memcpy(ptr, __per_cpu_load, ai->static_size);
-			free_fn(ptr + size_sum, ai->unit_size - size_sum);
+			pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
 		}
 	}
 
@@ -3131,7 +3151,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 out_free_areas:
 	for (group = 0; group < ai->nr_groups; group++)
 		if (areas[group])
-			free_fn(areas[group],
+			pcpu_fc_free(areas[group],
 				ai->groups[group].nr_units * ai->unit_size);
 out_free:
 	pcpu_free_alloc_info(ai);
@@ -3145,8 +3165,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
- * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: function to free percpu page, always called with PAGE_SIZE
 * @cpu_to_nd_fn: callback to convert cpu to its node, optional
  * @populate_pte_fn: function to populate pte
  *
@@ -3160,8 +3178,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
  * 0 on success, -errno on failure.
  */
 int __init pcpu_page_first_chunk(size_t reserved_size,
-				 pcpu_fc_alloc_fn_t alloc_fn,
-				 pcpu_fc_free_fn_t free_fn,
 				 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
 				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
 {
@@ -3205,7 +3221,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 		for (i = 0; i < unit_pages; i++) {
 			void *ptr;
 
-			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
+			ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
 			if (!ptr) {
 				pr_warn("failed to allocate %s page for cpu%u\n",
 						psize_str, cpu);
@@ -3257,7 +3273,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 
 enomem:
 	while (--j >= 0)
-		free_fn(page_address(pages[j]), PAGE_SIZE);
+		pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
 	rc = -ENOMEM;
 out_free_ar:
 	memblock_free(pages, pages_size);
@@ -3282,17 +3298,6 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
-				       pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
-{
-	return  memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
-}
-
-static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
-{
-	memblock_free(ptr, size);
-}
-
 void __init setup_per_cpu_areas(void)
 {
 	unsigned long delta;
@@ -3303,9 +3308,8 @@ void __init setup_per_cpu_areas(void)
 	 * Always reserve area for module percpu variables.  That's
 	 * what the legacy allocator did.
 	 */
-	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
-				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
+				    PAGE_SIZE, NULL, NULL);
 	if (rc < 0)
 		panic("Failed to initialize percpu areas.");
 
-- 
2.26.2




* [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function
  2021-11-21  9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function Kefeng Wang
                   ` (2 preceding siblings ...)
  2021-11-21  9:35 ` [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free function Kefeng Wang
@ 2021-11-21  9:35 ` Kefeng Wang
  2021-11-29 22:49   ` Dennis Zhou
  2021-11-29  2:51 ` [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function Kefeng Wang
  2021-11-29 22:55 ` Dennis Zhou
  5 siblings, 1 reply; 19+ messages in thread
From: Kefeng Wang @ 2021-11-21  9:35 UTC (permalink / raw)
  To: dennis, akpm, linux-kernel, linux-mm
  Cc: tj, gregkh, cl, catalin.marinas, will, tsbogend, mpe, benh,
	paulus, paul.walmsley, palmer, aou, davem, tglx, mingo, bp,
	dave.hansen, hpa, linux-arm-kernel, linux-ia64, linux-mips,
	linuxppc-dev, linux-riscv, sparclinux, x86, Kefeng Wang

When NEED_PER_CPU_PAGE_FIRST_CHUNK is enabled, a function is needed to
populate the page tables for the first chunk. Add a generic
pcpu_populate_pte() function and switch the architectures over to it.
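
The generic version is declared __weak, so an architecture with its
own page table setup can still override it; x86 keeps its one-line
variant, as in the hunk below:

	void __init pcpu_populate_pte(unsigned long addr)
	{
		populate_extra_pte(addr);
	}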

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/powerpc/kernel/setup_64.c | 47 +--------------------
 arch/sparc/kernel/smp_64.c     | 57 +------------------------
 arch/x86/kernel/setup_percpu.c |  5 +--
 drivers/base/arch_numa.c       | 51 +---------------------
 include/linux/percpu.h         |  5 +--
 mm/percpu.c                    | 77 +++++++++++++++++++++++++++++++---
 6 files changed, 79 insertions(+), 163 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 364b1567f822..1a17828af77f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -788,51 +788,6 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
-static void __init pcpu_populate_pte(unsigned long addr)
-{
-	pgd_t *pgd = pgd_offset_k(addr);
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	p4d = p4d_offset(pgd, addr);
-	if (p4d_none(*p4d)) {
-		pud_t *new;
-
-		new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
-		if (!new)
-			goto err_alloc;
-		p4d_populate(&init_mm, p4d, new);
-	}
-
-	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud)) {
-		pmd_t *new;
-
-		new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
-		if (!new)
-			goto err_alloc;
-		pud_populate(&init_mm, pud, new);
-	}
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd)) {
-		pte_t *new;
-
-		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
-		if (!new)
-			goto err_alloc;
-		pmd_populate_kernel(&init_mm, pmd, new);
-	}
-
-	return;
-
-err_alloc:
-	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
-	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-
-
 void __init setup_per_cpu_areas(void)
 {
 	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
@@ -861,7 +816,7 @@ void __init setup_per_cpu_areas(void)
 	}
 
 	if (rc < 0)
-		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
+		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 198dadddb75d..00dffe2d834b 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1534,59 +1534,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 		return REMOTE_DISTANCE;
 }
 
-static void __init pcpu_populate_pte(unsigned long addr)
-{
-	pgd_t *pgd = pgd_offset_k(addr);
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (pgd_none(*pgd)) {
-		pud_t *new;
-
-		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-		if (!new)
-			goto err_alloc;
-		pgd_populate(&init_mm, pgd, new);
-	}
-
-	p4d = p4d_offset(pgd, addr);
-	if (p4d_none(*p4d)) {
-		pud_t *new;
-
-		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-		if (!new)
-			goto err_alloc;
-		p4d_populate(&init_mm, p4d, new);
-	}
-
-	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud)) {
-		pmd_t *new;
-
-		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-		if (!new)
-			goto err_alloc;
-		pud_populate(&init_mm, pud, new);
-	}
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd)) {
-		pte_t *new;
-
-		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-		if (!new)
-			goto err_alloc;
-		pmd_populate_kernel(&init_mm, pmd, new);
-	}
-
-	return;
-
-err_alloc:
-	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
-	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-
 void __init setup_per_cpu_areas(void)
 {
 	unsigned long delta;
@@ -1604,9 +1551,7 @@ void __init setup_per_cpu_areas(void)
 				pcpu_fc_names[pcpu_chosen_fc], rc);
 	}
 	if (rc < 0)
-		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
-					   cpu_to_node,
-					   pcpu_populate_pte);
+		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, cpu_to_node);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index cd672bd46241..4eadbe45078e 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int cpu)
 	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
 }
 
-static void __init pcpup_populate_pte(unsigned long addr)
+void __init pcpu_populate_pte(unsigned long addr)
 {
 	populate_extra_pte(addr);
 }
@@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void)
 	}
 	if (rc < 0)
 		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
-					   pcpu_cpu_to_node,
-					   pcpup_populate_pte);
+					   pcpu_cpu_to_node);
 	if (rc < 0)
 		panic("cannot initialize percpu area (err=%d)", rc);
 
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 23a10cc36165..eaa31e567d1e 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -14,7 +14,6 @@
 #include <linux/of.h>
 
 #include <asm/sections.h>
-#include <asm/pgalloc.h>
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
@@ -155,52 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
 }
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
-static void __init pcpu_populate_pte(unsigned long addr)
-{
-	pgd_t *pgd = pgd_offset_k(addr);
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	p4d = p4d_offset(pgd, addr);
-	if (p4d_none(*p4d)) {
-		pud_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		if (!new)
-			goto err_alloc;
-		p4d_populate(&init_mm, p4d, new);
-	}
-
-	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud)) {
-		pmd_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		if (!new)
-			goto err_alloc;
-		pud_populate(&init_mm, pud, new);
-	}
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd)) {
-		pte_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		if (!new)
-			goto err_alloc;
-		pmd_populate_kernel(&init_mm, pmd, new);
-	}
-
-	return;
-
-err_alloc:
-	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
-	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-#endif
-
 void __init setup_per_cpu_areas(void)
 {
 	unsigned long delta;
@@ -225,9 +178,7 @@ void __init setup_per_cpu_areas(void)
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 	if (rc < 0)
-		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
-					   early_cpu_to_node,
-					   pcpu_populate_pte);
+		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
 #endif
 	if (rc < 0)
 		panic("Failed to initialize percpu areas (err=%d).", rc);
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d73c97ef4ff4..f1ec5ad1351c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
 extern enum pcpu_fc pcpu_chosen_fc;
 
 typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
-typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
 
 extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
@@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 #endif
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+void __init pcpu_populate_pte(unsigned long addr);
 extern int __init pcpu_page_first_chunk(size_t reserved_size,
-				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
-				pcpu_fc_populate_pte_fn_t populate_pte_fn);
+				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 #endif
 
 extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
diff --git a/mm/percpu.c b/mm/percpu.c
index efaa1cbaf73d..d907daed04eb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3162,11 +3162,80 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 #endif /* BUILD_EMBED_FIRST_CHUNK */
 
 #ifdef BUILD_PAGE_FIRST_CHUNK
+#include <asm/pgalloc.h>
+
+#ifndef P4D_TABLE_SIZE
+#define P4D_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PUD_TABLE_SIZE
+#define PUD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PMD_TABLE_SIZE
+#define PMD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PTE_TABLE_SIZE
+#define PTE_TABLE_SIZE PAGE_SIZE
+#endif
+void __init __weak pcpu_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (pgd_none(*pgd)) {
+		p4d_t *new;
+
+		new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pgd_populate(&init_mm, pgd, new);
+	}
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		pud_t *new;
+
+		new = memblock_alloc_from(PUD_TABLE_SIZE, PUD_TABLE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		p4d_populate(&init_mm, p4d, new);
+	}
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new;
+
+		new = memblock_alloc_from(PMD_TABLE_SIZE, PMD_TABLE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pud_populate(&init_mm, pud, new);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		pte_t *new;
+
+		new = memblock_alloc_from(PTE_TABLE_SIZE, PTE_TABLE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+
+	return;
+
+err_alloc:
+	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
 * @cpu_to_nd_fn: callback to convert cpu to its node, optional
- * @populate_pte_fn: function to populate pte
  *
  * This is a helper to ease setting up page-remapped first percpu
  * chunk and can be called where pcpu_setup_first_chunk() is expected.
@@ -3177,9 +3246,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_page_first_chunk(size_t reserved_size,
-				 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
-				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
+int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
 	static struct vm_struct vm;
 	struct pcpu_alloc_info *ai;
@@ -3243,7 +3310,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 			(unsigned long)vm.addr + unit * ai->unit_size;
 
 		for (i = 0; i < unit_pages; i++)
-			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+			pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
 
 		/* pte already populated, the following shouldn't fail */
 		rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
-- 
2.26.2




* Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function
  2021-11-21  9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk function Kefeng Wang
                   ` (3 preceding siblings ...)
  2021-11-21  9:35 ` [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function Kefeng Wang
@ 2021-11-29  2:51 ` Kefeng Wang
  2021-11-29  2:54   ` Dennis Zhou
  2021-11-29 22:55 ` Dennis Zhou
  5 siblings, 1 reply; 19+ messages in thread
From: Kefeng Wang @ 2021-11-29  2:51 UTC (permalink / raw)
  To: dennis, akpm, linux-kernel, linux-mm
  Cc: tj, gregkh, cl, catalin.marinas, will, tsbogend, mpe, benh,
	paulus, paul.walmsley, palmer, aou, davem, tglx, mingo, bp,
	dave.hansen, hpa, linux-arm-kernel, linux-ia64, linux-mips,
	linuxppc-dev, linux-riscv, sparclinux, x86

Hi Dennis and all maintainers, do you have any comments on the changes? Many thanks.

On 2021/11/21 17:35, Kefeng Wang wrote:
> When adding support for the page mapping percpu first chunk allocator
> on arm64, we found a lot of duplicated code in the percpu embed/page
> first chunk allocators. This patchset aims to clean them up with no
> functional change; it has only been tested on arm64.
>
> Kefeng Wang (4):
>    mm: percpu: Generalize percpu related config
>    mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
>    mm: percpu: Add generic pcpu_fc_alloc/free funciton
>    mm: percpu: Add generic pcpu_populate_pte() function
>
>   arch/arm64/Kconfig             |  20 +----
>   arch/ia64/Kconfig              |   9 +--
>   arch/mips/Kconfig              |  10 +--
>   arch/mips/mm/init.c            |  14 +---
>   arch/powerpc/Kconfig           |  17 +---
>   arch/powerpc/kernel/setup_64.c |  92 +--------------------
>   arch/riscv/Kconfig             |  10 +--
>   arch/sparc/Kconfig             |  12 +--
>   arch/sparc/kernel/smp_64.c     | 105 +-----------------------
>   arch/x86/Kconfig               |  17 +---
>   arch/x86/kernel/setup_percpu.c |  66 ++-------------
>   drivers/base/arch_numa.c       |  68 +---------------
>   include/linux/percpu.h         |  13 +--
>   mm/Kconfig                     |  12 +++
>   mm/percpu.c                    | 143 +++++++++++++++++++++++++--------
>   15 files changed, 165 insertions(+), 443 deletions(-)
>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton
  2021-11-29  2:51 ` [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton Kefeng Wang
@ 2021-11-29  2:54   ` Dennis Zhou
  2021-11-29  3:06     ` Kefeng Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Dennis Zhou @ 2021-11-29  2:54 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86

On Mon, Nov 29, 2021 at 10:51:18AM +0800, Kefeng Wang wrote:
> Hi Dennis and all maintainers, do you have any comments on the changes? Many thanks.
> 
> On 2021/11/21 17:35, Kefeng Wang wrote:
> > When adding support for the page mapping percpu first chunk
> > allocator on arm64, we found a lot of duplicated code in the percpu
> > embed/page first chunk allocators. This patchset aims to clean them
> > up with no functional change; it has only been tested on arm64.
> > 
> > Kefeng Wang (4):
> >    mm: percpu: Generalize percpu related config
> >    mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
> >    mm: percpu: Add generic pcpu_fc_alloc/free funciton
> >    mm: percpu: Add generic pcpu_populate_pte() function
> > 
> >   arch/arm64/Kconfig             |  20 +----
> >   arch/ia64/Kconfig              |   9 +--
> >   arch/mips/Kconfig              |  10 +--
> >   arch/mips/mm/init.c            |  14 +---
> >   arch/powerpc/Kconfig           |  17 +---
> >   arch/powerpc/kernel/setup_64.c |  92 +--------------------
> >   arch/riscv/Kconfig             |  10 +--
> >   arch/sparc/Kconfig             |  12 +--
> >   arch/sparc/kernel/smp_64.c     | 105 +-----------------------
> >   arch/x86/Kconfig               |  17 +---
> >   arch/x86/kernel/setup_percpu.c |  66 ++-------------
> >   drivers/base/arch_numa.c       |  68 +---------------
> >   include/linux/percpu.h         |  13 +--
> >   mm/Kconfig                     |  12 +++
> >   mm/percpu.c                    | 143 +++++++++++++++++++++++++--------
> >   15 files changed, 165 insertions(+), 443 deletions(-)
> > 

Hi Kefeng,

I apologize for the delay. It's a holiday week in the US + I had some
personal things come up at the beginning of last week. I'll have it
reviewed by tomorrow.

Thanks,
Dennis


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton
  2021-11-29  2:54   ` Dennis Zhou
@ 2021-11-29  3:06     ` Kefeng Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-29  3:06 UTC (permalink / raw)
  To: Dennis Zhou
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86


On 2021/11/29 10:54, Dennis Zhou wrote:
> On Mon, Nov 29, 2021 at 10:51:18AM +0800, Kefeng Wang wrote:
>> Hi Dennis and all maintainers, do you have any comments on the changes? Many thanks.
>>
>> On 2021/11/21 17:35, Kefeng Wang wrote:
>>> When adding support for the page mapping percpu first chunk
>>> allocator on arm64, we found a lot of duplicated code in the percpu
>>> embed/page first chunk allocators. This patchset aims to clean them
>>> up with no functional change; it has only been tested on arm64.
>>>
>>> Kefeng Wang (4):
>>>     mm: percpu: Generalize percpu related config
>>>     mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
>>>     mm: percpu: Add generic pcpu_fc_alloc/free funciton
>>>     mm: percpu: Add generic pcpu_populate_pte() function
>>>
>>>    arch/arm64/Kconfig             |  20 +----
>>>    arch/ia64/Kconfig              |   9 +--
>>>    arch/mips/Kconfig              |  10 +--
>>>    arch/mips/mm/init.c            |  14 +---
>>>    arch/powerpc/Kconfig           |  17 +---
>>>    arch/powerpc/kernel/setup_64.c |  92 +--------------------
>>>    arch/riscv/Kconfig             |  10 +--
>>>    arch/sparc/Kconfig             |  12 +--
>>>    arch/sparc/kernel/smp_64.c     | 105 +-----------------------
>>>    arch/x86/Kconfig               |  17 +---
>>>    arch/x86/kernel/setup_percpu.c |  66 ++-------------
>>>    drivers/base/arch_numa.c       |  68 +---------------
>>>    include/linux/percpu.h         |  13 +--
>>>    mm/Kconfig                     |  12 +++
>>>    mm/percpu.c                    | 143 +++++++++++++++++++++++++--------
>>>    15 files changed, 165 insertions(+), 443 deletions(-)
>>>
> Hi Kefeng,
>
> I apologize for the delay. It's a holiday week in the US + I had some
> personal things come up at the beginning of last week. I'll have it
> reviewed by tomorrow.
It's great to hear from you, thanks.
>
> Thanks,
> Dennis
> .


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config
  2021-11-21  9:35 ` [PATCH RFC 1/4] mm: percpu: Generalize percpu related config Kefeng Wang
@ 2021-11-29 22:36   ` Dennis Zhou
  2021-11-30  6:22     ` Kefeng Wang
  2021-12-03 18:54   ` Catalin Marinas
  1 sibling, 1 reply; 19+ messages in thread
From: Dennis Zhou @ 2021-11-29 22:36 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: dennis, akpm, linux-kernel, linux-mm, tj, gregkh, cl,
	catalin.marinas, will, tsbogend, mpe, benh, paulus,
	paul.walmsley, palmer, aou, davem, tglx, mingo, bp, dave.hansen,
	hpa, linux-arm-kernel, linux-ia64, linux-mips, linuxppc-dev,
	linux-riscv, sparclinux, x86

Hello,

On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote:
> The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/
> NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs
> have duplicate definitions on the platforms that subscribe to them.
> 
> Move them into mm, drop these redundant definitions, and instead
> just select them on the applicable platforms.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/arm64/Kconfig   | 20 ++++----------------
>  arch/ia64/Kconfig    |  9 ++-------
>  arch/mips/Kconfig    | 10 ++--------
>  arch/powerpc/Kconfig | 17 ++++-------------
>  arch/riscv/Kconfig   | 10 ++--------
>  arch/sparc/Kconfig   | 12 +++---------
>  arch/x86/Kconfig     | 17 ++++-------------
>  mm/Kconfig           | 12 ++++++++++++
>  8 files changed, 33 insertions(+), 74 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index c4207cf9bb17..4ff73299f8a9 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1135,6 +1135,10 @@ config NUMA
>  	select GENERIC_ARCH_NUMA
>  	select ACPI_NUMA if ACPI
>  	select OF_NUMA
> +	select HAVE_SETUP_PER_CPU_AREA
> +	select NEED_PER_CPU_EMBED_FIRST_CHUNK
> +	select NEED_PER_CPU_PAGE_FIRST_CHUNK
> +	select USE_PERCPU_NUMA_NODE_ID
>  	help
>  	  Enable NUMA (Non-Uniform Memory Access) support.
>  
> @@ -1151,22 +1155,6 @@ config NODES_SHIFT
>  	  Specify the maximum number of NUMA Nodes available on the target
>  	  system.  Increases memory reserved to accommodate various tables.
>  
> -config USE_PERCPU_NUMA_NODE_ID
> -	def_bool y
> -	depends on NUMA
> -
> -config HAVE_SETUP_PER_CPU_AREA
> -	def_bool y
> -	depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> -	def_bool y
> -	depends on NUMA
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> -	def_bool y
> -	depends on NUMA
> -
>  source "kernel/Kconfig.hz"
>  
>  config ARCH_SPARSEMEM_ENABLE
> diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
> index 1e33666fa679..703952819e10 100644
> --- a/arch/ia64/Kconfig
> +++ b/arch/ia64/Kconfig
> @@ -32,6 +32,7 @@ config IA64
>  	select HAVE_FTRACE_MCOUNT_RECORD
>  	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
>  	select HAVE_FUNCTION_TRACER
> +	select HAVE_SETUP_PER_CPU_AREA
>  	select TTY
>  	select HAVE_ARCH_TRACEHOOK
>  	select HAVE_VIRT_CPU_ACCOUNTING
> @@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
>  	bool
>  	default y
>  
> -config HAVE_SETUP_PER_CPU_AREA
> -	def_bool y
> -
>  config DMI
>  	bool
>  	default y
> @@ -292,6 +290,7 @@ config NUMA
>  	bool "NUMA support"
>  	depends on !FLATMEM
>  	select SMP
> +	select USE_PERCPU_NUMA_NODE_ID
>  	help
>  	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
>  	  Access).  This option is for configuring high-end multiprocessor
> @@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
>  	def_bool y
>  	depends on NUMA
>  
> -config USE_PERCPU_NUMA_NODE_ID
> -	def_bool y
> -	depends on NUMA
> -
>  config HAVE_MEMORYLESS_NODES
>  	def_bool NUMA
>  
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index de60ad190057..c106a2080877 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -2666,6 +2666,8 @@ config NUMA
>  	bool "NUMA Support"
>  	depends on SYS_SUPPORTS_NUMA
>  	select SMP
> +	select HAVE_SETUP_PER_CPU_AREA
> +	select NEED_PER_CPU_EMBED_FIRST_CHUNK
>  	help
>  	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
>  	  Access).  This option improves performance on systems with more
> @@ -2676,14 +2678,6 @@ config NUMA
>  config SYS_SUPPORTS_NUMA
>  	bool
>  
> -config HAVE_SETUP_PER_CPU_AREA
> -	def_bool y
> -	depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> -	def_bool y
> -	depends on NUMA
> -
>  config RELOCATABLE
>  	bool "Relocatable kernel"
>  	depends on SYS_SUPPORTS_RELOCATABLE
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index dea74d7717c0..8badd39854a0 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
>  	default 9 if PPC_16K_PAGES	#  9 = 23 (8MB) - 14 (16K)
>  	default 11			# 11 = 23 (8MB) - 12 (4K)
>  
> -config HAVE_SETUP_PER_CPU_AREA
> -	def_bool PPC64
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> -	def_bool y if PPC64
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> -	def_bool y if PPC64
> -
>  config NR_IRQS
>  	int "Number of virtual interrupt numbers"
>  	range 32 1048576
> @@ -240,6 +231,7 @@ config PPC
>  	select HAVE_REGS_AND_STACK_ACCESS_API
>  	select HAVE_RELIABLE_STACKTRACE
>  	select HAVE_RSEQ
> +	select HAVE_SETUP_PER_CPU_AREA		if PPC64
>  	select HAVE_SOFTIRQ_ON_OWN_STACK
>  	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
>  	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
> @@ -254,6 +246,8 @@ config PPC
>  	select MMU_GATHER_RCU_TABLE_FREE
>  	select MODULES_USE_ELF_RELA
>  	select NEED_DMA_MAP_STATE		if PPC64 || NOT_COHERENT_CACHE
> +	select NEED_PER_CPU_EMBED_FIRST_CHUNK	if PPC64
> +	select NEED_PER_CPU_PAGE_FIRST_CHUNK	if PPC64
>  	select NEED_SG_DMA_LENGTH
>  	select OF
>  	select OF_DMA_DEFAULT_COHERENT		if !NOT_COHERENT_CACHE
> @@ -659,6 +653,7 @@ config NUMA
>  	bool "NUMA Memory Allocation and Scheduler Support"
>  	depends on PPC64 && SMP
>  	default y if PPC_PSERIES || PPC_POWERNV
> +	select USE_PERCPU_NUMA_NODE_ID
>  	help
>  	  Enable NUMA (Non-Uniform Memory Access) support.
>  
> @@ -672,10 +667,6 @@ config NODES_SHIFT
>  	default "4"
>  	depends on NUMA
>  
> -config USE_PERCPU_NUMA_NODE_ID
> -	def_bool y
> -	depends on NUMA
> -
>  config HAVE_MEMORYLESS_NODES
>  	def_bool y
>  	depends on NUMA
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 821252b65f89..bf66bcbc5a39 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -334,6 +334,8 @@ config NUMA
>  	select GENERIC_ARCH_NUMA
>  	select OF_NUMA
>  	select ARCH_SUPPORTS_NUMA_BALANCING
> +	select USE_PERCPU_NUMA_NODE_ID
> +	select NEED_PER_CPU_EMBED_FIRST_CHUNK
>  	help
>  	  Enable NUMA (Non-Uniform Memory Access) support.
>  
> @@ -349,14 +351,6 @@ config NODES_SHIFT
>  	  Specify the maximum number of NUMA Nodes available on the target
>  	  system.  Increases memory reserved to accommodate various tables.
>  
> -config USE_PERCPU_NUMA_NODE_ID
> -	def_bool y
> -	depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> -	def_bool y
> -	depends on NUMA
> -
>  config RISCV_ISA_C
>  	bool "Emit compressed instructions when building Linux"
>  	default y
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index 66fc08646be5..a6765e0fe6a8 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -97,6 +97,9 @@ config SPARC64
>  	select PCI_DOMAINS if PCI
>  	select ARCH_HAS_GIGANTIC_PAGE
>  	select HAVE_SOFTIRQ_ON_OWN_STACK
> +	select HAVE_SETUP_PER_CPU_AREA
> +	select NEED_PER_CPU_EMBED_FIRST_CHUNK
> +	select NEED_PER_CPU_PAGE_FIRST_CHUNK
>  
>  config ARCH_PROC_KCORE_TEXT
>  	def_bool y
> @@ -123,15 +126,6 @@ config AUDIT_ARCH
>  	bool
>  	default y
>  
> -config HAVE_SETUP_PER_CPU_AREA
> -	def_bool y if SPARC64
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> -	def_bool y if SPARC64
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> -	def_bool y if SPARC64
> -
>  config MMU
>  	bool
>  	default y
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 7399327d1eff..ca120a1f5857 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -239,6 +239,7 @@ config X86
>  	select HAVE_REGS_AND_STACK_ACCESS_API
>  	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
>  	select HAVE_FUNCTION_ARG_ACCESS_API
> +	select HAVE_SETUP_PER_CPU_AREA
>  	select HAVE_SOFTIRQ_ON_OWN_STACK
>  	select HAVE_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
>  	select HAVE_STACK_VALIDATION		if X86_64
> @@ -252,6 +253,8 @@ config X86
>  	select HAVE_GENERIC_VDSO
>  	select HOTPLUG_SMT			if SMP
>  	select IRQ_FORCED_THREADING
> +	select NEED_PER_CPU_EMBED_FIRST_CHUNK
> +	select NEED_PER_CPU_PAGE_FIRST_CHUNK
>  	select NEED_SG_DMA_LENGTH
>  	select PCI_DOMAINS			if PCI
>  	select PCI_LOCKLESS_CONFIG		if PCI
> @@ -331,15 +334,6 @@ config ARCH_HAS_CPU_RELAX
>  config ARCH_HAS_FILTER_PGPROT
>  	def_bool y
>  
> -config HAVE_SETUP_PER_CPU_AREA
> -	def_bool y
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> -	def_bool y
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> -	def_bool y
> -
>  config ARCH_HIBERNATION_POSSIBLE
>  	def_bool y
>  
> @@ -1557,6 +1551,7 @@ config NUMA
>  	depends on SMP
>  	depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
>  	default y if X86_BIGSMP
> +	select USE_PERCPU_NUMA_NODE_ID
>  	help
>  	  Enable NUMA (Non-Uniform Memory Access) support.
>  
> @@ -2430,10 +2425,6 @@ config ARCH_HAS_ADD_PAGES
>  config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
>  	def_bool y
>  
> -config USE_PERCPU_NUMA_NODE_ID
> -	def_bool y
> -	depends on NUMA
> -
>  menu "Power management and ACPI options"
>  
>  config ARCH_HIBERNATION_HEADER
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 28edafc820ad..6bc5d780c51b 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -432,6 +432,18 @@ config NEED_PER_CPU_KM
>  	bool
>  	default y
>  
> +config NEED_PER_CPU_EMBED_FIRST_CHUNK
> +	bool
> +
> +config NEED_PER_CPU_PAGE_FIRST_CHUNK
> +	bool
> +
> +config USE_PERCPU_NUMA_NODE_ID
> +	bool
> +
> +config HAVE_SETUP_PER_CPU_AREA
> +	bool
> +
>  config CLEANCACHE
>  	bool "Enable cleancache driver to cache clean pages if tmem is present"
>  	help
> -- 
> 2.26.2
> 

This makes sense and looks good. A series like this is a little tricky.
The latter patches change the contracts, so it'd be easiest to run it
through my tree. We'd need to get explicit acks from each arch
maintainer to make sure they're fine with this.

Thanks,
Dennis


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
  2021-11-21  9:35 ` [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef Kefeng Wang
@ 2021-11-29 22:40   ` Dennis Zhou
  2021-11-30  6:22     ` Kefeng Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Dennis Zhou @ 2021-11-29 22:40 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86

On Sun, Nov 21, 2021 at 05:35:55PM +0800, Kefeng Wang wrote:
> Add a pcpu_fc_cpu_to_node_fn_t typedef and pass it into
> pcpu_fc_alloc_fn_t; the percpu first chunk allocation will call it to
> allocate memblock memory on the corresponding node.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/mips/mm/init.c            | 12 +++++++++---
>  arch/powerpc/kernel/setup_64.c | 14 +++++++++++---
>  arch/sparc/kernel/smp_64.c     |  8 +++++---
>  arch/x86/kernel/setup_percpu.c | 18 +++++++++++++-----
>  drivers/base/arch_numa.c       |  8 +++++---
>  include/linux/percpu.h         |  7 +++++--
>  mm/percpu.c                    | 14 +++++++++-----
>  7 files changed, 57 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index 325e1552cbea..ebbf6923532c 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -519,12 +519,17 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  	return node_distance(cpu_to_node(from), cpu_to_node(to));
>  }
>  
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
> -				       size_t align)
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> +	return cpu_to_node(cpu);
> +}
> +
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> +				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>  	return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
>  				      MEMBLOCK_ALLOC_ACCESSIBLE,
> -				      cpu_to_node(cpu));
> +				      cpu_to_nd_fun(cpu));
>  }
>  
>  static void __init pcpu_fc_free(void *ptr, size_t size)
> @@ -545,6 +550,7 @@ void __init setup_per_cpu_areas(void)
>  	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>  				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
>  				    pcpu_cpu_distance,
> +				    pcpu_cpu_to_node,
>  				    pcpu_fc_alloc, pcpu_fc_free);
>  	if (rc < 0)
>  		panic("Failed to initialize percpu areas.");
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 6052f5d5ded3..9a5609c821df 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -771,6 +771,12 @@ void __init emergency_stack_init(void)
>  }
>  
>  #ifdef CONFIG_SMP
> +
> +static __init int pcpu_cpu_to_node(int cpu)
> +{
> +	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> +}
> +
>  /**
>   * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
>   * @cpu: cpu to allocate for
> @@ -784,12 +790,12 @@ void __init emergency_stack_init(void)
>   * RETURNS:
>   * Pointer to the allocated area on success, NULL on failure.
>   */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
> -					size_t align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> +					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>  	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>  #ifdef CONFIG_NUMA
> -	int node = early_cpu_to_node(cpu);
> +	int node = cpu_to_nd_fun(cpu);

^ typo - cpu_to_nd_fn().

>  	void *ptr;
>  
>  	if (!node_online(node) || !NODE_DATA(node)) {
> @@ -891,6 +897,7 @@ void __init setup_per_cpu_areas(void)
>  
>  	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
>  		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
> +					    pcpu_cpu_to_node,
>  					    pcpu_alloc_bootmem, pcpu_free_bootmem);
>  		if (rc)
>  			pr_warn("PERCPU: %s allocator failed (%d), "
> @@ -900,6 +907,7 @@ void __init setup_per_cpu_areas(void)
>  
>  	if (rc < 0)
>  		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
> +					   pcpu_cpu_to_node,
>  					   pcpu_populate_pte);
>  	if (rc < 0)
>  		panic("cannot initialize percpu area (err=%d)", rc);
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index b98a7bbe6728..026aa3ccbc30 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1539,12 +1539,12 @@ void smp_send_stop(void)
>   * RETURNS:
>   * Pointer to the allocated area on success, NULL on failure.
>   */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
> -					size_t align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> +					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>  	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>  #ifdef CONFIG_NUMA
> -	int node = cpu_to_node(cpu);
> +	int node = cpu_to_nd_fn(cpu);
>  	void *ptr;
>  
>  	if (!node_online(node) || !NODE_DATA(node)) {
> @@ -1641,6 +1641,7 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>  					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
>  					    pcpu_cpu_distance,
> +					    cpu_to_node,
>  					    pcpu_alloc_bootmem,
>  					    pcpu_free_bootmem);
>  		if (rc)
> @@ -1652,6 +1653,7 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
>  					   pcpu_alloc_bootmem,
>  					   pcpu_free_bootmem,
> +					   cpu_to_node,
>  					   pcpu_populate_pte);
>  	if (rc < 0)
>  		panic("cannot initialize percpu area (err=%d)", rc);
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index 7b65275544b2..bba4fa174a16 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -97,12 +97,12 @@ static bool __init pcpu_need_numa(void)
>   * RETURNS:
>   * Pointer to the allocated area on success, NULL on failure.
>   */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
> -					unsigned long align)
> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
> +					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>  	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>  #ifdef CONFIG_NUMA
> -	int node = early_cpu_to_node(cpu);
> +	int node = cpu_to_nd_fn(cpu);
>  	void *ptr;
>  
>  	if (!node_online(node) || !NODE_DATA(node)) {
> @@ -128,9 +128,10 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
>  /*
>   * Helpers for first chunk memory allocation
>   */
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> +				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
> -	return pcpu_alloc_bootmem(cpu, size, align);
> +	return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
>  }
>  
>  static void __init pcpu_fc_free(void *ptr, size_t size)
> @@ -150,6 +151,11 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  #endif
>  }
>  
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> +	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> +}
> +
>  static void __init pcpup_populate_pte(unsigned long addr)
>  {
>  	populate_extra_pte(addr);
> @@ -205,6 +211,7 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
>  					    dyn_size, atom_size,
>  					    pcpu_cpu_distance,
> +					    pcpu_cpu_to_node,
>  					    pcpu_fc_alloc, pcpu_fc_free);
>  		if (rc < 0)
>  			pr_warn("%s allocator failed (%d), falling back to page size\n",
> @@ -213,6 +220,7 @@ void __init setup_per_cpu_areas(void)
>  	if (rc < 0)
>  		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
>  					   pcpu_fc_alloc, pcpu_fc_free,
> +					   pcpu_cpu_to_node,
>  					   pcpup_populate_pte);
>  	if (rc < 0)
>  		panic("cannot initialize percpu area (err=%d)", rc);
> diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
> index bc1876915457..273543d9ff85 100644
> --- a/drivers/base/arch_numa.c
> +++ b/drivers/base/arch_numa.c
> @@ -155,10 +155,10 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
>  }
>  
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
> -				       size_t align)
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> +				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
> -	int nid = early_cpu_to_node(cpu);
> +	int nid = cpu_to_nd_fn(cpu);
>  
>  	return  memblock_alloc_try_nid(size, align,
>  			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> @@ -229,6 +229,7 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>  					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
>  					    pcpu_cpu_distance,
> +					    early_cpu_to_node,
>  					    pcpu_fc_alloc, pcpu_fc_free);
>  #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>  		if (rc < 0)
> @@ -242,6 +243,7 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
>  					   pcpu_fc_alloc,
>  					   pcpu_fc_free,
> +					   early_cpu_to_node,
>  					   pcpu_populate_pte);
>  #endif
>  	if (rc < 0)
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index ae4004e7957e..41bb54715b0c 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>  
>  extern enum pcpu_fc pcpu_chosen_fc;
>  
> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
> -				     size_t align);
> +typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
> +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
> +				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>  typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
>  typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>  typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
> @@ -111,6 +112,7 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
>  extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  				size_t atom_size,
>  				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>  				pcpu_fc_alloc_fn_t alloc_fn,
>  				pcpu_fc_free_fn_t free_fn);
>  #endif
> @@ -119,6 +121,7 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  extern int __init pcpu_page_first_chunk(size_t reserved_size,
>  				pcpu_fc_alloc_fn_t alloc_fn,
>  				pcpu_fc_free_fn_t free_fn,
> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>  				pcpu_fc_populate_pte_fn_t populate_pte_fn);
>  #endif

Be consistent here. In pcpu_embed_first_chunk() you add the
cpu_to_nd_fn before alloc()/free() and then in pcpu_page_first_chunk()
you add it after. I'd prefer to add it before so as to keep the
cpu_distance()/cpu_to_node() grouping.
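
For illustration, one consistent ordering could look like this (just a
sketch of the prototypes at this point in the series, not the final
signatures):

	extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
					size_t atom_size,
					pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
					pcpu_fc_alloc_fn_t alloc_fn,
					pcpu_fc_free_fn_t free_fn);

	extern int __init pcpu_page_first_chunk(size_t reserved_size,
					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
					pcpu_fc_alloc_fn_t alloc_fn,
					pcpu_fc_free_fn_t free_fn,
					pcpu_fc_populate_pte_fn_t populate_pte_fn);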

>  
> diff --git a/mm/percpu.c b/mm/percpu.c
> index f5b2c2ea5a54..3f6cf1ff0be2 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -3001,6 +3001,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>   * @dyn_size: minimum free size for dynamic allocation in bytes
>   * @atom_size: allocation atom size
>   * @cpu_distance_fn: callback to determine distance between cpus, optional
> + * @cpu_to_nd_fn: callback to convert cpu to its node, optional
>   * @alloc_fn: function to allocate percpu page
>   * @free_fn: function to free percpu page
>   *
> @@ -3030,6 +3031,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>  int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  				  size_t atom_size,
>  				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> +				  pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>  				  pcpu_fc_alloc_fn_t alloc_fn,
>  				  pcpu_fc_free_fn_t free_fn)
>  {
> @@ -3066,7 +3068,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  		BUG_ON(cpu == NR_CPUS);
>  
>  		/* allocate space for the whole group */
> -		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
> +		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
>  		if (!ptr) {
>  			rc = -ENOMEM;
>  			goto out_free_areas;
> @@ -3145,6 +3147,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>   * @reserved_size: the size of reserved percpu area in bytes
>   * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
>   * @free_fn: function to free percpu page, always called with PAGE_SIZE
> + * @cpu_to_nd_fn: callback to convert cpu to its node, optional
>   * @populate_pte_fn: function to populate pte
>   *
>   * This is a helper to ease setting up page-remapped first percpu
> @@ -3159,6 +3162,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  int __init pcpu_page_first_chunk(size_t reserved_size,
>  				 pcpu_fc_alloc_fn_t alloc_fn,
>  				 pcpu_fc_free_fn_t free_fn,
> +				 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>  				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
>  {
>  	static struct vm_struct vm;
> @@ -3201,7 +3205,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
>  		for (i = 0; i < unit_pages; i++) {
>  			void *ptr;
>  
> -			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
> +			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
>  			if (!ptr) {
>  				pr_warn("failed to allocate %s page for cpu%u\n",
>  						psize_str, cpu);
> @@ -3278,8 +3282,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
>  unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
>  EXPORT_SYMBOL(__per_cpu_offset);
>  
> -static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
> -				       size_t align)
> +static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
> +				       pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>  	return  memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
>  }
> @@ -3300,7 +3304,7 @@ void __init setup_per_cpu_areas(void)
>  	 * what the legacy allocator did.
>  	 */
>  	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> -				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
> +				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
>  				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
>  	if (rc < 0)
>  		panic("Failed to initialize percpu areas.");
> -- 
> 2.26.2
> 

Thanks,
Dennis


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free funciton
  2021-11-21  9:35 ` [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free funciton Kefeng Wang
@ 2021-11-29 22:45   ` Dennis Zhou
  2021-11-30  6:27     ` Kefeng Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Dennis Zhou @ 2021-11-29 22:45 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86

On Sun, Nov 21, 2021 at 05:35:56PM +0800, Kefeng Wang wrote:
> With the previous patch, we can add generic pcpu first chunk
> allocation and free functions to clean up the duplicated definitions
> on each architecture.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/mips/mm/init.c            | 16 +--------
>  arch/powerpc/kernel/setup_64.c | 51 ++------------------------
>  arch/sparc/kernel/smp_64.c     | 50 +-------------------------
>  arch/x86/kernel/setup_percpu.c | 59 +-----------------------------
>  drivers/base/arch_numa.c       | 19 +---------
>  include/linux/percpu.h         |  9 +----
>  mm/percpu.c                    | 66 ++++++++++++++++++----------------
>  7 files changed, 42 insertions(+), 228 deletions(-)
> 
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index ebbf6923532c..5a8002839550 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -524,19 +524,6 @@ static int __init pcpu_cpu_to_node(int cpu)
>  	return cpu_to_node(cpu);
>  }
>  
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> -				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> -	return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
> -				      MEMBLOCK_ALLOC_ACCESSIBLE,
> -				      cpu_to_nd_fun(cpu));
> -}
> -
> -static void __init pcpu_fc_free(void *ptr, size_t size)
> -{
> -	memblock_free(ptr, size);
> -}
> -
>  void __init setup_per_cpu_areas(void)
>  {
>  	unsigned long delta;
> @@ -550,8 +537,7 @@ void __init setup_per_cpu_areas(void)
>  	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>  				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
>  				    pcpu_cpu_distance,
> -				    pcpu_cpu_to_node,
> -				    pcpu_fc_alloc, pcpu_fc_free);
> +				    pcpu_cpu_to_node);
>  	if (rc < 0)
>  		panic("Failed to initialize percpu areas.");
>  
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 9a5609c821df..364b1567f822 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -777,50 +777,6 @@ static __init int pcpu_cpu_to_node(int cpu)
>  	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
>  }
>  
> -/**
> - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> - * @cpu: cpu to allocate for
> - * @size: size allocation in bytes
> - * @align: alignment
> - *
> - * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
> - * does the right thing for NUMA regardless of the current
> - * configuration.
> - *
> - * RETURNS:
> - * Pointer to the allocated area on success, NULL on failure.
> - */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> -					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> -	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> -#ifdef CONFIG_NUMA
> -	int node = cpu_to_nd_fun(cpu);
> -	void *ptr;
> -
> -	if (!node_online(node) || !NODE_DATA(node)) {
> -		ptr = memblock_alloc_from(size, align, goal);
> -		pr_info("cpu %d has no node %d or node-local memory\n",
> -			cpu, node);
> -		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
> -			 cpu, size, __pa(ptr));
> -	} else {
> -		ptr = memblock_alloc_try_nid(size, align, goal,
> -					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
> -		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
> -			 "%016lx\n", cpu, size, node, __pa(ptr));
> -	}
> -	return ptr;
> -#else
> -	return memblock_alloc_from(size, align, goal);
> -#endif
> -}
> -
> -static void __init pcpu_free_bootmem(void *ptr, size_t size)
> -{
> -	memblock_free(ptr, size);
> -}
> -
>  static int pcpu_cpu_distance(unsigned int from, unsigned int to)
>  {
>  	if (early_cpu_to_node(from) == early_cpu_to_node(to))
> @@ -897,8 +853,7 @@ void __init setup_per_cpu_areas(void)
>  
>  	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
>  		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
> -					    pcpu_cpu_to_node,
> -					    pcpu_alloc_bootmem, pcpu_free_bootmem);
> +					    pcpu_cpu_to_node);
>  		if (rc)
>  			pr_warn("PERCPU: %s allocator failed (%d), "
>  				"falling back to page size\n",
> @@ -906,9 +861,7 @@ void __init setup_per_cpu_areas(void)
>  	}
>  
>  	if (rc < 0)
> -		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
> -					   pcpu_cpu_to_node,
> -					   pcpu_populate_pte);
> +		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
>  	if (rc < 0)
>  		panic("cannot initialize percpu area (err=%d)", rc);
>  
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index 026aa3ccbc30..198dadddb75d 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1526,50 +1526,6 @@ void smp_send_stop(void)
>  		smp_call_function(stop_this_cpu, NULL, 0);
>  }
>  
> -/**
> - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> - * @cpu: cpu to allocate for
> - * @size: size allocation in bytes
> - * @align: alignment
> - *
> - * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
> - * does the right thing for NUMA regardless of the current
> - * configuration.
> - *
> - * RETURNS:
> - * Pointer to the allocated area on success, NULL on failure.
> - */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
> -					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> -	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> -#ifdef CONFIG_NUMA
> -	int node = cpu_to_nd_fn(cpu);
> -	void *ptr;
> -
> -	if (!node_online(node) || !NODE_DATA(node)) {
> -		ptr = memblock_alloc_from(size, align, goal);
> -		pr_info("cpu %d has no node %d or node-local memory\n",
> -			cpu, node);
> -		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
> -			 cpu, size, __pa(ptr));
> -	} else {
> -		ptr = memblock_alloc_try_nid(size, align, goal,
> -					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
> -		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
> -			 "%016lx\n", cpu, size, node, __pa(ptr));
> -	}
> -	return ptr;
> -#else
> -	return memblock_alloc_from(size, align, goal);
> -#endif
> -}
> -
> -static void __init pcpu_free_bootmem(void *ptr, size_t size)
> -{
> -	memblock_free(ptr, size);
> -}
> -
>  static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  {
>  	if (cpu_to_node(from) == cpu_to_node(to))
> @@ -1641,9 +1597,7 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>  					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
>  					    pcpu_cpu_distance,
> -					    cpu_to_node,
> -					    pcpu_alloc_bootmem,
> -					    pcpu_free_bootmem);
> +					    cpu_to_node);
>  		if (rc)
>  			pr_warn("PERCPU: %s allocator failed (%d), "
>  				"falling back to page size\n",
> @@ -1651,8 +1605,6 @@ void __init setup_per_cpu_areas(void)
>  	}
>  	if (rc < 0)
>  		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> -					   pcpu_alloc_bootmem,
> -					   pcpu_free_bootmem,
>  					   cpu_to_node,
>  					   pcpu_populate_pte);
>  	if (rc < 0)
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index bba4fa174a16..cd672bd46241 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -84,61 +84,6 @@ static bool __init pcpu_need_numa(void)
>  }
>  #endif
>  
> -/**
> - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
> - * @cpu: cpu to allocate for
> - * @size: size allocation in bytes
> - * @align: alignment
> - *
> - * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
> - * does the right thing for NUMA regardless of the current
> - * configuration.
> - *
> - * RETURNS:
> - * Pointer to the allocated area on success, NULL on failure.
> - */
> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, unsigned long align,
> -					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> -	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> -#ifdef CONFIG_NUMA
> -	int node = cpu_to_nd_fn(cpu);
> -	void *ptr;
> -
> -	if (!node_online(node) || !NODE_DATA(node)) {
> -		ptr = memblock_alloc_from(size, align, goal);
> -		pr_info("cpu %d has no node %d or node-local memory\n",
> -			cpu, node);
> -		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
> -			 cpu, size, __pa(ptr));
> -	} else {
> -		ptr = memblock_alloc_try_nid(size, align, goal,
> -					     MEMBLOCK_ALLOC_ACCESSIBLE,
> -					     node);
> -
> -		pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
> -			 cpu, size, node, __pa(ptr));
> -	}
> -	return ptr;
> -#else
> -	return memblock_alloc_from(size, align, goal);
> -#endif
> -}
> -
> -/*
> - * Helpers for first chunk memory allocation
> - */
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> -				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> -	return pcpu_alloc_bootmem(cpu, size, align, cpu_to_nd_fn);
> -}
> -
> -static void __init pcpu_fc_free(void *ptr, size_t size)
> -{
> -	memblock_free(ptr, size);
> -}
> -
>  static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  {
>  #ifdef CONFIG_NUMA
> @@ -211,15 +156,13 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
>  					    dyn_size, atom_size,
>  					    pcpu_cpu_distance,
> -					    pcpu_cpu_to_node,
> -					    pcpu_fc_alloc, pcpu_fc_free);
> +					    pcpu_cpu_to_node);
>  		if (rc < 0)
>  			pr_warn("%s allocator failed (%d), falling back to page size\n",
>  				pcpu_fc_names[pcpu_chosen_fc], rc);
>  	}
>  	if (rc < 0)
>  		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> -					   pcpu_fc_alloc, pcpu_fc_free,
>  					   pcpu_cpu_to_node,
>  					   pcpup_populate_pte);
>  	if (rc < 0)
> diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
> index 273543d9ff85..23a10cc36165 100644
> --- a/drivers/base/arch_numa.c
> +++ b/drivers/base/arch_numa.c
> @@ -155,20 +155,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
>  }
>  
> -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> -				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> -	int nid = cpu_to_nd_fn(cpu);
> -
> -	return  memblock_alloc_try_nid(size, align,
> -			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> -}
> -
> -static void __init pcpu_fc_free(void *ptr, size_t size)
> -{
> -	memblock_free(ptr, size);
> -}
> -
>  #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>  static void __init pcpu_populate_pte(unsigned long addr)
>  {
> @@ -229,8 +215,7 @@ void __init setup_per_cpu_areas(void)
>  		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>  					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
>  					    pcpu_cpu_distance,
> -					    early_cpu_to_node,
> -					    pcpu_fc_alloc, pcpu_fc_free);
> +					    early_cpu_to_node);
>  #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>  		if (rc < 0)
>  			pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
> @@ -241,8 +226,6 @@ void __init setup_per_cpu_areas(void)
>  #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>  	if (rc < 0)
>  		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> -					   pcpu_fc_alloc,
> -					   pcpu_fc_free,
>  					   early_cpu_to_node,
>  					   pcpu_populate_pte);
>  #endif
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index 41bb54715b0c..d73c97ef4ff4 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>  extern enum pcpu_fc pcpu_chosen_fc;
>  
>  typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
> -				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
> -typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
>  typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>  typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>  
> @@ -112,15 +109,11 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
>  extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  				size_t atom_size,
>  				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> -				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> -				pcpu_fc_alloc_fn_t alloc_fn,
> -				pcpu_fc_free_fn_t free_fn);
> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>  #endif
>  
>  #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>  extern int __init pcpu_page_first_chunk(size_t reserved_size,
> -				pcpu_fc_alloc_fn_t alloc_fn,
> -				pcpu_fc_free_fn_t free_fn,
>  				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>  				pcpu_fc_populate_pte_fn_t populate_pte_fn);
>  #endif
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 3f6cf1ff0be2..efaa1cbaf73d 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -2992,6 +2992,30 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>  
>  	return ai;
>  }
> +
> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
> +				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> +{
> +	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
> +	int node = NUMA_NO_NODE;
> +	void *ptr;
> +
> +	if (cpu_to_nd_fn)
> +		node = cpu_to_nd_fn(cpu);
> +
> +	if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
> +		ptr = memblock_alloc_from(size, align, goal);
> +	} else {
> +		ptr = memblock_alloc_try_nid(size, align, goal,
> +					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
> +	}
> +	return ptr;
> +}

My preference here would be to keep this identical to the x86
implementation where we #ifdef CONFIG_NUMA.
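
Something roughly like the following, say (an untested sketch built
from the x86 version being removed above; cpu_to_nd_fn may be NULL, as
elsewhere in the series):

	static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
					   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
	{
		const unsigned long goal = __pa(MAX_DMA_ADDRESS);
	#ifdef CONFIG_NUMA
		int node = cpu_to_nd_fn ? cpu_to_nd_fn(cpu) : NUMA_NO_NODE;

		/* fall back to a non-NUMA allocation when the node is unusable */
		if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node))
			return memblock_alloc_from(size, align, goal);

		return memblock_alloc_try_nid(size, align, goal,
					      MEMBLOCK_ALLOC_ACCESSIBLE, node);
	#else
		return memblock_alloc_from(size, align, goal);
	#endif
	}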

> +
> +static void __init pcpu_fc_free(void *ptr, size_t size)
> +{
> +	memblock_free(ptr, size);
> +}
>  #endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
>  
>  #if defined(BUILD_EMBED_FIRST_CHUNK)
> @@ -3002,14 +3026,12 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>   * @atom_size: allocation atom size
>   * @cpu_distance_fn: callback to determine distance between cpus, optional
>   * @cpu_to_nd_fn: callback to convert cpu to its node, optional
> - * @alloc_fn: function to allocate percpu page
> - * @free_fn: function to free percpu page
>   *
>   * This is a helper to ease setting up embedded first percpu chunk and
>   * can be called where pcpu_setup_first_chunk() is expected.
>   *
>   * If this function is used to setup the first chunk, it is allocated
> - * by calling @alloc_fn and used as-is without being mapped into
> + * by calling pcpu_fc_alloc and used as-is without being mapped into
>   * vmalloc area.  Allocations are always whole multiples of @atom_size
>   * aligned to @atom_size.
>   *
> @@ -3023,7 +3045,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>   * @dyn_size specifies the minimum dynamic area size.
>   *
>   * If the needed size is smaller than the minimum or specified unit
> - * size, the leftover is returned using @free_fn.
> + * size, the leftover is returned using pcpu_fc_free.
>   *
>   * RETURNS:
>   * 0 on success, -errno on failure.
> @@ -3031,9 +3053,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>  int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  				  size_t atom_size,
>  				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
> -				  pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> -				  pcpu_fc_alloc_fn_t alloc_fn,
> -				  pcpu_fc_free_fn_t free_fn)
> +				  pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>  	void *base = (void *)ULONG_MAX;
>  	void **areas = NULL;
> @@ -3068,7 +3088,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  		BUG_ON(cpu == NR_CPUS);
>  
>  		/* allocate space for the whole group */
> -		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
> +		ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
>  		if (!ptr) {
>  			rc = -ENOMEM;
>  			goto out_free_areas;
> @@ -3107,12 +3127,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
>  			if (gi->cpu_map[i] == NR_CPUS) {
>  				/* unused unit, free whole */
> -				free_fn(ptr, ai->unit_size);
> +				pcpu_fc_free(ptr, ai->unit_size);
>  				continue;
>  			}
>  			/* copy and return the unused part */
>  			memcpy(ptr, __per_cpu_load, ai->static_size);
> -			free_fn(ptr + size_sum, ai->unit_size - size_sum);
> +			pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
>  		}
>  	}
>  
> @@ -3131,7 +3151,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  out_free_areas:
>  	for (group = 0; group < ai->nr_groups; group++)
>  		if (areas[group])
> -			free_fn(areas[group],
> +			pcpu_fc_free(areas[group],
>  				ai->groups[group].nr_units * ai->unit_size);
>  out_free:
>  	pcpu_free_alloc_info(ai);
> @@ -3145,8 +3165,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  /**
>   * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
>   * @reserved_size: the size of reserved percpu area in bytes
> - * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
> - * @free_fn: function to free percpu page, always called with PAGE_SIZE
>   * @cpu_to_nd_fn: callback to convert cpu to its node, optional
>   * @populate_pte_fn: function to populate pte
>   *
> @@ -3160,8 +3178,6 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>   * 0 on success, -errno on failure.
>   */
>  int __init pcpu_page_first_chunk(size_t reserved_size,
> -				 pcpu_fc_alloc_fn_t alloc_fn,
> -				 pcpu_fc_free_fn_t free_fn,
>  				 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>  				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
>  {
> @@ -3205,7 +3221,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
>  		for (i = 0; i < unit_pages; i++) {
>  			void *ptr;
>  
> -			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
> +			ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
>  			if (!ptr) {
>  				pr_warn("failed to allocate %s page for cpu%u\n",
>  						psize_str, cpu);
> @@ -3257,7 +3273,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
>  
>  enomem:
>  	while (--j >= 0)
> -		free_fn(page_address(pages[j]), PAGE_SIZE);
> +		pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
>  	rc = -ENOMEM;
>  out_free_ar:
>  	memblock_free(pages, pages_size);
> @@ -3282,17 +3298,6 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
>  unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
>  EXPORT_SYMBOL(__per_cpu_offset);
>  
> -static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align,
> -				       pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> -{
> -	return  memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
> -}
> -
> -static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
> -{
> -	memblock_free(ptr, size);
> -}
> -
>  void __init setup_per_cpu_areas(void)
>  {
>  	unsigned long delta;
> @@ -3303,9 +3308,8 @@ void __init setup_per_cpu_areas(void)
>  	 * Always reserve area for module percpu variables.  That's
>  	 * what the legacy allocator did.
>  	 */
> -	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> -				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL,
> -				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
> +	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
> +				    PAGE_SIZE, NULL, NULL);
>  	if (rc < 0)
>  		panic("Failed to initialize percpu areas.");
>  
> -- 
> 2.26.2
> 

Overall this makes sense.

Thanks,
Dennis


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function
  2021-11-21  9:35 ` [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function Kefeng Wang
@ 2021-11-29 22:49   ` Dennis Zhou
  2021-11-30  6:42     ` Kefeng Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Dennis Zhou @ 2021-11-29 22:49 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86

On Sun, Nov 21, 2021 at 05:35:57PM +0800, Kefeng Wang wrote:
> When NEED_PER_CPU_PAGE_FIRST_CHUNK is enabled, we need a function to
> populate the pte; add a generic pcpu populate pte function and switch
> to using it.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/powerpc/kernel/setup_64.c | 47 +--------------------
>  arch/sparc/kernel/smp_64.c     | 57 +------------------------
>  arch/x86/kernel/setup_percpu.c |  5 +--
>  drivers/base/arch_numa.c       | 51 +---------------------
>  include/linux/percpu.h         |  5 +--
>  mm/percpu.c                    | 77 +++++++++++++++++++++++++++++++---
>  6 files changed, 79 insertions(+), 163 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 364b1567f822..1a17828af77f 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -788,51 +788,6 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
>  unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
>  EXPORT_SYMBOL(__per_cpu_offset);
>  
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> -	pgd_t *pgd = pgd_offset_k(addr);
> -	p4d_t *p4d;
> -	pud_t *pud;
> -	pmd_t *pmd;
> -
> -	p4d = p4d_offset(pgd, addr);
> -	if (p4d_none(*p4d)) {
> -		pud_t *new;
> -
> -		new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		p4d_populate(&init_mm, p4d, new);
> -	}
> -
> -	pud = pud_offset(p4d, addr);
> -	if (pud_none(*pud)) {
> -		pmd_t *new;
> -
> -		new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		pud_populate(&init_mm, pud, new);
> -	}
> -
> -	pmd = pmd_offset(pud, addr);
> -	if (!pmd_present(*pmd)) {
> -		pte_t *new;
> -
> -		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		pmd_populate_kernel(&init_mm, pmd, new);
> -	}
> -
> -	return;
> -
> -err_alloc:
> -	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> -	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
> -
>  void __init setup_per_cpu_areas(void)
>  {
>  	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
> @@ -861,7 +816,7 @@ void __init setup_per_cpu_areas(void)
>  	}
>  
>  	if (rc < 0)
> -		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
> +		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
>  	if (rc < 0)
>  		panic("cannot initialize percpu area (err=%d)", rc);
>  
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index 198dadddb75d..00dffe2d834b 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1534,59 +1534,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  		return REMOTE_DISTANCE;
>  }
>  
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> -	pgd_t *pgd = pgd_offset_k(addr);
> -	p4d_t *p4d;
> -	pud_t *pud;
> -	pmd_t *pmd;
> -
> -	if (pgd_none(*pgd)) {
> -		pud_t *new;
> -
> -		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		pgd_populate(&init_mm, pgd, new);
> -	}
> -
> -	p4d = p4d_offset(pgd, addr);
> -	if (p4d_none(*p4d)) {
> -		pud_t *new;
> -
> -		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		p4d_populate(&init_mm, p4d, new);
> -	}
> -
> -	pud = pud_offset(p4d, addr);
> -	if (pud_none(*pud)) {
> -		pmd_t *new;
> -
> -		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		pud_populate(&init_mm, pud, new);
> -	}
> -
> -	pmd = pmd_offset(pud, addr);
> -	if (!pmd_present(*pmd)) {
> -		pte_t *new;
> -
> -		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		pmd_populate_kernel(&init_mm, pmd, new);
> -	}
> -
> -	return;
> -
> -err_alloc:
> -	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> -	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
>  void __init setup_per_cpu_areas(void)
>  {
>  	unsigned long delta;
> @@ -1604,9 +1551,7 @@ void __init setup_per_cpu_areas(void)
>  				pcpu_fc_names[pcpu_chosen_fc], rc);
>  	}
>  	if (rc < 0)
> -		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> -					   cpu_to_node,
> -					   pcpu_populate_pte);
> +		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, cpu_to_node);
>  	if (rc < 0)
>  		panic("cannot initialize percpu area (err=%d)", rc);
>  
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index cd672bd46241..4eadbe45078e 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int cpu)
>  	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
>  }
>  
> -static void __init pcpup_populate_pte(unsigned long addr)
> +void __init pcpu_populate_pte(unsigned long addr)
>  {
>  	populate_extra_pte(addr);
>  }
> @@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void)
>  	}
>  	if (rc < 0)
>  		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> -					   pcpu_cpu_to_node,
> -					   pcpup_populate_pte);
> +					   pcpu_cpu_to_node);

x86 has its own implementation that differs for 32-bit. I'm not
confident this is a correct drop-in replacement for x86, so I'd
prefer to keep populate_pte_fn() around.

>  	if (rc < 0)
>  		panic("cannot initialize percpu area (err=%d)", rc);
>  
> diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
> index 23a10cc36165..eaa31e567d1e 100644
> --- a/drivers/base/arch_numa.c
> +++ b/drivers/base/arch_numa.c
> @@ -14,7 +14,6 @@
>  #include <linux/of.h>
>  
>  #include <asm/sections.h>
> -#include <asm/pgalloc.h>
>  
>  struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
>  EXPORT_SYMBOL(node_data);
> @@ -155,52 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>  	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
>  }
>  
> -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> -	pgd_t *pgd = pgd_offset_k(addr);
> -	p4d_t *p4d;
> -	pud_t *pud;
> -	pmd_t *pmd;
> -
> -	p4d = p4d_offset(pgd, addr);
> -	if (p4d_none(*p4d)) {
> -		pud_t *new;
> -
> -		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		p4d_populate(&init_mm, p4d, new);
> -	}
> -
> -	pud = pud_offset(p4d, addr);
> -	if (pud_none(*pud)) {
> -		pmd_t *new;
> -
> -		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		pud_populate(&init_mm, pud, new);
> -	}
> -
> -	pmd = pmd_offset(pud, addr);
> -	if (!pmd_present(*pmd)) {
> -		pte_t *new;
> -
> -		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> -		if (!new)
> -			goto err_alloc;
> -		pmd_populate_kernel(&init_mm, pmd, new);
> -	}
> -
> -	return;
> -
> -err_alloc:
> -	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> -	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -#endif
> -
>  void __init setup_per_cpu_areas(void)
>  {
>  	unsigned long delta;
> @@ -225,9 +178,7 @@ void __init setup_per_cpu_areas(void)
>  
>  #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>  	if (rc < 0)
> -		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> -					   early_cpu_to_node,
> -					   pcpu_populate_pte);
> +		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
>  #endif
>  	if (rc < 0)
>  		panic("Failed to initialize percpu areas (err=%d).", rc);
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index d73c97ef4ff4..f1ec5ad1351c 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>  extern enum pcpu_fc pcpu_chosen_fc;
>  
>  typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
> -typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>  typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>  
>  extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
> @@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  #endif
>  
>  #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> +void __init pcpu_populate_pte(unsigned long addr);
>  extern int __init pcpu_page_first_chunk(size_t reserved_size,
> -				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> -				pcpu_fc_populate_pte_fn_t populate_pte_fn);
> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>  #endif
>  
>  extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
> diff --git a/mm/percpu.c b/mm/percpu.c
> index efaa1cbaf73d..d907daed04eb 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -3162,11 +3162,80 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>  #endif /* BUILD_EMBED_FIRST_CHUNK */
>  
>  #ifdef BUILD_PAGE_FIRST_CHUNK
> +#include <asm/pgalloc.h>
> +
> +#ifndef P4D_TABLE_SIZE
> +#define P4D_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PUD_TABLE_SIZE
> +#define PUD_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PMD_TABLE_SIZE
> +#define PMD_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PTE_TABLE_SIZE
> +#define PTE_TABLE_SIZE PAGE_SIZE
> +#endif
> +void __init __weak pcpu_populate_pte(unsigned long addr)
> +{
> +	pgd_t *pgd = pgd_offset_k(addr);
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +
> +	if (pgd_none(*pgd)) {
> +		p4d_t *new;
> +
> +		new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);

It's unnecessary to specify a min_addr to memblock_alloc_from(), as it
won't allocate from address 0 anyway, so please use memblock_alloc()
instead.
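
For illustration, the suggested change at the first level would look
something like this (a sketch only, keeping the table-size macro from
the patch):

	if (pgd_none(*pgd)) {
		p4d_t *new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);

		if (!new)
			goto err_alloc;
		pgd_populate(&init_mm, pgd, new);
	}

and likewise for the p4d/pud/pmd levels below.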

> +		if (!new)
> +			goto err_alloc;
> +		pgd_populate(&init_mm, pgd, new);
> +	}
> +
> +	p4d = p4d_offset(pgd, addr);
> +	if (p4d_none(*p4d)) {
> +		pud_t *new;
> +
> +		new = memblock_alloc_from(PUD_TABLE_SIZE, PUD_TABLE_SIZE, PAGE_SIZE);

See above.

> +		if (!new)
> +			goto err_alloc;
> +		p4d_populate(&init_mm, p4d, new);
> +	}
> +
> +	pud = pud_offset(p4d, addr);
> +	if (pud_none(*pud)) {
> +		pmd_t *new;
> +
> +		new = memblock_alloc_from(PMD_TABLE_SIZE, PMD_TABLE_SIZE, PAGE_SIZE);

See above.

> +		if (!new)
> +			goto err_alloc;
> +		pud_populate(&init_mm, pud, new);
> +	}
> +
> +	pmd = pmd_offset(pud, addr);
> +	if (!pmd_present(*pmd)) {
> +		pte_t *new;
> +
> +		new = memblock_alloc_from(PTE_TABLE_SIZE, PTE_TABLE_SIZE, PAGE_SIZE);

See above.

> +		if (!new)
> +			goto err_alloc;
> +		pmd_populate_kernel(&init_mm, pmd, new);
> +	}
> +
> +	return;
> +
> +err_alloc:
> +	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> +	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> +}
> +
>  /**
>   * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
>   * @reserved_size: the size of reserved percpu area in bytes
>   * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
> - * @populate_pte_fn: function to populate pte
>   *
>   * This is a helper to ease setting up page-remapped first percpu
>   * chunk and can be called where pcpu_setup_first_chunk() is expected.
> @@ -3177,9 +3246,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>   * RETURNS:
>   * 0 on success, -errno on failure.
>   */
> -int __init pcpu_page_first_chunk(size_t reserved_size,
> -				 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> -				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
> +int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>  {
>  	static struct vm_struct vm;
>  	struct pcpu_alloc_info *ai;
> @@ -3243,7 +3310,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
>  			(unsigned long)vm.addr + unit * ai->unit_size;
>  
>  		for (i = 0; i < unit_pages; i++)
> -			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
> +			pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
>  
>  		/* pte already populated, the following shouldn't fail */
>  		rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
> -- 
> 2.26.2
> 


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton
  2021-11-21  9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton Kefeng Wang
                   ` (4 preceding siblings ...)
  2021-11-29  2:51 ` [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton Kefeng Wang
@ 2021-11-29 22:55 ` Dennis Zhou
  2021-11-30  6:53   ` Kefeng Wang
  5 siblings, 1 reply; 19+ messages in thread
From: Dennis Zhou @ 2021-11-29 22:55 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86

Hello,

On Sun, Nov 21, 2021 at 05:35:53PM +0800, Kefeng Wang wrote:
> When adding support for the page mapping percpu first chunk allocator
> on arm64, we found a lot of duplicated code in the percpu embed/page
> first chunk allocators. This patchset aims to clean them up and should
> introduce no functional change; it has only been tested on arm64.
> 
> Kefeng Wang (4):
>   mm: percpu: Generalize percpu related config
>   mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
>   mm: percpu: Add generic pcpu_fc_alloc/free funciton
>   mm: percpu: Add generic pcpu_populate_pte() function
> 
>  arch/arm64/Kconfig             |  20 +----
>  arch/ia64/Kconfig              |   9 +--
>  arch/mips/Kconfig              |  10 +--
>  arch/mips/mm/init.c            |  14 +---
>  arch/powerpc/Kconfig           |  17 +---
>  arch/powerpc/kernel/setup_64.c |  92 +--------------------
>  arch/riscv/Kconfig             |  10 +--
>  arch/sparc/Kconfig             |  12 +--
>  arch/sparc/kernel/smp_64.c     | 105 +-----------------------
>  arch/x86/Kconfig               |  17 +---
>  arch/x86/kernel/setup_percpu.c |  66 ++-------------
>  drivers/base/arch_numa.c       |  68 +---------------
>  include/linux/percpu.h         |  13 +--
>  mm/Kconfig                     |  12 +++
>  mm/percpu.c                    | 143 +++++++++++++++++++++++++--------
>  15 files changed, 165 insertions(+), 443 deletions(-)
> 
> -- 
> 2.26.2
> 

I've made a few comments. I think this will be a little bit of a
challenge to get through because it touches so many architectures. For
ease, it probably makes sense to run it through my tree, but we'll need
explicit acks as I mentioned.

I like getting rid of the pcpu_alloc_bootmem()/pcpu_free_bootmem()
functions. However, let's keep the implementation identical to x86.


I don't think we should get rid of the populate_pte_fn(). I'm not
comfortable changing x86's implementation. Simply offer a NULL, and if
NULL use the default.
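
Something along these lines, purely as a sketch of the NULL-fallback
idea (not code from this series):

	for (i = 0; i < unit_pages; i++) {
		unsigned long addr = unit_addr + (i << PAGE_SHIFT);

		/* arch-provided hook wins; otherwise use the generic walker */
		if (populate_pte_fn)
			populate_pte_fn(addr);
		else
			pcpu_populate_pte(addr);
	}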

Do you have a tree that Intel pulls? I suggest cleaning up the patches
and pushing them to a remote branch that they pick up; that would have
caught the mips typo. Send a PR creating a file in [1] for your branch;
GitHub is fine. Basic validation on more than arm64 needs to be done
before I can pick this up, too.

[1] https://github.com/intel/lkp-tests/tree/master/repo/linux

Thanks,
Dennis


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config
  2021-11-29 22:36   ` Dennis Zhou
@ 2021-11-30  6:22     ` Kefeng Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-30  6:22 UTC (permalink / raw)
  To: Dennis Zhou
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86


On 2021/11/30 6:36, Dennis Zhou wrote:
> Hello,
>
> On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote:
>> The HAVE_SETUP_PER_CPU_AREA/NEED_PER_CPU_EMBED_FIRST_CHUNK/
>> NEED_PER_CPU_PAGE_FIRST_CHUNK/USE_PERCPU_NUMA_NODE_ID configs
>> have duplicate definitions on the platforms that subscribe to them.
>>
>> Move them into mm, drop these redundant definitions, and instead
>> just select them on the applicable platforms.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   arch/arm64/Kconfig   | 20 ++++----------------
>>   arch/ia64/Kconfig    |  9 ++-------
>>   arch/mips/Kconfig    | 10 ++--------
>>   arch/powerpc/Kconfig | 17 ++++-------------
>>   arch/riscv/Kconfig   | 10 ++--------
>>   arch/sparc/Kconfig   | 12 +++---------
>>   arch/x86/Kconfig     | 17 ++++-------------
>>   mm/Kconfig           | 12 ++++++++++++
>>   8 files changed, 33 insertions(+), 74 deletions(-)
...
>>
> This makes sense and looks good. A series like this is a little tricky.
> The latter patches change the contracts so it'd be easiest to run it
> through my tree. We'd need to get explicit acks from each arch
> maintainer to make sure they're fine with this.

Got it, I will resend without RFC and hope to get ACKs from the
related arch maintainers.


>
> Thanks,
> Dennis
> .


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
  2021-11-29 22:40   ` Dennis Zhou
@ 2021-11-30  6:22     ` Kefeng Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-30  6:22 UTC (permalink / raw)
  To: Dennis Zhou
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86


On 2021/11/30 6:40, Dennis Zhou wrote:
> On Sun, Nov 21, 2021 at 05:35:55PM +0800, Kefeng Wang wrote:
>> Add pcpu_fc_cpu_to_node_fn_t and pass it into pcpu_fc_alloc_fn_t;
>> the pcpu first chunk allocation will call it to allocate memblock
>> memory on the corresponding node.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   arch/mips/mm/init.c            | 12 +++++++++---
>>   arch/powerpc/kernel/setup_64.c | 14 +++++++++++---
>>   arch/sparc/kernel/smp_64.c     |  8 +++++---
>>   arch/x86/kernel/setup_percpu.c | 18 +++++++++++++-----
>>   drivers/base/arch_numa.c       |  8 +++++---
>>   include/linux/percpu.h         |  7 +++++--
>>   mm/percpu.c                    | 14 +++++++++-----
>>   7 files changed, 57 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
...
>> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
>> index 6052f5d5ded3..9a5609c821df 100644
>> --- a/arch/powerpc/kernel/setup_64.c
>> +++ b/arch/powerpc/kernel/setup_64.c
>> @@ -771,6 +771,12 @@ void __init emergency_stack_init(void)
>>   }
>>   
>>   #ifdef CONFIG_SMP
>> +
>> +static __init int pcpu_cpu_to_node(int cpu)
>> +{
>> +	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
>> +}
>> +
>>   /**
>>    * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
>>    * @cpu: cpu to allocate for
>> @@ -784,12 +790,12 @@ void __init emergency_stack_init(void)
>>    * RETURNS:
>>    * Pointer to the allocated area on success, NULL on failure.
>>    */
>> -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
>> -					size_t align)
>> +static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, size_t align,
>> +					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>>   {
>>   	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>>   #ifdef CONFIG_NUMA
>> -	int node = early_cpu_to_node(cpu);
>> +	int node = cpu_to_nd_fun(cpu);
> ^ typo - cpu_to_nd_fn().

Will fix.
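
That is, the fixed line would simply read:

	int node = cpu_to_nd_fn(cpu);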

...

>>   	if (rc < 0)
>> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
>> index ae4004e7957e..41bb54715b0c 100644
>> --- a/include/linux/percpu.h
>> +++ b/include/linux/percpu.h
>> @@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>>   
>>   extern enum pcpu_fc pcpu_chosen_fc;
>>   
>> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
>> -				     size_t align);
>> +typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
>> +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
>> +				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>>   typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
>>   typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>>   typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>> @@ -111,6 +112,7 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
>>   extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>>   				size_t atom_size,
>>   				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
>> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>>   				pcpu_fc_alloc_fn_t alloc_fn,
>>   				pcpu_fc_free_fn_t free_fn);
>>   #endif
>> @@ -119,6 +121,7 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>>   extern int __init pcpu_page_first_chunk(size_t reserved_size,
>>   				pcpu_fc_alloc_fn_t alloc_fn,
>>   				pcpu_fc_free_fn_t free_fn,
>> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>>   				pcpu_fc_populate_pte_fn_t populate_pte_fn);
>>   #endif
> Be consistent here. In pcpu_embed_first_chunk() you add the
> cpu_to_node() before alloc()/free() and then in pcpu_page_first_chunk()
> you add it after. I'd prefer to add it before, so as to keep the
> cpu_distance()/cpu_to_node() grouping.
Sure, will adjust the order.
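
That is, both prototypes would end up grouped the same way (a sketch
of the requested ordering, not the final v2 signatures):

	extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
					size_t atom_size,
					pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
					pcpu_fc_alloc_fn_t alloc_fn,
					pcpu_fc_free_fn_t free_fn);

	extern int __init pcpu_page_first_chunk(size_t reserved_size,
					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
					pcpu_fc_alloc_fn_t alloc_fn,
					pcpu_fc_free_fn_t free_fn,
					pcpu_fc_populate_pte_fn_t populate_pte_fn);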


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free funciton
  2021-11-29 22:45   ` Dennis Zhou
@ 2021-11-30  6:27     ` Kefeng Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-30  6:27 UTC (permalink / raw)
  To: Dennis Zhou
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86


On 2021/11/30 6:45, Dennis Zhou wrote:
> On Sun, Nov 21, 2021 at 05:35:56PM +0800, Kefeng Wang wrote:
>> With the previous patch, we can add generic pcpu first chunk
>> allocation and free functions to clean up the duplicated definitions
>> on each architecture.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   arch/mips/mm/init.c            | 16 +--------
>>   arch/powerpc/kernel/setup_64.c | 51 ++------------------------
>>   arch/sparc/kernel/smp_64.c     | 50 +-------------------------
>>   arch/x86/kernel/setup_percpu.c | 59 +-----------------------------
>>   drivers/base/arch_numa.c       | 19 +---------
>>   include/linux/percpu.h         |  9 +----
>>   mm/percpu.c                    | 66 ++++++++++++++++++----------------
>>   7 files changed, 42 insertions(+), 228 deletions(-)
...
>> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
>> index 41bb54715b0c..d73c97ef4ff4 100644
>> --- a/include/linux/percpu.h
>> +++ b/include/linux/percpu.h
>> @@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>>   extern enum pcpu_fc pcpu_chosen_fc;
>>   
>>   typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
>> -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align,
>> -				     pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>> -typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
>>   typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>>   typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>>   
>> @@ -112,15 +109,11 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
>>   extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>>   				size_t atom_size,
>>   				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
>> -				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>> -				pcpu_fc_alloc_fn_t alloc_fn,
>> -				pcpu_fc_free_fn_t free_fn);
>> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>>   #endif
>>   
>>   #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>>   extern int __init pcpu_page_first_chunk(size_t reserved_size,
>> -				pcpu_fc_alloc_fn_t alloc_fn,
>> -				pcpu_fc_free_fn_t free_fn,
>>   				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>>   				pcpu_fc_populate_pte_fn_t populate_pte_fn);
>>   #endif
>> diff --git a/mm/percpu.c b/mm/percpu.c
>> index 3f6cf1ff0be2..efaa1cbaf73d 100644
>> --- a/mm/percpu.c
>> +++ b/mm/percpu.c
>> @@ -2992,6 +2992,30 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
>>   
>>   	return ai;
>>   }
>> +
>> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
>> +				   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
>> +{
>> +	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
>> +	int node = NUMA_NO_NODE;
>> +	void *ptr;
>> +
>> +	if (cpu_to_nd_fn)
>> +		node = cpu_to_nd_fn(cpu);
>> +
>> +	if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
>> +		ptr = memblock_alloc_from(size, align, goal);
>> +	} else {
>> +		ptr = memblock_alloc_try_nid(size, align, goal,
>> +					     MEMBLOCK_ALLOC_ACCESSIBLE, node);
>> +	}
>> +	return ptr;
>> +}
> My preference here would be to keep this identical to the x86
> implementation where we #ifdef CONFIG_NUMA.

I will add back the '#ifdef CONFIG_NUMA', and will also add back the
pr_debug/pr_info part, as x86 does.
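
For instance, modelled on x86's pcpu_alloc_bootmem() (a sketch only;
the exact pr_info wording is an assumption borrowed from x86, not the
final v2 code):

	static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
					   pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
	{
		const unsigned long goal = __pa(MAX_DMA_ADDRESS);
	#ifdef CONFIG_NUMA
		int node = cpu_to_nd_fn ? cpu_to_nd_fn(cpu) : NUMA_NO_NODE;
		void *ptr;

		if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
			/* no node-local memory: fall back to any node */
			ptr = memblock_alloc_from(size, align, goal);
			pr_info("cpu %d has no node %d or node-local memory\n",
				cpu, node);
		} else {
			ptr = memblock_alloc_try_nid(size, align, goal,
						     MEMBLOCK_ALLOC_ACCESSIBLE, node);
		}
		return ptr;
	#else
		return memblock_alloc_from(size, align, goal);
	#endif
	}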

> Overall this makes sense.
>
> Thanks,
> Dennis
> .


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function
  2021-11-29 22:49   ` Dennis Zhou
@ 2021-11-30  6:42     ` Kefeng Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-30  6:42 UTC (permalink / raw)
  To: Dennis Zhou
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86


On 2021/11/30 6:49, Dennis Zhou wrote:
> On Sun, Nov 21, 2021 at 05:35:57PM +0800, Kefeng Wang wrote:
>> When NEED_PER_CPU_PAGE_FIRST_CHUNK is enabled, we need a function to
>> populate the pte, so add a generic pcpu populate pte function and
>> switch to using it.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   arch/powerpc/kernel/setup_64.c | 47 +--------------------
>>   arch/sparc/kernel/smp_64.c     | 57 +------------------------
>>   arch/x86/kernel/setup_percpu.c |  5 +--
>>   drivers/base/arch_numa.c       | 51 +---------------------
>>   include/linux/percpu.h         |  5 +--
>>   mm/percpu.c                    | 77 +++++++++++++++++++++++++++++++---
>>   6 files changed, 79 insertions(+), 163 deletions(-)
>>
...
>> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
>> index cd672bd46241..4eadbe45078e 100644
>> --- a/arch/x86/kernel/setup_percpu.c
>> +++ b/arch/x86/kernel/setup_percpu.c
>> @@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int cpu)
>>   	return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
>>   }
>>   
>> -static void __init pcpup_populate_pte(unsigned long addr)
>> +void __init pcpu_populate_pte(unsigned long addr)
>>   {
>>   	populate_extra_pte(addr);
>>   }
>> @@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void)
>>   	}
>>   	if (rc < 0)
>>   		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
>> -					   pcpu_cpu_to_node,
>> -					   pcpup_populate_pte);
>> +					   pcpu_cpu_to_node);
> x86 has its own implementation that differs for 32-bit. I'm not
> confident this is a correct drop-in replacement for x86, so I'd
> prefer to keep populate_pte_fn() around.


x86's pcpup_populate_pte() version is not dropped.

We define a __weak pcpu_populate_pte() function in mm/percpu.c, and
x86 has its own version, so there is no functional change on x86.

I will add this to the changelog:

arch/x86/kernel/setup_percpu.c: void __init pcpu_populate_pte(unsigned long addr)
include/linux/percpu.h:         void __init pcpu_populate_pte(unsigned long addr);
mm/percpu.c:                    void __init __weak pcpu_populate_pte(unsigned long addr)
mm/percpu.c:                    pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
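
In other words, a minimal illustration of the weak-symbol override at
work here (condensed from the hunks above; the function bodies are
abbreviated):

	/* mm/percpu.c: generic fallback; the linker uses this definition
	 * only when no strong definition exists elsewhere in the image. */
	void __init __weak pcpu_populate_pte(unsigned long addr)
	{
		/* generic pgd/p4d/pud/pmd walk, allocating missing tables */
	}

	/* arch/x86/kernel/setup_percpu.c: strong definition; it overrides
	 * the weak one at link time, so x86 keeps populate_extra_pte(). */
	void __init pcpu_populate_pte(unsigned long addr)
	{
		populate_extra_pte(addr);
	}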


>> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
>> index d73c97ef4ff4..f1ec5ad1351c 100644
>> --- a/include/linux/percpu.h
>> +++ b/include/linux/percpu.h
>> @@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
>>   extern enum pcpu_fc pcpu_chosen_fc;
>>   
>>   typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
>> -typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
>>   typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>>   
>>   extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
>> @@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>>   #endif
>>   
>>   #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
>> +void __init pcpu_populate_pte(unsigned long addr);
>>   extern int __init pcpu_page_first_chunk(size_t reserved_size,
>> -				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
>> -				pcpu_fc_populate_pte_fn_t populate_pte_fn);
>> +				pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
>>   #endif
>>   
>>   extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
>> diff --git a/mm/percpu.c b/mm/percpu.c
>> index efaa1cbaf73d..d907daed04eb 100644
>> --- a/mm/percpu.c
>> +++ b/mm/percpu.c
>> @@ -3162,11 +3162,80 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
>>   #endif /* BUILD_EMBED_FIRST_CHUNK */
>>   
...
>> +void __init __weak pcpu_populate_pte(unsigned long addr)
>> +{
>> +	pgd_t *pgd = pgd_offset_k(addr);
>> +	p4d_t *p4d;
>> +	pud_t *pud;
>> +	pmd_t *pmd;
>> +
>> +	if (pgd_none(*pgd)) {
>> +		p4d_t *new;
>> +
>> +		new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);
> It's unnecessary to specify a min_addr to memblock_alloc_from(), as it
> won't allocate from address 0 anyway, so please use memblock_alloc()
> instead.

OK, will use memblock_alloc() in this function.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton
  2021-11-29 22:55 ` Dennis Zhou
@ 2021-11-30  6:53   ` Kefeng Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Kefeng Wang @ 2021-11-30  6:53 UTC (permalink / raw)
  To: Dennis Zhou
  Cc: akpm, linux-kernel, linux-mm, tj, gregkh, cl, catalin.marinas,
	will, tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou,
	davem, tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel,
	linux-ia64, linux-mips, linuxppc-dev, linux-riscv, sparclinux,
	x86


On 2021/11/30 6:55, Dennis Zhou wrote:
> Hello,
>
> On Sun, Nov 21, 2021 at 05:35:53PM +0800, Kefeng Wang wrote:
>> When adding support for the page mapping percpu first chunk allocator
>> on arm64, we found a lot of duplicated code in the percpu embed/page
>> first chunk allocators. This patchset aims to clean them up and should
>> introduce no functional change; it has only been tested on arm64.
>>
>> Kefeng Wang (4):
>>    mm: percpu: Generalize percpu related config
>>    mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef
>>    mm: percpu: Add generic pcpu_fc_alloc/free funciton
>>    mm: percpu: Add generic pcpu_populate_pte() function
>>
>>   arch/arm64/Kconfig             |  20 +----
>>   arch/ia64/Kconfig              |   9 +--
>>   arch/mips/Kconfig              |  10 +--
>>   arch/mips/mm/init.c            |  14 +---
>>   arch/powerpc/Kconfig           |  17 +---
>>   arch/powerpc/kernel/setup_64.c |  92 +--------------------
>>   arch/riscv/Kconfig             |  10 +--
>>   arch/sparc/Kconfig             |  12 +--
>>   arch/sparc/kernel/smp_64.c     | 105 +-----------------------
>>   arch/x86/Kconfig               |  17 +---
>>   arch/x86/kernel/setup_percpu.c |  66 ++-------------
>>   drivers/base/arch_numa.c       |  68 +---------------
>>   include/linux/percpu.h         |  13 +--
>>   mm/Kconfig                     |  12 +++
>>   mm/percpu.c                    | 143 +++++++++++++++++++++++++--------
>>   15 files changed, 165 insertions(+), 443 deletions(-)
>>
>> -- 
>> 2.26.2
>>
> I've made a few comments. I think this will be a little bit of a
> challenge to get through because it touches so many architectures. For
> ease, it probably makes sense to run it through my tree, but we'll need
> explicit acks as I mentioned.
>
> I like getting rid of the pcpu_alloc_bootmem()/pcpu_free_bootmem()
> functions. However, let's keep the implementation identical to x86.
OK, will change patch 3 in v2.
>
>
> I don't think we should get rid of the populate_pte_fn(). I'm not
> comfortable changing x86's implementation. Simply offer a NULL, and if
> NULL use the default.

As replied in patch 4, we use the __weak method, and x86's
implementation is not changed in patch 4. Is this OK?

>
> Do you have a tree that Intel pulls? I suggest cleaning up the patches
> and pushing them to a remote branch that they pick up; that would have
> caught the mips typo. Send a PR creating a file in [1] for your branch;
> GitHub is fine. Basic validation on more than arm64 needs to be done
> before I can pick this up, too.

OK, x86/arm64/riscv are tested, but I don't have ppc/mips/sparc
compilers.

I will try to push the new version to GitHub and have it tested by LKP.

Thanks.

>
> [1] https://github.com/intel/lkp-tests/tree/master/repo/linux
>
> Thanks,
> Dennis
> .


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC 1/4] mm: percpu: Generalize percpu related config
  2021-11-21  9:35 ` [PATCH RFC 1/4] mm: percpu: Generalize percpu related config Kefeng Wang
  2021-11-29 22:36   ` Dennis Zhou
@ 2021-12-03 18:54   ` Catalin Marinas
  1 sibling, 0 replies; 19+ messages in thread
From: Catalin Marinas @ 2021-12-03 18:54 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: dennis, akpm, linux-kernel, linux-mm, tj, gregkh, cl, will,
	tsbogend, mpe, benh, paulus, paul.walmsley, palmer, aou, davem,
	tglx, mingo, bp, dave.hansen, hpa, linux-arm-kernel, linux-ia64,
	linux-mips, linuxppc-dev, linux-riscv, sparclinux, x86

On Sun, Nov 21, 2021 at 05:35:54PM +0800, Kefeng Wang wrote:
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index c4207cf9bb17..4ff73299f8a9 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1135,6 +1135,10 @@ config NUMA
>  	select GENERIC_ARCH_NUMA
>  	select ACPI_NUMA if ACPI
>  	select OF_NUMA
> +	select HAVE_SETUP_PER_CPU_AREA
> +	select NEED_PER_CPU_EMBED_FIRST_CHUNK
> +	select NEED_PER_CPU_PAGE_FIRST_CHUNK
> +	select USE_PERCPU_NUMA_NODE_ID
>  	help
>  	  Enable NUMA (Non-Uniform Memory Access) support.
>  
> @@ -1151,22 +1155,6 @@ config NODES_SHIFT
>  	  Specify the maximum number of NUMA Nodes available on the target
>  	  system.  Increases memory reserved to accommodate various tables.
>  
> -config USE_PERCPU_NUMA_NODE_ID
> -	def_bool y
> -	depends on NUMA
> -
> -config HAVE_SETUP_PER_CPU_AREA
> -	def_bool y
> -	depends on NUMA
> -
> -config NEED_PER_CPU_EMBED_FIRST_CHUNK
> -	def_bool y
> -	depends on NUMA
> -
> -config NEED_PER_CPU_PAGE_FIRST_CHUNK
> -	def_bool y
> -	depends on NUMA
> -
>  source "kernel/Kconfig.hz"
>  
>  config ARCH_SPARSEMEM_ENABLE
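
(For reference, the mm/Kconfig side of this move is not quoted in this
hunk; presumably it defines each symbol once as a plain selectable
bool, along the lines of:

	config USE_PERCPU_NUMA_NODE_ID
		bool

	config HAVE_SETUP_PER_CPU_AREA
		bool

	config NEED_PER_CPU_EMBED_FIRST_CHUNK
		bool

	config NEED_PER_CPU_PAGE_FIRST_CHUNK
		bool

so that architectures only select them, as the hunk above does.)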

For arm64:

Acked-by: Catalin Marinas <catalin.marinas@arm.com>


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2021-12-03 18:55 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-21  9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton Kefeng Wang
2021-11-21  9:35 ` [PATCH RFC 1/4] mm: percpu: Generalize percpu related config Kefeng Wang
2021-11-29 22:36   ` Dennis Zhou
2021-11-30  6:22     ` Kefeng Wang
2021-12-03 18:54   ` Catalin Marinas
2021-11-21  9:35 ` [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef Kefeng Wang
2021-11-29 22:40   ` Dennis Zhou
2021-11-30  6:22     ` Kefeng Wang
2021-11-21  9:35 ` [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free funciton Kefeng Wang
2021-11-29 22:45   ` Dennis Zhou
2021-11-30  6:27     ` Kefeng Wang
2021-11-21  9:35 ` [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function Kefeng Wang
2021-11-29 22:49   ` Dennis Zhou
2021-11-30  6:42     ` Kefeng Wang
2021-11-29  2:51 ` [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton Kefeng Wang
2021-11-29  2:54   ` Dennis Zhou
2021-11-29  3:06     ` Kefeng Wang
2021-11-29 22:55 ` Dennis Zhou
2021-11-30  6:53   ` Kefeng Wang
