From: Dennis Zhou <dennis@kernel.org>
To: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, tj@kernel.org, gregkh@linuxfoundation.org,
cl@linux.com, catalin.marinas@arm.com, will@kernel.org,
tsbogend@alpha.franken.de, mpe@ellerman.id.au,
benh@kernel.crashing.org, paulus@samba.org,
paul.walmsley@sifive.com, palmer@dabbelt.com,
aou@eecs.berkeley.edu, davem@davemloft.net, tglx@linutronix.de,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
hpa@zytor.com, linux-arm-kernel@lists.infradead.org,
linux-ia64@vger.kernel.org, linux-mips@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, linux-riscv@lists.infradead.org,
sparclinux@vger.kernel.org, x86@kernel.org
Subject: Re: [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function
Date: Mon, 29 Nov 2021 17:49:43 -0500 [thread overview]
Message-ID: <YaVZB/Lcis4iQ3RY@fedora> (raw)
In-Reply-To: <20211121093557.139034-5-wangkefeng.wang@huawei.com>
On Sun, Nov 21, 2021 at 05:35:57PM +0800, Kefeng Wang wrote:
> When NEED_PER_CPU_PAGE_FIRST_CHUNK enabled, we need a function to
> populate pte, add a generic pcpu populate pte function and switch
> to use it.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
> arch/powerpc/kernel/setup_64.c | 47 +--------------------
> arch/sparc/kernel/smp_64.c | 57 +------------------------
> arch/x86/kernel/setup_percpu.c | 5 +--
> drivers/base/arch_numa.c | 51 +---------------------
> include/linux/percpu.h | 5 +--
> mm/percpu.c | 77 +++++++++++++++++++++++++++++++---
> 6 files changed, 79 insertions(+), 163 deletions(-)
>
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 364b1567f822..1a17828af77f 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -788,51 +788,6 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
> unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> EXPORT_SYMBOL(__per_cpu_offset);
>
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
> -
> void __init setup_per_cpu_areas(void)
> {
> const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
> @@ -861,7 +816,7 @@ void __init setup_per_cpu_areas(void)
> }
>
> if (rc < 0)
> - rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
>
> diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
> index 198dadddb75d..00dffe2d834b 100644
> --- a/arch/sparc/kernel/smp_64.c
> +++ b/arch/sparc/kernel/smp_64.c
> @@ -1534,59 +1534,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> return REMOTE_DISTANCE;
> }
>
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - if (pgd_none(*pgd)) {
> - pud_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pgd_populate(&init_mm, pgd, new);
> - }
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -
> void __init setup_per_cpu_areas(void)
> {
> unsigned long delta;
> @@ -1604,9 +1551,7 @@ void __init setup_per_cpu_areas(void)
> pcpu_fc_names[pcpu_chosen_fc], rc);
> }
> if (rc < 0)
> - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> - cpu_to_node,
> - pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, cpu_to_node);
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
>
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index cd672bd46241..4eadbe45078e 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int cpu)
> return IS_ENABLED(CONFIG_NUMA) ? early_cpu_to_node(cpu) : NUMA_NO_NODE;
> }
>
> -static void __init pcpup_populate_pte(unsigned long addr)
> +void __init pcpu_populate_pte(unsigned long addr)
> {
> populate_extra_pte(addr);
> }
> @@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void)
> }
> if (rc < 0)
> rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
> - pcpu_cpu_to_node,
> - pcpup_populate_pte);
> + pcpu_cpu_to_node);
x86 has its own implementation that differs for 32 bit. I'm not
confident this is correct to drop in as a replacement for x86, so I'd
prefer to keep populate_pte_fn() around.
> if (rc < 0)
> panic("cannot initialize percpu area (err=%d)", rc);
>
> diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
> index 23a10cc36165..eaa31e567d1e 100644
> --- a/drivers/base/arch_numa.c
> +++ b/drivers/base/arch_numa.c
> @@ -14,7 +14,6 @@
> #include <linux/of.h>
>
> #include <asm/sections.h>
> -#include <asm/pgalloc.h>
>
> struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
> EXPORT_SYMBOL(node_data);
> @@ -155,52 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
> }
>
> -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> -static void __init pcpu_populate_pte(unsigned long addr)
> -{
> - pgd_t *pgd = pgd_offset_k(addr);
> - p4d_t *p4d;
> - pud_t *pud;
> - pmd_t *pmd;
> -
> - p4d = p4d_offset(pgd, addr);
> - if (p4d_none(*p4d)) {
> - pud_t *new;
> -
> - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - p4d_populate(&init_mm, p4d, new);
> - }
> -
> - pud = pud_offset(p4d, addr);
> - if (pud_none(*pud)) {
> - pmd_t *new;
> -
> - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pud_populate(&init_mm, pud, new);
> - }
> -
> - pmd = pmd_offset(pud, addr);
> - if (!pmd_present(*pmd)) {
> - pte_t *new;
> -
> - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> - if (!new)
> - goto err_alloc;
> - pmd_populate_kernel(&init_mm, pmd, new);
> - }
> -
> - return;
> -
> -err_alloc:
> - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> -}
> -#endif
> -
> void __init setup_per_cpu_areas(void)
> {
> unsigned long delta;
> @@ -225,9 +178,7 @@ void __init setup_per_cpu_areas(void)
>
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> if (rc < 0)
> - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
> - early_cpu_to_node,
> - pcpu_populate_pte);
> + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
> #endif
> if (rc < 0)
> panic("Failed to initialize percpu areas (err=%d).", rc);
> diff --git a/include/linux/percpu.h b/include/linux/percpu.h
> index d73c97ef4ff4..f1ec5ad1351c 100644
> --- a/include/linux/percpu.h
> +++ b/include/linux/percpu.h
> @@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
> extern enum pcpu_fc pcpu_chosen_fc;
>
> typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
> -typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
> typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
>
> extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
> @@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> #endif
>
> #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
> +void __init pcpu_populate_pte(unsigned long addr);
> extern int __init pcpu_page_first_chunk(size_t reserved_size,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> - pcpu_fc_populate_pte_fn_t populate_pte_fn);
> + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
> #endif
>
> extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
> diff --git a/mm/percpu.c b/mm/percpu.c
> index efaa1cbaf73d..d907daed04eb 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -3162,11 +3162,80 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> #endif /* BUILD_EMBED_FIRST_CHUNK */
>
> #ifdef BUILD_PAGE_FIRST_CHUNK
> +#include <asm/pgalloc.h>
> +
> +#ifndef P4D_TABLE_SIZE
> +#define P4D_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PUD_TABLE_SIZE
> +#define PUD_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PMD_TABLE_SIZE
> +#define PMD_TABLE_SIZE PAGE_SIZE
> +#endif
> +
> +#ifndef PTE_TABLE_SIZE
> +#define PTE_TABLE_SIZE PAGE_SIZE
> +#endif
> +void __init __weak pcpu_populate_pte(unsigned long addr)
> +{
> + pgd_t *pgd = pgd_offset_k(addr);
> + p4d_t *p4d;
> + pud_t *pud;
> + pmd_t *pmd;
> +
> + if (pgd_none(*pgd)) {
> + p4d_t *new;
> +
> + new = memblock_alloc_from(P4D_TABLE_SIZE, P4D_TABLE_SIZE, PAGE_SIZE);
It's unnecessary to specify a min_addr to memblock_alloc_from() as it
won't allocate 0 anyway. So please use memblock_alloc() instead.
> + if (!new)
> + goto err_alloc;
> + pgd_populate(&init_mm, pgd, new);
> + }
> +
> + p4d = p4d_offset(pgd, addr);
> + if (p4d_none(*p4d)) {
> + pud_t *new;
> +
> + new = memblock_alloc_from(PUD_TABLE_SIZE, PUD_TABLE_SIZE, PAGE_SIZE);
See above.
> + if (!new)
> + goto err_alloc;
> + p4d_populate(&init_mm, p4d, new);
> + }
> +
> + pud = pud_offset(p4d, addr);
> + if (pud_none(*pud)) {
> + pmd_t *new;
> +
> + new = memblock_alloc_from(PMD_TABLE_SIZE, PMD_TABLE_SIZE, PAGE_SIZE);
See above.
> + if (!new)
> + goto err_alloc;
> + pud_populate(&init_mm, pud, new);
> + }
> +
> + pmd = pmd_offset(pud, addr);
> + if (!pmd_present(*pmd)) {
> + pte_t *new;
> +
> + new = memblock_alloc_from(PTE_TABLE_SIZE, PTE_TABLE_SIZE, PAGE_SIZE);
See above.
> + if (!new)
> + goto err_alloc;
> + pmd_populate_kernel(&init_mm, pmd, new);
> + }
> +
> + return;
> +
> +err_alloc:
> + panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
> + __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
> +}
> +
> /**
> * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
> * @reserved_size: the size of reserved percpu area in bytes
> * @cpu_to_nd_fn: callback to convert cpu to it's node, optional
> - * @populate_pte_fn: function to populate pte
> *
> * This is a helper to ease setting up page-remapped first percpu
> * chunk and can be called where pcpu_setup_first_chunk() is expected.
> @@ -3177,9 +3246,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
> * RETURNS:
> * 0 on success, -errno on failure.
> */
> -int __init pcpu_page_first_chunk(size_t reserved_size,
> - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn,
> - pcpu_fc_populate_pte_fn_t populate_pte_fn)
> +int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
> {
> static struct vm_struct vm;
> struct pcpu_alloc_info *ai;
> @@ -3243,7 +3310,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
> (unsigned long)vm.addr + unit * ai->unit_size;
>
> for (i = 0; i < unit_pages; i++)
> - populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
> + pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
>
> /* pte already populated, the following shouldn't fail */
> rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
> --
> 2.26.2
>
next prev parent reply other threads:[~2021-11-29 22:50 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-21 9:35 [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton Kefeng Wang
2021-11-21 9:35 ` [PATCH RFC 1/4] mm: percpu: Generalize percpu related config Kefeng Wang
2021-11-29 22:36 ` Dennis Zhou
2021-11-30 6:22 ` Kefeng Wang
2021-12-03 18:54 ` Catalin Marinas
2021-11-21 9:35 ` [PATCH RFC 2/4] mm: percpu: Add pcpu_fc_cpu_to_node_fn_t typedef Kefeng Wang
2021-11-29 22:40 ` Dennis Zhou
2021-11-30 6:22 ` Kefeng Wang
2021-11-21 9:35 ` [PATCH RFC 3/4] mm: percpu: Add generic pcpu_fc_alloc/free funciton Kefeng Wang
2021-11-29 22:45 ` Dennis Zhou
2021-11-30 6:27 ` Kefeng Wang
2021-11-21 9:35 ` [PATCH RFC 4/4] mm: percpu: Add generic pcpu_populate_pte() function Kefeng Wang
2021-11-29 22:49 ` Dennis Zhou [this message]
2021-11-30 6:42 ` Kefeng Wang
2021-11-29 2:51 ` [PATCH RFC 0/4] mm: percpu: Cleanup percpu first chunk funciton Kefeng Wang
2021-11-29 2:54 ` Dennis Zhou
2021-11-29 3:06 ` Kefeng Wang
2021-11-29 22:55 ` Dennis Zhou
2021-11-30 6:53 ` Kefeng Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YaVZB/Lcis4iQ3RY@fedora \
--to=dennis@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=aou@eecs.berkeley.edu \
--cc=benh@kernel.crashing.org \
--cc=bp@alien8.de \
--cc=catalin.marinas@arm.com \
--cc=cl@linux.com \
--cc=dave.hansen@linux.intel.com \
--cc=davem@davemloft.net \
--cc=gregkh@linuxfoundation.org \
--cc=hpa@zytor.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-ia64@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mips@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-riscv@lists.infradead.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mingo@redhat.com \
--cc=mpe@ellerman.id.au \
--cc=palmer@dabbelt.com \
--cc=paul.walmsley@sifive.com \
--cc=paulus@samba.org \
--cc=sparclinux@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=tsbogend@alpha.franken.de \
--cc=wangkefeng.wang@huawei.com \
--cc=will@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).