linux-riscv.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Anup Patel <anup@brainfault.org>
To: Greentime Hu <greentime.hu@sifive.com>
Cc: Mark Rutland <mark.rutland@arm.com>,
	Ganapatrao Prabhakerrao Kulkarni <gkulkarni@marvell.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	"linux-kernel@vger.kernel.org List"
	<linux-kernel@vger.kernel.org>,
	Mike Rapoport <rppt@linux.ibm.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Greentime Hu <green.hu@gmail.com>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	greentime@kernel.org,
	linux-riscv <linux-riscv@lists.infradead.org>,
	Will Deacon <will@kernel.org>, Christoph Hellwig <hch@lst.de>
Subject: Re: [RFC PATCH] riscv: Add numa support for riscv64 platform
Date: Tue, 24 Dec 2019 17:14:52 +0530	[thread overview]
Message-ID: <CAAhSdy2SV7WPKJGGXAaX8xN3rOSg-r0itDw4fHjoThgOuC5t2Q@mail.gmail.com> (raw)
In-Reply-To: <CAHCEehJKkgxMd3i-K4N6uzQmJ2nYEtB0w7nBVr1-Lh7xYSi6+A@mail.gmail.com>

On Tue, Dec 24, 2019 at 5:03 PM Greentime Hu <greentime.hu@sifive.com> wrote:
>
> Hi Anup,
>
> On Tue, Dec 24, 2019 at 6:28 PM Anup Patel <anup@brainfault.org> wrote:
> >
> > +Christoph, +Mike, +Ganpatro
> >
> > On Tue, Dec 24, 2019 at 2:25 PM Greentime Hu <greentime.hu@sifive.com> wrote:
> > >
> > > This implementation is based on arm64 porting. It is tested with
> > > qemu-system-riscv64, unleashed board and OmniXtend FPGA platform.
> > >
> > > There will be 2 nodes in /sys/devices/system/node if it is described in dts and
> > > CONFIG_NUMA is enabled. We can use numastat/numactl/numademo to see its status.
> >
> > This patch can be broken down into separate (more granular) patches.
> > For example:
> > 1. asm/pgtable.h change can be separate patch
> > 2. Movement of unflatten_device_tree() from setup_arch() to paging_init()
> > 3. changes in kernel/smpboot.c can also be separate patch
> >
> > Also, since this is ported from arm64 implementation, I strongly
> > suggest having a generic NUMA support which can be shared
> > between arm64 and riscv. I think Ganpat (CC'ed) here could be
> > the best person to maintain the generic NUMA support since he
> > originally added it for arm64.
> >
> > >
> > > Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> > > ---
> > >  arch/riscv/Kconfig               |  30 ++-
> > >  arch/riscv/include/asm/mmzone.h  |  13 ++
> > >  arch/riscv/include/asm/numa.h    |  46 ++++
> > >  arch/riscv/include/asm/pci.h     |  10 +
> > >  arch/riscv/include/asm/pgtable.h |  20 ++
> > >  arch/riscv/kernel/setup.c        |  26 ++-
> > >  arch/riscv/kernel/smpboot.c      |  20 +-
> > >  arch/riscv/mm/Makefile           |   1 +
> > >  arch/riscv/mm/init.c             |   3 +
> > >  arch/riscv/mm/numa.c             | 372 +++++++++++++++++++++++++++++++
> > >  10 files changed, 536 insertions(+), 5 deletions(-)
> > >  create mode 100644 arch/riscv/include/asm/mmzone.h
> > >  create mode 100644 arch/riscv/include/asm/numa.h
> > >  create mode 100644 arch/riscv/mm/numa.c
> > >
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index bc7598fc5f00..53ae1816df50 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -22,7 +22,6 @@ config RISCV
> > >         select CLONE_BACKWARDS
> > >         select COMMON_CLK
> > >         select GENERIC_CLOCKEVENTS
> > > -       select GENERIC_CPU_DEVICES
> > >         select GENERIC_IRQ_SHOW
> > >         select GENERIC_PCI_IOMAP
> > >         select GENERIC_SCHED_CLOCK
> > > @@ -234,6 +233,35 @@ config TUNE_GENERIC
> > >         bool "generic"
> > >
> > >  endchoice
> > > +# Common NUMA Features
> > > +config NUMA
> > > +       bool "Numa Memory Allocation and Scheduler Support"
> > > +       select OF_NUMA
> > > +       select ARCH_SUPPORTS_NUMA_BALANCING
> > > +       depends on SPARSEMEM
> > > +       help
> > > +         Enable NUMA (Non Uniform Memory Access) support.
> > > +
> > > +         The kernel will try to allocate memory used by a CPU on the
> > > +         local memory of the CPU and add some more
> > > +         NUMA awareness to the kernel.
> > > +
> > > +config NODES_SHIFT
> > > +       int "Maximum NUMA Nodes (as a power of 2)"
> > > +       range 1 10
> > > +       default "2"
> > > +       depends on NEED_MULTIPLE_NODES
> > > +       help
> > > +         Specify the maximum number of NUMA Nodes available on the target
> > > +         system.  Increases memory reserved to accommodate various tables.
> > > +
> > > +config USE_PERCPU_NUMA_NODE_ID
> > > +       def_bool y
> > > +       depends on NUMA
> > > +
> > > +config NEED_PER_CPU_EMBED_FIRST_CHUNK
> > > +       def_bool y
> > > +       depends on NUMA
> > >
> > >  config RISCV_ISA_C
> > >         bool "Emit compressed instructions when building Linux"
> > > diff --git a/arch/riscv/include/asm/mmzone.h b/arch/riscv/include/asm/mmzone.h
> > > new file mode 100644
> > > index 000000000000..fa17e01d9ab2
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/mmzone.h
> > > @@ -0,0 +1,13 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef __ASM_MMZONE_H
> > > +#define __ASM_MMZONE_H
> > > +
> > > +#ifdef CONFIG_NUMA
> > > +
> > > +#include <asm/numa.h>
> > > +
> > > +extern struct pglist_data *node_data[];
> > > +#define NODE_DATA(nid)         (node_data[(nid)])
> > > +
> > > +#endif /* CONFIG_NUMA */
> > > +#endif /* __ASM_MMZONE_H */
> > > diff --git a/arch/riscv/include/asm/numa.h b/arch/riscv/include/asm/numa.h
> > > new file mode 100644
> > > index 000000000000..10a4513d078b
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/numa.h
> > > @@ -0,0 +1,46 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef __ASM_NUMA_H
> > > +#define __ASM_NUMA_H
> > > +
> > > +#include <asm/topology.h>
> > > +
> > > +#ifdef CONFIG_NUMA
> > > +
> > > +extern nodemask_t numa_nodes_parsed __initdata;
> > > +
> > > +extern bool numa_off;
> > > +
> > > +/* Mappings between node number and cpus on that node. */
> > > +extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
> > > +void numa_clear_node(unsigned int cpu);
> > > +
> > > +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
> > > +const struct cpumask *cpumask_of_node(int node);
> > > +#else
> > > +/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
> > > +static inline const struct cpumask *cpumask_of_node(int node)
> > > +{
> > > +       return node_to_cpumask_map[node];
> > > +}
> > > +#endif
> > > +
> > > +void __init riscv_numa_init(void);
> > > +int __init numa_add_memblk(int nodeid, u64 start, u64 end);
> > > +void __init numa_set_distance(int from, int to, int distance);
> > > +void __init numa_free_distance(void);
> > > +void __init early_map_cpu_to_node(unsigned int cpu, int nid);
> > > +void numa_store_cpu_info(unsigned int cpu);
> > > +void numa_add_cpu(unsigned int cpu);
> > > +void numa_remove_cpu(unsigned int cpu);
> > > +
> > > +#else  /* CONFIG_NUMA */
> > > +
> > > +static inline void numa_store_cpu_info(unsigned int cpu) { }
> > > +static inline void numa_add_cpu(unsigned int cpu) { }
> > > +static inline void numa_remove_cpu(unsigned int cpu) { }
> > > +static inline void riscv_numa_init(void) { }
> > > +static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
> > > +
> > > +#endif /* CONFIG_NUMA */
> > > +
> > > +#endif /* __ASM_NUMA_H */
> > > diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
> > > index 5ac8daa1cc36..781aa8b6dcd3 100644
> > > --- a/arch/riscv/include/asm/pci.h
> > > +++ b/arch/riscv/include/asm/pci.h
> > > @@ -32,6 +32,16 @@ static inline int pci_proc_domain(struct pci_bus *bus)
> > >         /* always show the domain in /proc */
> > >         return 1;
> > >  }
> > > +
> > > +#ifdef CONFIG_NUMA
> > > +int pcibus_to_node(struct pci_bus *bus);
> > > +#ifndef cpumask_of_pcibus
> > > +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ?            \
> > > +                                cpu_all_mask :                         \
> > > +                                cpumask_of_node(pcibus_to_node(bus)))
> > > +#endif
> > > +#endif /* CONFIG_NUMA */
> > > +
> > >  #endif  /* CONFIG_PCI */
> > >
> > >  #endif  /* __ASM_PCI_H */
> > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > > index d3221017194d..04b7c38870f7 100644
> > > --- a/arch/riscv/include/asm/pgtable.h
> > > +++ b/arch/riscv/include/asm/pgtable.h
> > > @@ -175,6 +175,11 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
> > >         return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PAGE_PFN_SHIFT);
> > >  }
> > >
> > > +static inline pte_t pmd_pte(pmd_t pmd)
> > > +{
> > > +       return __pte(pmd_val(pmd));
> > > +}
> > > +
> > >  /* Yields the page frame number (PFN) of a page table entry */
> > >  static inline unsigned long pte_pfn(pte_t pte)
> > >  {
> > > @@ -288,6 +293,21 @@ static inline pte_t pte_mkhuge(pte_t pte)
> > >         return pte;
> > >  }
> > >
> > > +#ifdef CONFIG_NUMA_BALANCING
> > > +/*
> > > + * See the comment in include/asm-generic/pgtable.h
> > > + */
> > > +static inline int pte_protnone(pte_t pte)
> > > +{
> > > +       return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
> > > +}
> > > +
> > > +static inline int pmd_protnone(pmd_t pmd)
> > > +{
> > > +       return pte_protnone(pmd_pte(pmd));
> > > +}
> > > +#endif
> > > +
> > >  /* Modify page protection bits */
> > >  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
> > >  {
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 845ae0e12115..f6f2354036a0 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -53,6 +53,31 @@ void __init parse_dtb(void)
> > >  #endif
> > >  }
> > >
> > > +static DEFINE_PER_CPU(struct cpu, cpu_devices);
> > > +
> > > +static int __init topology_init(void)
> > > +{
> > > +       int i, ret;
> > > +
> > > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> > > +       for_each_online_node(i)
> > > +               register_one_node(i);
> > > +#endif
> > > +
> > > +       for_each_possible_cpu(i) {
> > > +               struct cpu *cpu = &per_cpu(cpu_devices, i);
> > > +
> > > +               cpu->hotpluggable = 1;
> >
> > Strange !!!
> >
> > We cannot claim CPUs are hotpluggable until Atish's
> > Linux SBI v0.2 HSM patches are available.
>
> Thanks. It should be set to 0 for now.
> cpu->hotpluggable = 0;
>
> > If required then Linux RISC-V NUMA patches should
> > be based upon Atish's Linux SBI v0.2 HSM support.
> >
> > > +               ret = register_cpu(cpu, i);
> > > +               if (unlikely(ret))
> > > +                       pr_warn("Warning: %s: register_cpu %d failed (%d)\n",
> > > +                              __func__, i, ret);
> > > +       }
> > > +
> > > +       return 0;
> > > +}
> > > +subsys_initcall(topology_init);
> > > +
> > >  void __init setup_arch(char **cmdline_p)
> > >  {
> > >         init_mm.start_code = (unsigned long) _stext;
> > > @@ -66,7 +91,6 @@ void __init setup_arch(char **cmdline_p)
> > >
> > >         setup_bootmem();
> > >         paging_init();
> > > -       unflatten_device_tree();
> >
> > Movement of unflatten_device_tree() call from here to
> > paging_init() needs explanation.
> >
>
> It is moved to paging_init() is because that of_numa_init() will use
> of_numa_parse_cpu_nodes() and of_numa_parse_memory_nodes().
> We have to unflatten_device_tree() first then we can call
> riscv_numa_init(), but riscv_numa_init() shall be called before
> memblocks_present() because the node information will be used in
> memblocks_present().
> So the order will be like this.
>
> unflatten_device_tree(); //To get dt information for memory and nodes
> riscv_numa_init(); //It can use of_numa_parse_* and set the nodes information
> memblocks_present(); //The node information can be used now

The reasoning is fine. Please split your patch into smaller patches
with detailed commit description in each patch.

Regards,
Anup


  reply	other threads:[~2019-12-24 11:45 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-24  8:55 [RFC PATCH] riscv: Add numa support for riscv64 platform Greentime Hu
2019-12-24 10:28 ` Anup Patel
2019-12-24 10:58   ` Ganapatrao Kulkarni
2019-12-24 11:33   ` Greentime Hu
2019-12-24 11:44     ` Anup Patel [this message]
2019-12-24 16:02 ` Randy Dunlap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAAhSdy2SV7WPKJGGXAaX8xN3rOSg-r0itDw4fHjoThgOuC5t2Q@mail.gmail.com \
    --to=anup@brainfault.org \
    --cc=catalin.marinas@arm.com \
    --cc=gkulkarni@marvell.com \
    --cc=green.hu@gmail.com \
    --cc=greentime.hu@sifive.com \
    --cc=greentime@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=mark.rutland@arm.com \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=rppt@linux.ibm.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).