linux-riscv.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Greentime Hu <green.hu@gmail.com>
To: Logan Gunthorpe <logang@deltatee.com>,
	greentime.hu@sifive.com, paul.walmsley@sifive.com
Cc: Rob Herring <robh@kernel.org>, Albert Ou <aou@eecs.berkeley.edu>,
	Andrew Waterman <andrew@sifive.com>,
	Palmer Dabbelt <palmer@sifive.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Stephen Bates <sbates@raithlin.com>, Zong Li <zong@andestech.com>,
	Olof Johansson <olof@lixom.net>,
	linux-riscv@lists.infradead.org,
	Michael Clark <michaeljclark@mac.com>,
	Christoph Hellwig <hch@lst.de>
Subject: Re: [PATCH v4 2/2] RISC-V: Implement sparsemem
Date: Wed, 31 Jul 2019 14:30:22 +0800	[thread overview]
Message-ID: <CAEbi=3d0RNVKbDUwRL-o70O12XBV7q6n_UT-pLqFoh9omYJZKQ@mail.gmail.com> (raw)
In-Reply-To: <20190109203911.7887-3-logang@deltatee.com>

Hi Logan,

Logan Gunthorpe <logang@deltatee.com> 於 2019年1月10日 週四 上午5:07寫道:
>
> This patch implements sparsemem support for risc-v which helps pave the
> way for memory hotplug and eventually P2P support.
>
> We introduce Kconfig options for virtual and physical address bits which
> are used to calculate the size of the vmemmap and set the
> MAX_PHYSMEM_BITS.
>
> The vmemmap is located directly before the VMALLOC region and sized
> such that we can allocate enough pages to populate all the virtual
> address space in the system (similar to the way it's done in arm64).
>
> During initialization, call memblocks_present() and sparse_init(),
> and provide a stub for vmemmap_populate() (all of which is similar to
> arm64).
>
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Reviewed-by: Palmer Dabbelt <palmer@sifive.com>
> Cc: Albert Ou <aou@eecs.berkeley.edu>
> Cc: Andrew Waterman <andrew@sifive.com>
> Cc: Olof Johansson <olof@lixom.net>
> Cc: Michael Clark <michaeljclark@mac.com>
> Cc: Rob Herring <robh@kernel.org>
> Cc: Zong Li <zong@andestech.com>
> ---
>  arch/riscv/Kconfig                 | 23 +++++++++++++++++++++++
>  arch/riscv/include/asm/pgtable.h   | 21 +++++++++++++++++----
>  arch/riscv/include/asm/sparsemem.h | 11 +++++++++++
>  arch/riscv/kernel/setup.c          |  4 +++-
>  arch/riscv/mm/init.c               |  8 ++++++++
>  5 files changed, 62 insertions(+), 5 deletions(-)
>  create mode 100644 arch/riscv/include/asm/sparsemem.h
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index e0d7d61779a6..bd659327bc6b 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -54,12 +54,32 @@ config ZONE_DMA32
>         bool
>         default y if 64BIT
>
> +config VA_BITS
> +       int
> +       default 32 if 32BIT
> +       default 39 if 64BIT
> +
> +config PA_BITS
> +       int
> +       default 34 if 32BIT
> +       default 56 if 64BIT
> +
>  config PAGE_OFFSET
>         hex
>         default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
>         default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
>         default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
>
> +config ARCH_FLATMEM_ENABLE
> +       def_bool y
> +
> +config ARCH_SPARSEMEM_ENABLE
> +       def_bool y
> +       select SPARSEMEM_VMEMMAP_ENABLE
> +
> +config ARCH_SELECT_MEMORY_MODEL
> +       def_bool ARCH_SPARSEMEM_ENABLE
> +
>  config STACKTRACE_SUPPORT
>         def_bool y
>
> @@ -94,6 +114,9 @@ config PGTABLE_LEVELS
>  config HAVE_KPROBES
>         def_bool n
>
> +config HAVE_ARCH_PFN_VALID
> +       def_bool y
> +
>  menu "Platform type"
>
>  choice
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 16301966d65b..e1162336f5ea 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -89,6 +89,23 @@ extern pgd_t swapper_pg_dir[];
>  #define __S110 PAGE_SHARED_EXEC
>  #define __S111 PAGE_SHARED_EXEC
>
> +#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
> +#define VMALLOC_END      (PAGE_OFFSET - 1)
> +#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
> +
> +/*
> + * Roughly size the vmemmap space to be large enough to fit enough
> + * struct pages to map half the virtual address space. Then
> + * position vmemmap directly below the VMALLOC region.
> + */
> +#define VMEMMAP_SHIFT \
> +       (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
> +#define VMEMMAP_SIZE   (1UL << VMEMMAP_SHIFT)
> +#define VMEMMAP_END    (VMALLOC_START - 1)
> +#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
> +
> +#define vmemmap                ((struct page *)VMEMMAP_START)
> +
>  /*
>   * ZERO_PAGE is a global shared page that is always zero,
>   * used for zero-mapped memory areas, etc.
> @@ -411,10 +428,6 @@ static inline void pgtable_cache_init(void)
>         /* No page table caches to initialize */
>  }
>
> -#define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
> -#define VMALLOC_END      (PAGE_OFFSET - 1)
> -#define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
> -
>  /*
>   * Task size is 0x40000000000 for RV64 or 0xb800000 for RV32.
>   * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
> diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
> new file mode 100644
> index 000000000000..b58ba2d9ed6e
> --- /dev/null
> +++ b/arch/riscv/include/asm/sparsemem.h
> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_SPARSEMEM_H
> +#define __ASM_SPARSEMEM_H
> +
> +#ifdef CONFIG_SPARSEMEM
> +#define MAX_PHYSMEM_BITS       CONFIG_PA_BITS
> +#define SECTION_SIZE_BITS      27
> +#endif /* CONFIG_SPARSEMEM */
> +
> +#endif /* __ASM_SPARSEMEM_H */
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index fc8006a042eb..98f39adefb1a 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -193,6 +193,9 @@ static void __init setup_bootmem(void)
>                                   PFN_PHYS(end_pfn - start_pfn),
>                                   &memblock.memory, 0);
>         }
> +
> +       memblocks_present();
> +       sparse_init();
>  }

I just applied this patch to Linux kernel 5.2.
I used a dts with 2 memory nodes with a hole in it.

memory@80000000 {
    device_type = "memory";
    reg = <0x0 0x80000000 0x0 0x40000000>;
};
memory@180000000 {
    device_type = "memory";
    reg = <0x1 0x80000000 0x0 0x40000000>;
};

I found that it fails to boot. Did I miss anything?

[ 0.000000] Sorting __ex_table...
[ 0.000000] BUG: Bad page state in process swapper pfn:180001
[ 0.000000] page:ffffffcf05400038 refcount:0 mapcount:94371937
mapping:00000000ffffffff index:0x4000000000000000
[ 0.000000] anon
[ 0.000000] flags: 0x0()
[ 0.000000] raw: 0000000000000000 0000000000000000 0000000000000000
00000000ffffffff
[ 0.000000] raw: 4000000000000000 ffffffcf05a00060 0000000005a00060
[ 0.000000] page dumped because: non-NULL mapping
[ 0.000000] Modules linked in:
[ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.2.0-00001-g737d8214d9a9 #3
[ 0.000000] Call Trace:
[ 0.000000] [<ffffffe00017759c>] walk_stackframe+0x0/0xa0
[ 0.000000] [<ffffffe00017769c>] show_stack+0x2a/0x34
[ 0.000000] [<ffffffe00070c53e>] dump_stack+0x62/0x7c
[ 0.000000] [<ffffffe0002330ae>] bad_page+0xca/0x120
[ 0.000000] [<ffffffe00023313c>] free_pages_check_bad+0x38/0x7a
[ 0.000000] [<ffffffe00023368a>] __free_pages_ok+0x496/0x4ba
[ 0.000000] [<ffffffe000234a82>] __free_pages.part.4+0xe/0x22
[ 0.000000] [<ffffffe000234c9e>] __free_pages_core+0x9a/0xa6
[ 0.000000] [<ffffffe000009b0a>] memblock_free_pages+0x12/0x1a
[ 0.000000] [<ffffffe00000b496>] memblock_free_all+0x144/0x1a8
[ 0.000000] [<ffffffe00000274a>] mem_init+0x28/0x36
[ 0.000000] [<ffffffe0000008a0>] start_kernel+0x1bc/0x360
[ 0.000000] [<ffffffe000000074>] clear_bss_done+0x34/0x38
[ 0.000000] Disabling lock debugging due to kernel taint
[ 0.000000] BUG: Bad page state in process swapper pfn:180002
[ 0.000000] page:ffffffcf05400070 refcount:0 mapcount:94371993
mapping:00000000ffffffff index:0x4000000000000000
[ 0.000000] anon
[ 0.000000] flags: 0x0()
[ 0.000000] raw: 0000000000000000 0000000000000000 0000000000000000
00000000ffffffff
[ 0.000000] raw: 4000000000000000 ffffffcf05a00098 0000000005a00098
[ 0.000000] page dumped because: non-NULL mapping
[ 0.000000] Modules linked in:
[ 0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G B
5.2.0-00001-g737d8214d9a9 #3
[ 0.000000] Call Trace:
[ 0.000000] [<ffffffe00017759c>] walk_stackframe+0x0/0xa0
[ 0.000000] [<ffffffe00017769c>] show_stack+0x2a/0x34
[ 0.000000] [<ffffffe00070c53e>] dump_stack+0x62/0x7c
[ 0.000000] [<ffffffe0002330ae>] bad_page+0xca/0x120
[ 0.000000] [<ffffffe00023313c>] free_pages_check_bad+0x38/0x7a
[ 0.000000] [<ffffffe00023368a>] __free_pages_ok+0x496/0x4ba
[ 0.000000] [<ffffffe000234a82>] __free_pages.part.4+0xe/0x22
[ 0.000000] [<ffffffe000234c9e>] __free_pages_core+0x9a/0xa6
[ 0.000000] [<ffffffe000009b0a>] memblock_free_pages+0x12/0x1a
[ 0.000000] [<ffffffe00000b496>] memblock_free_all+0x144/0x1a8
[ 0.000000] [<ffffffe00000274a>] mem_init+0x28/0x36
[ 0.000000] [<ffffffe0000008a0>] start_kernel+0x1bc/0x360
[ 0.000000] [<ffffffe000000074>] clear_bss_done+0x34/0x38
[ 0.000000] BUG: Bad page state in process swapper pfn:180003
[ 0.000000] page:ffffffcf054000a8 refcount:0 mapcount:94372049
mapping:00000000ffffffff index:0x4000000000000000
[ 0.000000] anon
[ 0.000000] flags: 0x0()
[ 0.000000] raw: 0000000000000000 0000000000000000 0000000000000000
00000000ffffffff
[ 0.000000] raw: 4000000000000000 ffffffcf05a000d0 0000000005a000d0
[ 0.000000] page dumped because: non-NULL mapping

I looked at this issue more closely.
I found that it always assigns each memblock region to node 0. Does this
make sense? I am not sure whether I understand this correctly. Do you have
any ideas about this? Thank you. :)

for_each_memblock(memory, reg) {
    unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
    unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
    memblock_set_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
                      &memblock.memory, 0);
                                        ^^^
}

[ 0.000000] Early memory node ranges
[ 0.000000] node 0: [mem 0x0000000080200000-0x00000000bfffffff]
[ 0.000000] node 0: [mem 0x0000000180000000-0x00000001bfffffff]
[ 0.000000] Initmem setup node 0 [mem 0x0000000080200000-0x00000001bfffffff]

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2019-07-31  6:31 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-09 20:39 [PATCH v4 0/2] [PATCH v2 0/6] sparsemem support for RISC-V Logan Gunthorpe
2019-01-09 20:39 ` [PATCH v4 1/2] sh: mm: make use of new memblocks_present() helper Logan Gunthorpe
2019-01-15 13:58   ` Christoph Hellwig
2019-01-15 17:30     ` Logan Gunthorpe
2019-01-09 20:39 ` [PATCH v4 2/2] RISC-V: Implement sparsemem Logan Gunthorpe
2019-01-15 13:58   ` Christoph Hellwig
2019-07-31  6:30   ` Greentime Hu [this message]
2019-07-31 17:07     ` Logan Gunthorpe
2019-08-01  3:34       ` Greentime Hu
     [not found]         ` <CAEbi=3eZcgWevpX9VO9ohgxVDFVprk_t52Xbs3-TdtZ+js3NVA@mail.gmail.com>
2019-08-09 15:46           ` Logan Gunthorpe
2019-08-09 17:01             ` Greentime Hu
2019-08-09 19:03               ` Logan Gunthorpe
2019-08-12  4:01                 ` Greentime Hu
2019-08-12 15:51                   ` Logan Gunthorpe
2019-08-13  6:04                     ` Greentime Hu
2019-08-13 16:14                       ` Logan Gunthorpe
2019-08-13 16:39                         ` Paul Walmsley
2019-08-13 16:48                           ` Paul Walmsley
2019-08-13 16:49                           ` Logan Gunthorpe
2019-08-14 13:35                             ` Greentime Hu
2019-08-14 16:56                               ` Logan Gunthorpe
2019-08-14 17:40                                 ` Paul Walmsley
2019-08-14 17:46                                   ` Logan Gunthorpe
2019-08-14 20:09                                     ` Paul Walmsley
2019-08-14 22:21                               ` Logan Gunthorpe
2019-08-15  9:31                                 ` Greentime Hu
2019-08-15 16:20                                   ` Logan Gunthorpe
2019-08-16  2:07                                     ` Greentime Hu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAEbi=3d0RNVKbDUwRL-o70O12XBV7q6n_UT-pLqFoh9omYJZKQ@mail.gmail.com' \
    --to=green.hu@gmail.com \
    --cc=andrew@sifive.com \
    --cc=aou@eecs.berkeley.edu \
    --cc=greentime.hu@sifive.com \
    --cc=hch@lst.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=logang@deltatee.com \
    --cc=michaeljclark@mac.com \
    --cc=olof@lixom.net \
    --cc=palmer@sifive.com \
    --cc=paul.walmsley@sifive.com \
    --cc=robh@kernel.org \
    --cc=sbates@raithlin.com \
    --cc=zong@andestech.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).