Linux-arch Archive on lore.kernel.org
 help / color / Atom feed
WARNING: multiple messages have this Message-ID
From: Mike Rapoport <rppt@kernel.org>
To: Baoquan He <bhe@redhat.com>
Cc: Rich Felker <dalias@libc.org>,
	linux-ia64@vger.kernel.org, linux-doc@vger.kernel.org,
	Catalin Marinas <catalin.marinas@arm.com>,
	Heiko Carstens <heiko.carstens@de.ibm.com>,
	Michal Hocko <mhocko@kernel.org>,
	"James E.J. Bottomley" <James.Bottomley@hansenpartnership.com>,
	Max Filippov <jcmvbkbc@gmail.com>, Guo Ren <guoren@kernel.org>,
	linux-csky@vger.kernel.org, linux-parisc@vger.kernel.org,
	sparclinux@vger.kernel.org, linux-hexagon@vger.kernel.org,
	linux-riscv@lists.infradead.org,
	Greg Ungerer <gerg@linux-m68k.org>,
	linux-arch@vger.kernel.org, linux-s390@vger.kernel.org,
	linux-snps-arc@lists.infradead.org, linux-c6x-dev@linux-c6x.org,
	Brian Cain <bcain@codeaurora.org>,
	Jonathan Corbet <corbet@lwn.net>,
	linux-sh@vger.kernel.org, Michael Ellerman <mpe@ellerman.id.au>,
	Helge Deller <deller@gmx.de>,
	x86@kernel.org, Russell King <linux@armlinux.org.uk>,
	Ley Foon Tan <ley.foon.>
Subject: Re: [PATCH 03/21] mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option
Date: Tue, 21 Apr 2020 12:09:08 +0300
Message-ID: <20200421090908.GC14260@kernel.org> (raw)
In-Reply-To: <20200421042316.GQ4247@MiWiFi-R3L-srv>

On Tue, Apr 21, 2020 at 12:23:16PM +0800, Baoquan He wrote:
> On 04/12/20 at 10:48pm, Mike Rapoport wrote:
> > From: Mike Rapoport <rppt@linux.ibm.com>
> > 
> > The CONFIG_HAVE_MEMBLOCK_NODE_MAP is used to differentiate initialization
> > of nodes and zones structures between the systems that have region to node
> > mapping in memblock and those that don't.
> > 
> > Currently all the NUMA architectures enable this option and for the
> > non-NUMA systems we can presume that all the memory belongs to node 0 and
> > therefore the compile time configuration option is not required.
> > 
> > The remaining few architectures that use DISCONTIGMEM without NUMA are
> > easily updated to use memblock_add_node() instead of memblock_add() and
> > thus have proper correspondence of memblock regions to NUMA nodes.
> > 
> > Still, free_area_init_node() must have a backward compatible version
> > because its semantics with and without CONFIG_HAVE_MEMBLOCK_NODE_MAP is
> > different. Once all the architectures will use the new semantics, the
> > entire compatibility layer can be dropped.
> > 
> > To avoid addition of extra run time memory to store node id for
> > architectures that keep memblock but have only a single node, the node id
> > field of the memblock_region is guarded by CONFIG_NEED_MULTIPLE_NODES and
> > the corresponding accessors presume that in those cases it is always 0.
> > 
> > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
> > ---
> ...
> > diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> > index 6bc37a731d27..45abfc54da37 100644
> > --- a/include/linux/memblock.h
> > +++ b/include/linux/memblock.h
> > @@ -50,7 +50,7 @@ struct memblock_region {
> >  	phys_addr_t base;
> >  	phys_addr_t size;
> >  	enum memblock_flags flags;
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  	int nid;
> >  #endif
> >  };
> > @@ -215,7 +215,6 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
> >  	return m->flags & MEMBLOCK_NOMAP;
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
> >  			    unsigned long  *end_pfn);
> >  void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
> > @@ -234,7 +233,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
> >  #define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid)		\
> >  	for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
> >  	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> >  
> >  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> >  void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> > @@ -310,10 +308,10 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> >  	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
> >  			       nid, flags, p_start, p_end, p_nid)
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  int memblock_set_node(phys_addr_t base, phys_addr_t size,
> >  		      struct memblock_type *type, int nid);
> >  
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  static inline void memblock_set_region_node(struct memblock_region *r, int nid)
> >  {
> >  	r->nid = nid;
> > @@ -332,7 +330,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
> >  {
> >  	return 0;
> >  }
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > +#endif /* CONFIG_NEED_MULTIPLE_NODES */
> >  
> >  /* Flags for memblock allocation APIs */
> >  #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index a404026d14d4..5903bbbdb336 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -2344,9 +2344,8 @@ static inline unsigned long get_num_physpages(void)
> >  	return phys_pages;
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  /*
> > - * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
> > + * Using memblock node mappings, an architecture may initialise its
> >   * zones, allocate the backing mem_map and account for memory holes in a more
> >   * architecture independent manner. This is a substitute for creating the
> >   * zone_sizes[] and zholes_size[] arrays and passing them to
> > @@ -2367,9 +2366,6 @@ static inline unsigned long get_num_physpages(void)
> >   * registered physical page range.  Similarly
> >   * sparse_memory_present_with_active_regions() calls memory_present() for
> >   * each range when SPARSEMEM is enabled.
> > - *
> > - * See mm/page_alloc.c for more information on each function exposed by
> > - * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
> >   */
> >  extern void free_area_init_nodes(unsigned long *max_zone_pfn);
> >  unsigned long node_map_pfn_alignment(void);
> > @@ -2384,13 +2380,9 @@ extern void free_bootmem_with_active_regions(int nid,
> >  						unsigned long max_low_pfn);
> >  extern void sparse_memory_present_with_active_regions(int nid);
> >  
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -
> > -#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
> > -    !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
> > +#ifndef CONFIG_NEED_MULTIPLE_NODES
> >  static inline int early_pfn_to_nid(unsigned long pfn)
> >  {
> > -	BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
> >  	return 0;
> >  }
> >  #else
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 7b5b6eba402f..ffc2a3d6036b 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -874,7 +874,7 @@ extern int movable_zone;
> >  #ifdef CONFIG_HIGHMEM
> >  static inline int zone_movable_is_highmem(void)
> >  {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  	return movable_zone == ZONE_HIGHMEM;
> >  #else
> >  	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
> 
> If CONFIG_HIGHMEM is enabled, the above judgement is always true,
> wondering what's the purpose we have to do like this. It's not related
> to this patch though.

The whole is_higmem() implementation looks scary and begs for cleanup :)

> > diff --git a/mm/Kconfig b/mm/Kconfig index
> > c1acc34c1c35..aaa5bdaa1c8a 100644 --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -126,9 +126,6 @@ config SPARSEMEM_VMEMMAP
> >  	  pfn_to_page and page_to_pfn operations.  This is the most
> >  	  efficient option when sufficient kernel resources are available.
> >  
> > -config HAVE_MEMBLOCK_NODE_MAP
> > -	bool
> > -
> >  config HAVE_MEMBLOCK_PHYS_MAP
> >  	bool
> >  
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index 43e2fd3006c1..743659d88fc4 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -620,7 +620,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
> >  		 * area, insert that portion.
> >  		 */
> >  		if (rbase > base) {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  			WARN_ON(nid != memblock_get_region_node(rgn));
> >  #endif
> >  			WARN_ON(flags != rgn->flags);
> > @@ -1197,7 +1197,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
> >  	*idx = ULLONG_MAX;
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  /*
> >   * Common iterator interface used to define for_each_mem_pfn_range().
> >   */
> > @@ -1247,6 +1246,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
> >  int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
> >  				      struct memblock_type *type, int nid)
> >  {
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  	int start_rgn, end_rgn;
> >  	int i, ret;
> >  
> > @@ -1258,9 +1258,10 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
> >  		memblock_set_region_node(&type->regions[i], nid);
> >  
> >  	memblock_merge_regions(type);
> > +#endif
> >  	return 0;
> >  }
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > +
> >  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> >  /**
> >   * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
> > @@ -1799,7 +1800,6 @@ bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
> >  	return !memblock_is_nomap(&memblock.memory.regions[i]);
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
> >  			 unsigned long *start_pfn, unsigned long *end_pfn)
> >  {
> > @@ -1814,7 +1814,6 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
> >  
> >  	return memblock_get_region_node(&type->regions[mid]);
> >  }
> > -#endif
> >  
> >  /**
> >   * memblock_is_region_memory - check if a region is a subset of memory
> > @@ -1905,7 +1904,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
> >  		size = rgn->size;
> >  		end = base + size - 1;
> >  		flags = rgn->flags;
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  		if (memblock_get_region_node(rgn) != MAX_NUMNODES)
> >  			snprintf(nid_buf, sizeof(nid_buf), " on node %d",
> >  				 memblock_get_region_node(rgn));
> > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> > index fc0aad0bc1f5..e67dc501576a 100644
> > --- a/mm/memory_hotplug.c
> > +++ b/mm/memory_hotplug.c
> > @@ -1372,11 +1372,7 @@ check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
> >  
> >  static int __init cmdline_parse_movable_node(char *p)
> >  {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  	movable_node_enabled = true;
> > -#else
> > -	pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
> > -#endif
> 
> Wondering if this change will impact anything. Before, those ARCHes with
> CONFIG_HAVE_MEMBLOCK_NODE_MAP support movable_node. With this patch
> applied, those ARCHes which don't support CONFIG_HAVE_MEMBLOCK_NODE_MAP
> can also have 'movable_node' specified in kernel cmdline.
> 
> >  	return 0;
> >  }
> >  early_param("movable_node", cmdline_parse_movable_node);
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 1ac775bfc9cf..4530e9cfd9f7 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -335,7 +335,6 @@ static unsigned long nr_kernel_pages __initdata;
> >  static unsigned long nr_all_pages __initdata;
> >  static unsigned long dma_reserve __initdata;
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
> >  static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
> >  static unsigned long required_kernelcore __initdata;
> 
> Does it mean those ARCHes which don't support
> CONFIG_HAVE_MEMBLOCK_NODE_MAP before, will have 'kernelcore=' and
> 'movablecore=' now, and will have MOVABLE zone?

I hesitated a lot about whether to hide the kernelcore/movablecore and
related code behind an #ifdef.
In the end I've decided to keep the code compiled unconditionally as it
is anyway __init and no sane person would pass "kernelcore=" to the
kernel on a UMA system.

> > @@ -348,7 +347,6 @@ static bool mirrored_kernelcore __meminitdata;
> >  /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
> >  int movable_zone;
> >  EXPORT_SYMBOL(movable_zone);
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> >  
> >  #if MAX_NUMNODES > 1
> >  unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
> > @@ -1499,8 +1497,7 @@ void __free_pages_core(struct page *page, unsigned int order)
> >  	__free_pages(page, order);
> >  }
> >  
> > -#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
> > -	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  
> >  static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
> >  
> > @@ -1542,7 +1539,7 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
> >  
> >  	return nid;
> >  }
> > -#endif
> > +#endif /* CONFIG_NEED_MULTIPLE_NODES */
> >  
> >  #ifdef CONFIG_NODES_SPAN_OTHER_NODES
> >  /* Only safe to use early in boot when initialisation is single-threaded */
> > @@ -5924,7 +5921,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
> >  static bool __meminit
> >  overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> >  {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  	static struct memblock_region *r;
> >  
> >  	if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
> > @@ -5940,7 +5936,6 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> >  			return true;
> >  		}
> >  	}
> > -#endif
> >  	return false;
> >  }
> >  
> > @@ -6573,8 +6568,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
> >  	return nr_absent;
> >  }
> >  
> > -#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -static inline unsigned long __init zone_spanned_pages_in_node(int nid,
> > +static inline unsigned long __init compat_zone_spanned_pages_in_node(int nid,
> 
> Is it compact zone which has continuous memory region, and the
> compat here is typo? Or it's compatible zone? The name seems a little
> confusing, or I miss something.

It's 'compat' from 'compatibility'. This is kinda "the old way" and the
version that was defined when CONFIG_HAVE_MEMBLOCK_NODE_MAP=y is the
"new way", so I picked 'compat' for backwards compatibility. 
Anyway, it will go away later in pacth 19. 

> >  					unsigned long zone_type,
> >  					unsigned long node_start_pfn,
> >  					unsigned long node_end_pfn,
> > @@ -6593,7 +6587,7 @@ static inline unsigned long __init zone_spanned_pages_in_node(int nid,
> >  	return zones_size[zone_type];
> >  }
> >  
> > -static inline unsigned long __init zone_absent_pages_in_node(int nid,
> > +static inline unsigned long __init compat_zone_absent_pages_in_node(int nid,
> >  						unsigned long zone_type,
> >  						unsigned long node_start_pfn,
> >  						unsigned long node_end_pfn,
> > @@ -6605,13 +6599,12 @@ static inline unsigned long __init zone_absent_pages_in_node(int nid,
> >  	return zholes_size[zone_type];
> >  }
> >  
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -
> >  static void __init calculate_node_totalpages(struct pglist_data *pgdat,
> >  						unsigned long node_start_pfn,
> >  						unsigned long node_end_pfn,
> >  						unsigned long *zones_size,
> > -						unsigned long *zholes_size)
> > +						unsigned long *zholes_size,
> > +						bool compat)
> >  {
> >  	unsigned long realtotalpages = 0, totalpages = 0;
> >  	enum zone_type i;
> > @@ -6619,17 +6612,38 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
> >  	for (i = 0; i < MAX_NR_ZONES; i++) {
> >  		struct zone *zone = pgdat->node_zones + i;
> >  		unsigned long zone_start_pfn, zone_end_pfn;
> > +		unsigned long spanned, absent;
> >  		unsigned long size, real_size;
> >  
> > -		size = zone_spanned_pages_in_node(pgdat->node_id, i,
> > -						  node_start_pfn,
> > -						  node_end_pfn,
> > -						  &zone_start_pfn,
> > -						  &zone_end_pfn,
> > -						  zones_size);
> > -		real_size = size - zone_absent_pages_in_node(pgdat->node_id, i,
> > -						  node_start_pfn, node_end_pfn,
> > -						  zholes_size);
> > +		if (compat) {
> > +			spanned = compat_zone_spanned_pages_in_node(
> > +						pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						&zone_start_pfn,
> > +						&zone_end_pfn,
> > +						zones_size);
> > +			absent = compat_zone_absent_pages_in_node(
> > +						pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						zholes_size);
> > +		} else {
> > +			spanned = zone_spanned_pages_in_node(pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						&zone_start_pfn,
> > +						&zone_end_pfn,
> > +						zones_size);
> > +			absent = zone_absent_pages_in_node(pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						zholes_size);
> > +		}
> > +
> > +		size = spanned;
> > +		real_size = size - absent;
> > +
> >  		if (size)
> >  			zone->zone_start_pfn = zone_start_pfn;
> >  		else
> > @@ -6929,10 +6943,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
> >  	 */
> >  	if (pgdat == NODE_DATA(0)) {
> >  		mem_map = NODE_DATA(0)->node_mem_map;
> > -#if defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) || defined(CONFIG_FLATMEM)
> >  		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
> >  			mem_map -= offset;
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> >  	}
> >  #endif
> >  }
> > @@ -6949,9 +6961,10 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
> >  static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
> >  #endif
> >  
> > -void __init free_area_init_node(int nid, unsigned long *zones_size,
> > -				   unsigned long node_start_pfn,
> > -				   unsigned long *zholes_size)
> > +static void __init __free_area_init_node(int nid, unsigned long *zones_size,
> > +					 unsigned long node_start_pfn,
> > +					 unsigned long *zholes_size,
> > +					 bool compat)
> >  {
> >  	pg_data_t *pgdat = NODE_DATA(nid);
> >  	unsigned long start_pfn = 0;
> > @@ -6963,16 +6976,16 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
> >  	pgdat->node_id = nid;
> >  	pgdat->node_start_pfn = node_start_pfn;
> >  	pgdat->per_cpu_nodestats = NULL;
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > -	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
> > -	pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
> > -		(u64)start_pfn << PAGE_SHIFT,
> > -		end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
> > -#else
> > -	start_pfn = node_start_pfn;
> > -#endif
> > +	if (!compat) {
> > +		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
> > +		pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
> > +			(u64)start_pfn << PAGE_SHIFT,
> > +			end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
> > +	} else {
> > +		start_pfn = node_start_pfn;
> > +	}
> >  	calculate_node_totalpages(pgdat, start_pfn, end_pfn,
> > -				  zones_size, zholes_size);
> > +				  zones_size, zholes_size, compat);
> >  
> >  	alloc_node_mem_map(pgdat);
> >  	pgdat_set_deferred_range(pgdat);
> > @@ -6980,6 +6993,14 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
> >  	free_area_init_core(pgdat);
> >  }
> >  
> > +void __init free_area_init_node(int nid, unsigned long *zones_size,
> > +				unsigned long node_start_pfn,
> > +				unsigned long *zholes_size)
> > +{
> > +	__free_area_init_node(nid, zones_size, node_start_pfn, zholes_size,
> > +			      true);
> > +}
> > +
> >  #if !defined(CONFIG_FLAT_NODE_MEM_MAP)
> >  /*
> >   * Initialize all valid struct pages in the range [spfn, epfn) and mark them
> > @@ -7063,8 +7084,6 @@ static inline void __init init_unavailable_mem(void)
> >  }
> >  #endif /* !CONFIG_FLAT_NODE_MEM_MAP */
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > -
> >  #if MAX_NUMNODES > 1
> >  /*
> >   * Figure out the number of possible node ids.
> > @@ -7493,8 +7512,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
> >  	init_unavailable_mem();
> >  	for_each_online_node(nid) {
> >  		pg_data_t *pgdat = NODE_DATA(nid);
> > -		free_area_init_node(nid, NULL,
> > -				find_min_pfn_for_node(nid), NULL);
> > +		__free_area_init_node(nid, NULL,
> > +				      find_min_pfn_for_node(nid), NULL, false);
> >  
> >  		/* Any memory on that node */
> >  		if (pgdat->node_present_pages)
> > @@ -7559,8 +7578,6 @@ static int __init cmdline_parse_movablecore(char *p)
> >  early_param("kernelcore", cmdline_parse_kernelcore);
> >  early_param("movablecore", cmdline_parse_movablecore);
> >  
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -
> >  void adjust_managed_page_count(struct page *page, long count)
> >  {
> >  	atomic_long_add(count, &page_zone(page)->managed_pages);
> > -- 
> > 2.25.1
> > 
> 

-- 
Sincerely yours,
Mike.

From: Mike Rapoport <rppt@kernel.org>
To: Baoquan He <bhe@redhat.com>
Cc: linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Brian Cain <bcain@codeaurora.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	"David S. Miller" <davem@davemloft.net>,
	Geert Uytterhoeven <geert@linux-m68k.org>,
	Greentime Hu <green.hu@gmail.com>,
	Greg Ungerer <gerg@linux-m68k.org>, Guan Xuetao <gxt@pku.edu.cn>,
	Guo Ren <guoren@kernel.org>,
	Heiko Carstens <heiko.carstens@de.ibm.com>,
	Helge Deller <deller@gmx.de>,
	Hoan Tran <Hoan@os.amperecomputing.com>,
	"James E.J. Bottomley" <James.Bottomley@hansenpartnership.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Ley Foon Tan <ley.foon.tan@intel.com>,
	Mark Salter <msalter@redhat.com>,
	Matt Turner <mattst88@gmail.com>,
	Max Filippov <jcmvbkbc@gmail.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Michal Hocko <mhocko@kernel.org>, Michal Simek <monstr@monstr.eu>,
	Nick Hu <nickhu@andestech.com>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Richard Weinberger <richard@nod.at>,
	Rich Felker <dalias@libc.org>,
	Russell King <linux@armlinux.org.uk>,
	Stafford Horne <shorne@gmail.com>,
	Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
	Tony Luck <tony.luck@intel.com>,
	Vineet Gupta <vgupta@synopsys.com>,
	x86@kernel.org, Yoshinori Sato <ysato@users.sourceforge.jp>,
	linux-alpha@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-c6x-dev@linux-c6x.org, linux-csky@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-hexagon@vger.kernel.org,
	linux-ia64@vger.kernel.org, linux-m68k@lists.linux-m68k.org,
	linux-mips@vger.kernel.org, linux-mm@kvack.org,
	linux-parisc@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org,
	linux-sh@vger.kernel.org, linux-snps-arc@lists.infradead.org,
	linux-um@lists.infradead.org, linux-xtensa@linux-xtensa.org,
	openrisc@lists.librecores.org, sparclinux@vger.kernel.org,
	uclinux-h8-devel@lists.sourceforge.jp,
	Mike Rapoport <rppt@linux.ibm.com>
Subject: Re: [PATCH 03/21] mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option
Date: Tue, 21 Apr 2020 12:09:08 +0300
Message-ID: <20200421090908.GC14260@kernel.org> (raw)
Message-ID: <20200421090908.fyv5IBr3-1ZT1Sg_LzO3kv9i-Y8-BKX5bj71GVFRJnk@z> (raw)
In-Reply-To: <20200421042316.GQ4247@MiWiFi-R3L-srv>

On Tue, Apr 21, 2020 at 12:23:16PM +0800, Baoquan He wrote:
> On 04/12/20 at 10:48pm, Mike Rapoport wrote:
> > From: Mike Rapoport <rppt@linux.ibm.com>
> > 
> > The CONFIG_HAVE_MEMBLOCK_NODE_MAP is used to differentiate initialization
> > of nodes and zones structures between the systems that have region to node
> > mapping in memblock and those that don't.
> > 
> > Currently all the NUMA architectures enable this option and for the
> > non-NUMA systems we can presume that all the memory belongs to node 0 and
> > therefore the compile time configuration option is not required.
> > 
> > The remaining few architectures that use DISCONTIGMEM without NUMA are
> > easily updated to use memblock_add_node() instead of memblock_add() and
> > thus have proper correspondence of memblock regions to NUMA nodes.
> > 
> > Still, free_area_init_node() must have a backward compatible version
> > because its semantics with and without CONFIG_HAVE_MEMBLOCK_NODE_MAP is
> > different. Once all the architectures will use the new semantics, the
> > entire compatibility layer can be dropped.
> > 
> > To avoid addition of extra run time memory to store node id for
> > architectures that keep memblock but have only a single node, the node id
> > field of the memblock_region is guarded by CONFIG_NEED_MULTIPLE_NODES and
> > the corresponding accessors presume that in those cases it is always 0.
> > 
> > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
> > ---
> ...
> > diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> > index 6bc37a731d27..45abfc54da37 100644
> > --- a/include/linux/memblock.h
> > +++ b/include/linux/memblock.h
> > @@ -50,7 +50,7 @@ struct memblock_region {
> >  	phys_addr_t base;
> >  	phys_addr_t size;
> >  	enum memblock_flags flags;
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  	int nid;
> >  #endif
> >  };
> > @@ -215,7 +215,6 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
> >  	return m->flags & MEMBLOCK_NOMAP;
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
> >  			    unsigned long  *end_pfn);
> >  void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
> > @@ -234,7 +233,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
> >  #define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid)		\
> >  	for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
> >  	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> >  
> >  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> >  void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> > @@ -310,10 +308,10 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> >  	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
> >  			       nid, flags, p_start, p_end, p_nid)
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  int memblock_set_node(phys_addr_t base, phys_addr_t size,
> >  		      struct memblock_type *type, int nid);
> >  
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  static inline void memblock_set_region_node(struct memblock_region *r, int nid)
> >  {
> >  	r->nid = nid;
> > @@ -332,7 +330,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
> >  {
> >  	return 0;
> >  }
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > +#endif /* CONFIG_NEED_MULTIPLE_NODES */
> >  
> >  /* Flags for memblock allocation APIs */
> >  #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index a404026d14d4..5903bbbdb336 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -2344,9 +2344,8 @@ static inline unsigned long get_num_physpages(void)
> >  	return phys_pages;
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  /*
> > - * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
> > + * Using memblock node mappings, an architecture may initialise its
> >   * zones, allocate the backing mem_map and account for memory holes in a more
> >   * architecture independent manner. This is a substitute for creating the
> >   * zone_sizes[] and zholes_size[] arrays and passing them to
> > @@ -2367,9 +2366,6 @@ static inline unsigned long get_num_physpages(void)
> >   * registered physical page range.  Similarly
> >   * sparse_memory_present_with_active_regions() calls memory_present() for
> >   * each range when SPARSEMEM is enabled.
> > - *
> > - * See mm/page_alloc.c for more information on each function exposed by
> > - * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
> >   */
> >  extern void free_area_init_nodes(unsigned long *max_zone_pfn);
> >  unsigned long node_map_pfn_alignment(void);
> > @@ -2384,13 +2380,9 @@ extern void free_bootmem_with_active_regions(int nid,
> >  						unsigned long max_low_pfn);
> >  extern void sparse_memory_present_with_active_regions(int nid);
> >  
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -
> > -#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
> > -    !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
> > +#ifndef CONFIG_NEED_MULTIPLE_NODES
> >  static inline int early_pfn_to_nid(unsigned long pfn)
> >  {
> > -	BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
> >  	return 0;
> >  }
> >  #else
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 7b5b6eba402f..ffc2a3d6036b 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -874,7 +874,7 @@ extern int movable_zone;
> >  #ifdef CONFIG_HIGHMEM
> >  static inline int zone_movable_is_highmem(void)
> >  {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  	return movable_zone == ZONE_HIGHMEM;
> >  #else
> >  	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
> 
> If CONFIG_HIGHMEM is enabled, the above judgement is always true,
> wondering what's the purpose we have to do like this. It's not related
> to this patch though.

The whole is_higmem() implementation looks scary and begs for cleanup :)

> > diff --git a/mm/Kconfig b/mm/Kconfig index
> > c1acc34c1c35..aaa5bdaa1c8a 100644 --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -126,9 +126,6 @@ config SPARSEMEM_VMEMMAP
> >  	  pfn_to_page and page_to_pfn operations.  This is the most
> >  	  efficient option when sufficient kernel resources are available.
> >  
> > -config HAVE_MEMBLOCK_NODE_MAP
> > -	bool
> > -
> >  config HAVE_MEMBLOCK_PHYS_MAP
> >  	bool
> >  
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index 43e2fd3006c1..743659d88fc4 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -620,7 +620,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
> >  		 * area, insert that portion.
> >  		 */
> >  		if (rbase > base) {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  			WARN_ON(nid != memblock_get_region_node(rgn));
> >  #endif
> >  			WARN_ON(flags != rgn->flags);
> > @@ -1197,7 +1197,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
> >  	*idx = ULLONG_MAX;
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  /*
> >   * Common iterator interface used to define for_each_mem_pfn_range().
> >   */
> > @@ -1247,6 +1246,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
> >  int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
> >  				      struct memblock_type *type, int nid)
> >  {
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  	int start_rgn, end_rgn;
> >  	int i, ret;
> >  
> > @@ -1258,9 +1258,10 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
> >  		memblock_set_region_node(&type->regions[i], nid);
> >  
> >  	memblock_merge_regions(type);
> > +#endif
> >  	return 0;
> >  }
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > +
> >  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> >  /**
> >   * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
> > @@ -1799,7 +1800,6 @@ bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
> >  	return !memblock_is_nomap(&memblock.memory.regions[i]);
> >  }
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
> >  			 unsigned long *start_pfn, unsigned long *end_pfn)
> >  {
> > @@ -1814,7 +1814,6 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
> >  
> >  	return memblock_get_region_node(&type->regions[mid]);
> >  }
> > -#endif
> >  
> >  /**
> >   * memblock_is_region_memory - check if a region is a subset of memory
> > @@ -1905,7 +1904,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
> >  		size = rgn->size;
> >  		end = base + size - 1;
> >  		flags = rgn->flags;
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  		if (memblock_get_region_node(rgn) != MAX_NUMNODES)
> >  			snprintf(nid_buf, sizeof(nid_buf), " on node %d",
> >  				 memblock_get_region_node(rgn));
> > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> > index fc0aad0bc1f5..e67dc501576a 100644
> > --- a/mm/memory_hotplug.c
> > +++ b/mm/memory_hotplug.c
> > @@ -1372,11 +1372,7 @@ check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
> >  
> >  static int __init cmdline_parse_movable_node(char *p)
> >  {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  	movable_node_enabled = true;
> > -#else
> > -	pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
> > -#endif
> 
> Wondering if this change will impact anything. Before, those ARCHes with
> CONFIG_HAVE_MEMBLOCK_NODE_MAP support movable_node. With this patch
> applied, those ARCHes which don't support CONFIG_HAVE_MEMBLOCK_NODE_MAP
> can also have 'movable_node' specified in kernel cmdline.
> 
> >  	return 0;
> >  }
> >  early_param("movable_node", cmdline_parse_movable_node);
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 1ac775bfc9cf..4530e9cfd9f7 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -335,7 +335,6 @@ static unsigned long nr_kernel_pages __initdata;
> >  static unsigned long nr_all_pages __initdata;
> >  static unsigned long dma_reserve __initdata;
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
> >  static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
> >  static unsigned long required_kernelcore __initdata;
> 
> Does it mean those ARCHes which don't support
> CONFIG_HAVE_MEMBLOCK_NODE_MAP before, will have 'kernelcore=' and
> 'movablecore=' now, and will have MOVABLE zone?

I hesitated a lot about whether to hide the kernelcore/movablecore and
related code behind an #ifdef.
In the end I've decided to keep the code compiled unconditionally as it
is anyway __init and no sane person would pass "kernelcore=" to the
kernel on a UMA system.

> > @@ -348,7 +347,6 @@ static bool mirrored_kernelcore __meminitdata;
> >  /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
> >  int movable_zone;
> >  EXPORT_SYMBOL(movable_zone);
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> >  
> >  #if MAX_NUMNODES > 1
> >  unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
> > @@ -1499,8 +1497,7 @@ void __free_pages_core(struct page *page, unsigned int order)
> >  	__free_pages(page, order);
> >  }
> >  
> > -#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
> > -	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
> > +#ifdef CONFIG_NEED_MULTIPLE_NODES
> >  
> >  static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
> >  
> > @@ -1542,7 +1539,7 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
> >  
> >  	return nid;
> >  }
> > -#endif
> > +#endif /* CONFIG_NEED_MULTIPLE_NODES */
> >  
> >  #ifdef CONFIG_NODES_SPAN_OTHER_NODES
> >  /* Only safe to use early in boot when initialisation is single-threaded */
> > @@ -5924,7 +5921,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
> >  static bool __meminit
> >  overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> >  {
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> >  	static struct memblock_region *r;
> >  
> >  	if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
> > @@ -5940,7 +5936,6 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> >  			return true;
> >  		}
> >  	}
> > -#endif
> >  	return false;
> >  }
> >  
> > @@ -6573,8 +6568,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
> >  	return nr_absent;
> >  }
> >  
> > -#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -static inline unsigned long __init zone_spanned_pages_in_node(int nid,
> > +static inline unsigned long __init compat_zone_spanned_pages_in_node(int nid,
> 
> Is it compact zone which has continuous memory region, and the
> compat here is typo? Or it's compatible zone? The name seems a little
> confusing, or I miss something.

It's 'compat' from 'compatibility'. This is kinda "the old way" and the
version that was defined when CONFIG_HAVE_MEMBLOCK_NODE_MAP=y is the
"new way", so I picked 'compat' for backwards compatibility. 
Anyway, it will go away later in pacth 19. 

> >  					unsigned long zone_type,
> >  					unsigned long node_start_pfn,
> >  					unsigned long node_end_pfn,
> > @@ -6593,7 +6587,7 @@ static inline unsigned long __init zone_spanned_pages_in_node(int nid,
> >  	return zones_size[zone_type];
> >  }
> >  
> > -static inline unsigned long __init zone_absent_pages_in_node(int nid,
> > +static inline unsigned long __init compat_zone_absent_pages_in_node(int nid,
> >  						unsigned long zone_type,
> >  						unsigned long node_start_pfn,
> >  						unsigned long node_end_pfn,
> > @@ -6605,13 +6599,12 @@ static inline unsigned long __init zone_absent_pages_in_node(int nid,
> >  	return zholes_size[zone_type];
> >  }
> >  
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -
> >  static void __init calculate_node_totalpages(struct pglist_data *pgdat,
> >  						unsigned long node_start_pfn,
> >  						unsigned long node_end_pfn,
> >  						unsigned long *zones_size,
> > -						unsigned long *zholes_size)
> > +						unsigned long *zholes_size,
> > +						bool compat)
> >  {
> >  	unsigned long realtotalpages = 0, totalpages = 0;
> >  	enum zone_type i;
> > @@ -6619,17 +6612,38 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
> >  	for (i = 0; i < MAX_NR_ZONES; i++) {
> >  		struct zone *zone = pgdat->node_zones + i;
> >  		unsigned long zone_start_pfn, zone_end_pfn;
> > +		unsigned long spanned, absent;
> >  		unsigned long size, real_size;
> >  
> > -		size = zone_spanned_pages_in_node(pgdat->node_id, i,
> > -						  node_start_pfn,
> > -						  node_end_pfn,
> > -						  &zone_start_pfn,
> > -						  &zone_end_pfn,
> > -						  zones_size);
> > -		real_size = size - zone_absent_pages_in_node(pgdat->node_id, i,
> > -						  node_start_pfn, node_end_pfn,
> > -						  zholes_size);
> > +		if (compat) {
> > +			spanned = compat_zone_spanned_pages_in_node(
> > +						pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						&zone_start_pfn,
> > +						&zone_end_pfn,
> > +						zones_size);
> > +			absent = compat_zone_absent_pages_in_node(
> > +						pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						zholes_size);
> > +		} else {
> > +			spanned = zone_spanned_pages_in_node(pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						&zone_start_pfn,
> > +						&zone_end_pfn,
> > +						zones_size);
> > +			absent = zone_absent_pages_in_node(pgdat->node_id, i,
> > +						node_start_pfn,
> > +						node_end_pfn,
> > +						zholes_size);
> > +		}
> > +
> > +		size = spanned;
> > +		real_size = size - absent;
> > +
> >  		if (size)
> >  			zone->zone_start_pfn = zone_start_pfn;
> >  		else
> > @@ -6929,10 +6943,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
> >  	 */
> >  	if (pgdat == NODE_DATA(0)) {
> >  		mem_map = NODE_DATA(0)->node_mem_map;
> > -#if defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) || defined(CONFIG_FLATMEM)
> >  		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
> >  			mem_map -= offset;
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> >  	}
> >  #endif
> >  }
> > @@ -6949,9 +6961,10 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
> >  static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
> >  #endif
> >  
> > -void __init free_area_init_node(int nid, unsigned long *zones_size,
> > -				   unsigned long node_start_pfn,
> > -				   unsigned long *zholes_size)
> > +static void __init __free_area_init_node(int nid, unsigned long *zones_size,
> > +					 unsigned long node_start_pfn,
> > +					 unsigned long *zholes_size,
> > +					 bool compat)
> >  {
> >  	pg_data_t *pgdat = NODE_DATA(nid);
> >  	unsigned long start_pfn = 0;
> > @@ -6963,16 +6976,16 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
> >  	pgdat->node_id = nid;
> >  	pgdat->node_start_pfn = node_start_pfn;
> >  	pgdat->per_cpu_nodestats = NULL;
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > -	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
> > -	pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
> > -		(u64)start_pfn << PAGE_SHIFT,
> > -		end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
> > -#else
> > -	start_pfn = node_start_pfn;
> > -#endif
> > +	if (!compat) {
> > +		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
> > +		pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
> > +			(u64)start_pfn << PAGE_SHIFT,
> > +			end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
> > +	} else {
> > +		start_pfn = node_start_pfn;
> > +	}
> >  	calculate_node_totalpages(pgdat, start_pfn, end_pfn,
> > -				  zones_size, zholes_size);
> > +				  zones_size, zholes_size, compat);
> >  
> >  	alloc_node_mem_map(pgdat);
> >  	pgdat_set_deferred_range(pgdat);
> > @@ -6980,6 +6993,14 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
> >  	free_area_init_core(pgdat);
> >  }
> >  
> > +void __init free_area_init_node(int nid, unsigned long *zones_size,
> > +				unsigned long node_start_pfn,
> > +				unsigned long *zholes_size)
> > +{
> > +	__free_area_init_node(nid, zones_size, node_start_pfn, zholes_size,
> > +			      true);
> > +}
> > +
> >  #if !defined(CONFIG_FLAT_NODE_MEM_MAP)
> >  /*
> >   * Initialize all valid struct pages in the range [spfn, epfn) and mark them
> > @@ -7063,8 +7084,6 @@ static inline void __init init_unavailable_mem(void)
> >  }
> >  #endif /* !CONFIG_FLAT_NODE_MEM_MAP */
> >  
> > -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> > -
> >  #if MAX_NUMNODES > 1
> >  /*
> >   * Figure out the number of possible node ids.
> > @@ -7493,8 +7512,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
> >  	init_unavailable_mem();
> >  	for_each_online_node(nid) {
> >  		pg_data_t *pgdat = NODE_DATA(nid);
> > -		free_area_init_node(nid, NULL,
> > -				find_min_pfn_for_node(nid), NULL);
> > +		__free_area_init_node(nid, NULL,
> > +				      find_min_pfn_for_node(nid), NULL, false);
> >  
> >  		/* Any memory on that node */
> >  		if (pgdat->node_present_pages)
> > @@ -7559,8 +7578,6 @@ static int __init cmdline_parse_movablecore(char *p)
> >  early_param("kernelcore", cmdline_parse_kernelcore);
> >  early_param("movablecore", cmdline_parse_movablecore);
> >  
> > -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> > -
> >  void adjust_managed_page_count(struct page *page, long count)
> >  {
> >  	atomic_long_add(count, &page_zone(page)->managed_pages);
> > -- 
> > 2.25.1
> > 
> 

-- 
Sincerely yours,
Mike.

  reply index

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-12 19:48 [PATCH 00/21] mm: rework free_area_init*() funcitons Mike Rapoport
2020-04-12 19:48 ` [PATCH 01/21] mm: memblock: replace dereferences of memblock_region.nid with API calls Mike Rapoport
2020-04-21  2:06   ` Baoquan He
2020-04-12 19:48 ` [PATCH 02/21] mm: make early_pfn_to_nid() and related defintions close to each other Mike Rapoport
2020-04-21  2:24   ` Baoquan He
2020-04-21  8:49     ` Mike Rapoport
2020-04-21  8:49       ` Mike Rapoport
2020-04-21  9:33       ` Baoquan He
2020-04-21  3:31   ` Baoquan He
2020-04-21  8:39     ` Mike Rapoport
2020-04-21  8:39       ` Mike Rapoport
2020-04-12 19:48 ` [PATCH 03/21] mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option Mike Rapoport
2020-04-21  4:23   ` Baoquan He
2020-04-21  9:09     ` Mike Rapoport [this message]
2020-04-21  9:09       ` Mike Rapoport
2020-04-21  9:45       ` Baoquan He
2020-04-12 19:48 ` [PATCH 04/21] mm: free_area_init: use maximal zone PFNs rather than zone sizes Mike Rapoport
2020-04-22 23:41   ` Baoquan He
2020-06-15  3:53   ` Greg Ungerer
2020-06-15  6:22     ` Mike Rapoport
2020-06-15  6:22       ` Mike Rapoport
2020-06-15  7:17       ` Greg Ungerer
2020-04-12 19:48 ` [PATCH 05/21] mm: use free_area_init() instead of free_area_init_nodes() Mike Rapoport
2020-04-23  0:02   ` Baoquan He
2020-04-12 19:48 ` [PATCH 06/21] alpha: simplify detection of memory zone boundaries Mike Rapoport
2020-04-12 19:48 ` [PATCH 07/21] arm: " Mike Rapoport
2020-04-12 19:48 ` [PATCH 08/21] arm64: simplify detection of memory zone boundaries for UMA configs Mike Rapoport
2020-04-12 19:48 ` [PATCH 09/21] csky: simplify detection of memory zone boundaries Mike Rapoport
2020-04-12 19:48 ` [PATCH 10/21] m68k: mm: " Mike Rapoport
2020-04-12 19:48 ` [PATCH 11/21] parisc: " Mike Rapoport
2020-04-12 19:48 ` [PATCH 12/21] sparc32: " Mike Rapoport
2020-04-12 19:48 ` [PATCH 13/21] unicore32: " Mike Rapoport
2020-04-12 19:48 ` [PATCH 14/21] xtensa: " Mike Rapoport
2020-04-12 19:48 ` [PATCH 15/21] mm: memmap_init: iterate over memblock regions rather that check each PFN Mike Rapoport
2020-04-20 14:26   ` Qian Cai
2020-04-24  7:22   ` David Hildenbrand
2020-04-25 16:49     ` Mike Rapoport
2020-04-25 16:49       ` Mike Rapoport
2020-04-12 19:48 ` [PATCH 16/21] mm: remove early_pfn_in_nid() and CONFIG_NODES_SPAN_OTHER_NODES Mike Rapoport
2020-04-23  1:13   ` Baoquan He
2020-04-23  5:50     ` Mike Rapoport
2020-04-23  5:50       ` Mike Rapoport
2020-05-18 21:38     ` Hoan Tran
2020-04-12 19:48 ` [PATCH 17/21] mm: free_area_init: allow defining max_zone_pfn in descending order Mike Rapoport
2020-04-23  2:53   ` Baoquan He
2020-04-23  2:57     ` Baoquan He
2020-04-23  5:55       ` Mike Rapoport
2020-04-23  5:55         ` Mike Rapoport
2020-04-24  0:33         ` Baoquan He
2020-04-12 19:48 ` [PATCH 18/21] mm: rename free_area_init_node() to free_area_init_memoryless_node() Mike Rapoport
2020-04-23  3:14   ` Baoquan He
2020-04-23  6:18     ` Mike Rapoport
2020-04-23  6:18       ` Mike Rapoport
2020-04-12 19:48 ` [PATCH 19/21] mm: clean up free_area_init_node() and its helpers Mike Rapoport
2020-04-12 19:48 ` [PATCH 20/21] mm: simplify find_min_pfn_with_active_regions() Mike Rapoport
2020-04-12 19:48 ` [PATCH 21/21] docs/vm: update memory-models documentation Mike Rapoport

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200421090908.GC14260@kernel.org \
    --to=rppt@kernel.org \
    --cc=James.Bottomley@hansenpartnership.com \
    --cc=bcain@codeaurora.org \
    --cc=bhe@redhat.com \
    --cc=catalin.marinas@arm.com \
    --cc=corbet@lwn.net \
    --cc=dalias@libc.org \
    --cc=deller@gmx.de \
    --cc=gerg@linux-m68k.org \
    --cc=guoren@kernel.org \
    --cc=heiko.carstens@de.ibm.com \
    --cc=jcmvbkbc@gmail.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-c6x-dev@linux-c6x.org \
    --cc=linux-csky@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-hexagon@vger.kernel.org \
    --cc=linux-ia64@vger.kernel.org \
    --cc=linux-parisc@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linux-sh@vger.kernel.org \
    --cc=linux-snps-arc@lists.infradead.org \
    --cc=linux@armlinux.org.uk \
    --cc=mhocko@kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=sparclinux@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-arch Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-arch/0 linux-arch/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-arch linux-arch/ https://lore.kernel.org/linux-arch \
		linux-arch@vger.kernel.org
	public-inbox-index linux-arch

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-arch


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git