All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michal Hocko <mhocko@kernel.org>
To: Pingfan Liu <kernelfans@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>,
	Bjorn Helgaas <bhelgaas@google.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>
Subject: Re: [PATCH] mm/alloc: fallback to first node if the wanted node offline
Date: Mon, 10 Dec 2018 13:37:38 +0100	[thread overview]
Message-ID: <20181210123738.GN1286@dhcp22.suse.cz> (raw)
In-Reply-To: <20181207155627.GG1286@dhcp22.suse.cz>

On Fri 07-12-18 16:56:27, Michal Hocko wrote:
> On Fri 07-12-18 22:27:13, Pingfan Liu wrote:
> [...]
> > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> > index 1308f54..4dc497d 100644
> > --- a/arch/x86/mm/numa.c
> > +++ b/arch/x86/mm/numa.c
> > @@ -754,18 +754,23 @@ void __init init_cpu_to_node(void)
> >  {
> >         int cpu;
> >         u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
> > +       int node, nr;
> > 
> >         BUG_ON(cpu_to_apicid == NULL);
> > +       nr = cpumask_weight(cpu_possible_mask);
> > +
> > +       /* bring up all possible node, since dev->numa_node */
> > +       //should check acpi works for node possible,
> > +       for_each_node(node)
> > +               if (!node_online(node))
> > +                       init_memory_less_node(node);
> 
> I suspect there is no change if you replace for_each_node by
> 	for_each_node_mask(nid, node_possible_map)
> 
> here. If that is the case then we are probably calling
> free_area_init_node too early. I do not see it yet though.

OK, so it is not about calling it too late or too early. It is just that
node_possible_map is a misnomer and it has different semantics than
I expected. numa_nodemask_from_meminfo simply considers only nodes
with some memory. So my patch didn't really make any difference and the
node stayed uninitialized.

In other words, does the following work? I am sorry to guess wildly like
this, but I am not able to recreate your setups to play with this myself.

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1308f5408bf7..d51643e10d00 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -216,8 +216,6 @@ static void __init alloc_node_data(int nid)
 
 	node_data[nid] = nd;
 	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
-
-	node_set_online(nid);
 }
 
 /**
@@ -527,6 +525,19 @@ static void __init numa_clear_kernel_node_hotplug(void)
 	}
 }
 
+static void __init init_memory_less_node(int nid)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = {0};
+	unsigned long zholes_size[MAX_NR_ZONES] = {0};
+
+	free_area_init_node(nid, zones_size, 0, zholes_size);
+
+	/*
+	 * All zonelists will be built later in start_kernel() after per cpu
+	 * areas are initialized.
+	 */
+}
+
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
 	unsigned long uninitialized_var(pfn_align);
@@ -570,7 +581,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 		return -EINVAL;
 
 	/* Finally register nodes. */
-	for_each_node_mask(nid, node_possible_map) {
+	for_each_node(nid) {
 		u64 start = PFN_PHYS(max_pfn);
 		u64 end = 0;
 
@@ -592,6 +603,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 			continue;
 
 		alloc_node_data(nid);
+		if (!end)
+			init_memory_less_node(nid);
+		else
+			node_set_online(nid);
 	}
 
 	/* Dump memblock with node info and return. */
@@ -721,21 +736,6 @@ void __init x86_numa_init(void)
 	numa_init(dummy_numa_init);
 }
 
-static void __init init_memory_less_node(int nid)
-{
-	unsigned long zones_size[MAX_NR_ZONES] = {0};
-	unsigned long zholes_size[MAX_NR_ZONES] = {0};
-
-	/* Allocate and initialize node data. Memory-less node is now online.*/
-	alloc_node_data(nid);
-	free_area_init_node(nid, zones_size, 0, zholes_size);
-
-	/*
-	 * All zonelists will be built later in start_kernel() after per cpu
-	 * areas are initialized.
-	 */
-}
-
 /*
  * Setup early cpu_to_node.
  *
@@ -763,9 +763,6 @@ void __init init_cpu_to_node(void)
 		if (node == NUMA_NO_NODE)
 			continue;
 
-		if (!node_online(node))
-			init_memory_less_node(node);
-
 		numa_set_node(cpu, node);
 	}
 }
-- 
Michal Hocko
SUSE Labs

  parent reply	other threads:[~2018-12-10 12:37 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-04  3:05 [PATCH] mm/alloc: fallback to first node if the wanted node offline Pingfan Liu
2018-12-04  3:53 ` David Rientjes
2018-12-04  7:16   ` Pingfan Liu
2018-12-05  5:49     ` Pingfan Liu
2018-12-05 19:00       ` David Rientjes
2018-12-04  6:54 ` Wei Yang
2018-12-04  7:20   ` Pingfan Liu
2018-12-04  8:34     ` Wei Yang
2018-12-04  8:52       ` Pingfan Liu
2018-12-04  9:09         ` Wei Yang
2018-12-05  5:50           ` Pingfan Liu
2018-12-04  7:22 ` Michal Hocko
2018-12-04  8:20   ` Pingfan Liu
2018-12-04  8:40     ` Wei Yang
2018-12-04  8:56       ` Pingfan Liu
2018-12-04  8:56     ` Michal Hocko
2018-12-04 14:42       ` Vlastimil Babka
2018-12-05  5:38       ` Pingfan Liu
2018-12-05  9:21         ` Michal Hocko
2018-12-05  9:29           ` Pingfan Liu
2018-12-05  9:40             ` Vlastimil Babka
2018-12-06  3:07               ` Pingfan Liu
2018-12-06  8:28                 ` Michal Hocko
2018-12-06 10:03                   ` Pingfan Liu
2018-12-06 10:44                     ` Pingfan Liu
2018-12-06 12:11                       ` Michal Hocko
2018-12-07  2:56                         ` Pingfan Liu
2018-12-07  7:53                           ` Michal Hocko
2018-12-07  9:40                             ` Pingfan Liu
2018-12-07 11:30                               ` Michal Hocko
2018-12-07 11:30                                 ` Michal Hocko
2018-12-07 13:20                                 ` Pingfan Liu
2018-12-07 14:22                                   ` Michal Hocko
2018-12-07 14:27                                     ` Pingfan Liu
2018-12-07 14:50                                       ` Michal Hocko
2018-12-07 15:56                                       ` Michal Hocko
2018-12-10  4:00                                         ` Pingfan Liu
2018-12-10  7:57                                           ` Pingfan Liu
2018-12-10 12:37                                         ` Michal Hocko [this message]
2018-12-11  8:05                                           ` Pingfan Liu
2018-12-11  9:44                                             ` Michal Hocko
2018-12-12  8:33                                               ` Pingfan Liu
2018-12-12  8:31                                           ` Pingfan Liu
2018-12-12 11:53                                             ` Michal Hocko
2018-12-13  8:37                                               ` Pingfan Liu
2018-12-13  9:04                                                 ` Pingfan Liu
2018-12-17 13:29                                                   ` Michal Hocko
2018-12-20  7:19                                                     ` Pingfan Liu
2018-12-20  9:19                                                       ` Michal Hocko
2019-01-08 14:34                                                         ` Michal Hocko
2019-01-09  3:13                                                           ` Pingfan Liu
2019-01-09  3:13                                                             ` Pingfan Liu
2019-01-11  3:12                                                           ` Pingfan Liu
2019-01-11  3:12                                                             ` Pingfan Liu
2019-01-11  9:23                                                             ` Michal Hocko
2018-12-17 12:57                                                 ` Michal Hocko
2018-12-05  9:43             ` Michal Hocko
2018-12-06  3:34               ` Pingfan Liu
2018-12-06  7:23                 ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181210123738.GN1286@dhcp22.suse.cz \
    --to=mhocko@kernel.org \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=bhelgaas@google.com \
    --cc=kernelfans@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.