From mboxrd@z Thu Jan 1 00:00:00 1970 From: Dario Faggioli Subject: [PATCH 7 of 8] libxl: automatic placement deals with node-affinity Date: Fri, 05 Oct 2012 16:08:25 +0200 Message-ID: References: Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xen.org Cc: Andre Przywara , Ian Campbell , Anil Madhavapeddy , George Dunlap , Andrew Cooper , Juergen Gross , Ian Jackson , Jan Beulich , Marcus Granado , Daniel De Graaf , Matt Wilson List-Id: xen-devel@lists.xenproject.org Which basically means the following two things: 1) during domain creation, it is the node-affinity of the domain --rather than the vcpu-affinities of its vcpus-- that is affected by automatic placement; 2) during automatic placement, when counting how many vcpus are already "bound" to a placement candidate (as part of the process of choosing the best candidate), node-affinity is also considered, together with vcpu-affinity. Signed-off-by: Dario Faggioli diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -133,13 +133,13 @@ static int numa_place_domain(libxl__gc * { int found; libxl__numa_candidate candidate; - libxl_bitmap candidate_nodemap; + libxl_bitmap cpupool_nodemap; libxl_cpupoolinfo cpupool_info; int i, cpupool, rc = 0; uint32_t memkb; libxl__numa_candidate_init(&candidate); - libxl_bitmap_init(&candidate_nodemap); + libxl_bitmap_init(&cpupool_nodemap); /* * Extract the cpumap from the cpupool the domain belong to. 
In fact, @@ -156,7 +156,7 @@ static int numa_place_domain(libxl__gc * rc = libxl_domain_need_memory(CTX, info, &memkb); if (rc) goto out; - if (libxl_node_bitmap_alloc(CTX, &candidate_nodemap, 0)) { + if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) { rc = ERROR_FAIL; goto out; } @@ -174,17 +174,19 @@ static int numa_place_domain(libxl__gc * if (found == 0) goto out; - /* Map the candidate's node map to the domain's info->cpumap */ - libxl__numa_candidate_get_nodemap(gc, &candidate, &candidate_nodemap); - rc = libxl_nodemap_to_cpumap(CTX, &candidate_nodemap, &info->cpumap); + /* Map the candidate's node map to the domain's info->nodemap */ + libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap); + + /* Avoid trying to set the affinity to nodes that might be in the + * candidate's nodemap but out of our cpupool. */ + rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap, + &cpupool_nodemap); if (rc) goto out; - /* Avoid trying to set the affinity to cpus that might be in the - * nodemap but not in our cpupool. */ - libxl_for_each_set_bit(i, info->cpumap) { - if (!libxl_bitmap_test(&cpupool_info.cpumap, i)) - libxl_bitmap_reset(&info->cpumap, i); + libxl_for_each_set_bit(i, info->nodemap) { + if (!libxl_bitmap_test(&cpupool_nodemap, i)) + libxl_bitmap_reset(&info->nodemap, i); } LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and " @@ -193,7 +195,7 @@ static int numa_place_domain(libxl__gc * out: libxl__numa_candidate_dispose(&candidate); - libxl_bitmap_dispose(&candidate_nodemap); + libxl_bitmap_dispose(&cpupool_nodemap); libxl_cpupoolinfo_dispose(&cpupool_info); return rc; } @@ -211,10 +213,10 @@ int libxl__build_pre(libxl__gc *gc, uint /* * Check if the domain has any CPU affinity. If not, try to build * up one. In case numa_place_domain() find at least a suitable - * candidate, it will affect info->cpumap accordingly; if it + * candidate, it will affect info->nodemap accordingly; if it * does not, it just leaves it as it is. 
This means (unless * some weird error manifests) the subsequent call to - * libxl_set_vcpuaffinity_all() will do the actual placement, + * libxl_domain_set_nodeaffinity() will do the actual placement, * whatever that turns out to be. */ if (libxl_defbool_val(info->numa_placement)) { diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c --- a/tools/libxl/libxl_numa.c +++ b/tools/libxl/libxl_numa.c @@ -171,7 +171,7 @@ static int nodemap_to_nr_vcpus(libxl__gc const libxl_bitmap *nodemap) { libxl_dominfo *dinfo = NULL; - libxl_bitmap vcpu_nodemap; + libxl_bitmap vcpu_nodemap, dom_nodemap; int nr_doms, nr_cpus; int nr_vcpus = 0; int i, j, k; @@ -185,6 +185,12 @@ static int nodemap_to_nr_vcpus(libxl__gc return ERROR_FAIL; } + if (libxl_node_bitmap_alloc(CTX, &dom_nodemap, 0) < 0) { + libxl_dominfo_list_free(dinfo, nr_doms); + libxl_bitmap_dispose(&vcpu_nodemap); + return ERROR_FAIL; + } + for (i = 0; i < nr_doms; i++) { libxl_vcpuinfo *vinfo; int nr_dom_vcpus; @@ -193,6 +199,9 @@ static int nodemap_to_nr_vcpus(libxl__gc if (vinfo == NULL) continue; + /* Retrieve the domain's node-affinity map (see below) */ + libxl_domain_get_nodeaffinity(CTX, dinfo[i].domid, &dom_nodemap); + /* For each vcpu of each domain ... */ for (j = 0; j < nr_dom_vcpus; j++) { @@ -201,9 +210,17 @@ static int nodemap_to_nr_vcpus(libxl__gc libxl_for_each_set_bit(k, vinfo[j].cpumap) libxl_bitmap_set(&vcpu_nodemap, tinfo[k].node); - /* And check if that map has any intersection with our nodemap */ + /* + * We now check whether the && of the vcpu's nodemap and the + * domain's nodemap has any intersection with the nodemap of our + * candidate. + * Using both (vcpu's and domain's) nodemaps allows us to take + * both vcpu-affinity and node-affinity into account when counting + * the number of vcpus bound to the candidate. 
+ */ libxl_for_each_set_bit(k, vcpu_nodemap) { - if (libxl_bitmap_test(nodemap, k)) { + if (libxl_bitmap_test(&dom_nodemap, k) && + libxl_bitmap_test(nodemap, k)) { nr_vcpus++; break; } @@ -213,6 +230,7 @@ static int nodemap_to_nr_vcpus(libxl__gc libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus); } + libxl_bitmap_dispose(&dom_nodemap); libxl_bitmap_dispose(&vcpu_nodemap); libxl_dominfo_list_free(dinfo, nr_doms); return nr_vcpus;