* [RFC PATCH v2] tools: libxl/xl: run NUMA placement even when a hard-affinity is set
From: Dario Faggioli @ 2018-10-19 15:54 UTC
  To: xen-devel; +Cc: Wei Liu, Ian Jackson, George Dunlap

Right now, if either a hard or a soft affinity is explicitly specified
in a domain's config file, automatic NUMA placement is skipped. However,
automatic NUMA placement affects only the soft affinity of the domain
being created.

Therefore, it is fine to let it run when a hard affinity is specified.
The semantics are that the best placement candidate is found while
respecting the specified hard affinity, i.e., using only the nodes that
contain the pcpus in the hard-affinity mask.
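
For instance (hypothetical topology and mask), on a host with pcpus
0-7 on node 0 and 8-15 on node 1, a config file containing:

    cpus = "4-7"

will still go through automatic placement, but only node 0 can be
picked as a candidate, and the domain's soft affinity is set within it.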

This is particularly helpful when global xl pinning masks are defined,
as made possible by commit aa67b97ed34279c43 ("xl.conf: Add global
affinity masks"). Without this patch, defining a global affinity mask
also means disabling automatic placement, but that does not have to be
the case (especially on large systems).
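
As an illustration (the mask value is just an example), with an
xl.conf containing:

    vm.cpumask="0-15"

all domains currently skip placement entirely; with this patch,
placement still runs, considering only the nodes covered by pcpus 0-15.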

Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: George Dunlap <george.dunlap@citrix.com>
---
Hey, I thought I had resent this patch, but apparently I hadn't.
Sorry... here it comes.

Changes from v1:
- turned a WARN into an ERR, as suggested by Wei (yes, I was wrong: it
  can trigger, and it makes sense for it to be an error rather than
  a warning);
- about the //DEBUG check, I do think it is a developer's check, useful
  when testing new development on affinity management; it's not
  necessary to go through it at each domain creation in production,
  so I'm leaving it as it is, as suggested by Ian.
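
FWIW, a quick way to see the effect (domain name and mask below are
just examples) is:

    xl create /etc/xen/vm1.cfg   # with, e.g., cpus = "4-7" in it
    xl vcpu-list vm1

The vcpus should show the specified hard affinity, with a soft
affinity computed by the placement algorithm within it, instead
of "all".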
---
 tools/libxl/libxl_dom.c |   45 +++++++++++++++++++++++++++++++++++++++------
 tools/xl/xl_parse.c     |    6 ++++--
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index c66f3893d7..598af71562 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -27,6 +27,8 @@
 
 #include "_paths.h"
 
+//#define DEBUG 1
+
 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -142,12 +144,13 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
 {
     int found;
     libxl__numa_candidate candidate;
-    libxl_bitmap cpupool_nodemap;
+    libxl_bitmap cpumap, cpupool_nodemap, *map;
     libxl_cpupoolinfo cpupool_info;
     int i, cpupool, rc = 0;
     uint64_t memkb;
 
     libxl__numa_candidate_init(&candidate);
+    libxl_bitmap_init(&cpumap);
     libxl_bitmap_init(&cpupool_nodemap);
     libxl_cpupoolinfo_init(&cpupool_info);
 
@@ -162,6 +165,37 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
     rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
     if (rc)
         goto out;
+    map = &cpupool_info.cpumap;
+
+    /*
+     * If there's a well-defined hard affinity mask (i.e., the same one for
+     * all the vcpus), we can run the placement considering only the pcpus
+     * within that mask.
+     */
+    if (info->num_vcpu_hard_affinity)
+    {
+#ifdef DEBUG
+        int j;
+
+        for (j = 0; j < info->num_vcpu_hard_affinity; j++)
+            assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
+                                      &info->vcpu_hard_affinity[j], 0));
+#endif /* DEBUG */
+
+        rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
+                              &cpupool_info.cpumap);
+        if (rc)
+            goto out;
+
+        /* Hard affinity must contain at least one cpu of our cpupool */
+        if (libxl_bitmap_is_empty(&cpumap)) {
+            LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
+            rc = ERROR_INVAL;
+            goto out;
+        }
+        /* Run the placement only within the hard affinity & cpupool overlap */
+        map = &cpumap;
+    }
 
     rc = libxl_domain_need_memory(CTX, info, &memkb);
     if (rc)
@@ -174,8 +208,7 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
     /* Find the best candidate with enough free memory and at least
      * as much pcpus as the domain has vcpus.  */
     rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
-                                   0, 0, &cpupool_info.cpumap,
-                                   numa_cmpf, &candidate, &found);
+                                   0, 0, map, numa_cmpf, &candidate, &found);
     if (rc)
         goto out;
 
@@ -206,6 +239,7 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
  out:
     libxl__numa_candidate_dispose(&candidate);
     libxl_bitmap_dispose(&cpupool_nodemap);
+    libxl_bitmap_dispose(&cpumap);
     libxl_cpupoolinfo_dispose(&cpupool_info);
     return rc;
 }
@@ -373,9 +407,8 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
      * reflect the placement result if that is the case
      */
     if (libxl_defbool_val(info->numa_placement)) {
-        if (info->cpumap.size || info->num_vcpu_hard_affinity ||
-            info->num_vcpu_soft_affinity)
-            LOG(WARN, "Can't run NUMA placement, as an (hard or soft) "
+        if (info->cpumap.size || info->num_vcpu_soft_affinity)
+            LOG(WARN, "Can't run NUMA placement, as a soft "
                       "affinity has been specified explicitly");
         else if (info->nodemap.size)
             LOG(WARN, "Can't run NUMA placement, as the domain has "
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 0bda28152b..352cd214dd 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -356,7 +356,7 @@ static void parse_vcpu_affinity(libxl_domain_build_info *b_info,
             j++;
         }
 
-        /* We have a list of cpumaps, disable automatic placement */
+        /* When we have a list of cpumaps, always disable automatic placement */
         libxl_defbool_set(&b_info->numa_placement, false);
     } else {
         int i;
@@ -380,7 +380,9 @@ static void parse_vcpu_affinity(libxl_domain_build_info *b_info,
                               &vcpu_affinity_array[0]);
         }
 
-        libxl_defbool_set(&b_info->numa_placement, false);
+        /* If this is a soft affinity, disable automatic placement */
+        if (!is_hard)
+            libxl_defbool_set(&b_info->numa_placement, false);
     }
 }
 



* Re: [RFC PATCH v2] tools: libxl/xl: run NUMA placement even when a hard-affinity is set
From: Wei Liu @ 2018-10-31 15:24 UTC
  To: Dario Faggioli; +Cc: xen-devel, Ian Jackson, Wei Liu, George Dunlap

On Fri, Oct 19, 2018 at 05:54:41PM +0200, Dario Faggioli wrote:
> [...]
>
> Signed-off-by: Dario Faggioli <dfaggioli@suse.com>

Acked-by: Wei Liu <wei.liu2@citrix.com>
