From: Elena Ufimtseva <ufimtseva@gmail.com>
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
	msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
	ian.jackson@eu.citrix.com, JBeulich@suse.com,
	Elena Ufimtseva <ufimtseva@gmail.com>
Subject: [PATCH v10 8/9] libxl: vnuma nodes placement bits
Date: Wed,  3 Sep 2014 00:24:17 -0400
Message-ID: <1409718258-3276-6-git-send-email-ufimtseva@gmail.com>
In-Reply-To: <1409718258-3276-1-git-send-email-ufimtseva@gmail.com>

Automatic NUMA placement cancels the manual vnode placement
mechanism. If NUMA placement is explicitly specified, try
to fit vnodes to the physical nodes.
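
The decision logic this patch adds to libxl__build_pre() boils down to the
following (a simplified sketch rather than the literal code;
"numa_placement_in_effect" is a placeholder for the affinity and
numa_placement checks already in libxl__build_pre(), and error handling is
omitted):

    if (numa_placement_in_effect) {
        /* Automatic NUMA placement picks the pnodes. */
        if (!libxl_defbool_val(info->vnuma_autoplacement))
            LOG(INFO, "user vnode to pnode mapping will not be used");
        libxl__init_vnode_to_pnode(gc, domid, info);  /* greedy fit by memory */
    } else if (!libxl_defbool_val(info->vnuma_autoplacement)) {
        /* Honour the user-supplied map, if it is usable. */
        if (!libxl__vnodemap_is_usable(gc, info))
            return ERROR_FAIL;
    } else {
        libxl__init_vnode_to_pnode(gc, domid, info);
    }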

Changes since v8:
    - Added the number of ranges (vmemranges) for future support of
      multi-range nodes; for PV guests it is set to the same value
      as the number of vNUMA nodes.

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxl/libxl_create.c |    1 +
 tools/libxl/libxl_dom.c    |  204 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 205 insertions(+)
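
For reference, the vnode to pnode assignment in libxl__init_vnode_to_pnode()
below is a simple greedy fit by memory size. A minimal standalone sketch of
the same idea (the names and example sizes here are illustrative, not libxl's):

    #include <stdint.h>
    #include <stdio.h>

    #define VNUMA_NO_NODE (~0u)

    /*
     * Greedy fit: for each physical node, claim memory for every vnode
     * that is still unassigned and whose size still fits into the free
     * memory left on that pnode.
     */
    static void fit_vnodes(unsigned nr_vnodes, const uint64_t *vnode_mem,
                           unsigned nr_pnodes, const uint64_t *pnode_free,
                           unsigned *vnode_to_pnode)
    {
        uint64_t claimed[nr_pnodes];
        unsigned i, n;

        for (i = 0; i < nr_vnodes; i++)
            vnode_to_pnode[i] = VNUMA_NO_NODE;
        for (n = 0; n < nr_pnodes; n++)
            claimed[n] = 0;

        for (n = 0; n < nr_pnodes; n++) {
            for (i = 0; i < nr_vnodes; i++) {
                if (vnode_to_pnode[i] == VNUMA_NO_NODE &&
                    claimed[n] + vnode_mem[i] <= pnode_free[n]) {
                    vnode_to_pnode[i] = n;       /* vnode i lands on pnode n */
                    claimed[n] += vnode_mem[i];
                }
            }
        }
    }

    int main(void)
    {
        /* Two vnodes (512M and 1G) against two pnodes with 1G free each. */
        const uint64_t vmem[]  = { 512ULL << 20, 1024ULL << 20 };
        const uint64_t pfree[] = { 1024ULL << 20, 1024ULL << 20 };
        unsigned map[2];

        fit_vnodes(2, vmem, 2, pfree, map);
        printf("vnode0 -> pnode %u, vnode1 -> pnode %u\n", map[0], map[1]);
        return 0;
    }

Vnodes that cannot be fitted keep VNUMA_NO_NODE, mirroring the
LIBXC_VNUMA_NO_NODE default used in the patch.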

diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index fc332ef..a4977d2 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -203,6 +203,7 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc,
     }
 
     libxl_defbool_setdefault(&b_info->numa_placement, true);
+    libxl_defbool_setdefault(&b_info->vnuma_autoplacement, true);
 
     if (b_info->max_memkb == LIBXL_MEMKB_DEFAULT)
         b_info->max_memkb = 32 * 1024;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index c944804..b1376c4 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -23,6 +23,7 @@
 #include <xc_dom.h>
 #include <xen/hvm/hvm_info_table.h>
 #include <xen/hvm/hvm_xs_strings.h>
+#include <libxl_vnuma.h>
 
 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
 {
@@ -227,12 +228,114 @@ static void hvm_set_conf_params(xc_interface *handle, uint32_t domid,
                     libxl_defbool_val(info->u.hvm.nested_hvm));
 }
 
+/* sets vnode_to_pnode map. */
+static int libxl__init_vnode_to_pnode(libxl__gc *gc, uint32_t domid,
+                        libxl_domain_build_info *info)
+{
+    unsigned int i, n;
+    int nr_nodes = 0;
+    uint64_t *vnodes_mem;
+    unsigned long long *nodes_claim = NULL;
+    libxl_numainfo *ninfo = NULL;
+
+    if (info->vnuma_vnodemap == NULL) {
+        info->vnuma_vnodemap = libxl__calloc(gc, info->vnodes,
+                                      sizeof(*info->vnuma_vnodemap));
+    }
+
+    /* default setting. */
+    for (i = 0; i < info->vnodes; i++)
+        info->vnuma_vnodemap[i] = LIBXC_VNUMA_NO_NODE;
+
+    /* Get NUMA info. */
+    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
+    if (ninfo == NULL)
+        return ERROR_FAIL;
+    /* Nothing to do if there is only one NUMA node. */
+    if (nr_nodes <= 1)
+        return 0;
+
+    vnodes_mem = info->vnuma_mem;
+    /*
+     * TODO: change the algorithm. The current one just fits the vnodes
+     * to pnodes by their memory sizes. If no pnode is found, the default
+     * value of LIBXC_VNUMA_NO_NODE is used.
+     */
+    nodes_claim = libxl__calloc(gc, info->vnodes, sizeof(*nodes_claim));
+    if ( !nodes_claim )
+        return ERROR_FAIL;
+
+    libxl_for_each_set_bit(n, info->nodemap)
+    {
+        for (i = 0; i < info->vnodes; i++)
+        {
+            unsigned long mem_sz = vnodes_mem[i] << 20;
+            if ((nodes_claim[n] + mem_sz <= ninfo[n].free) &&
+                 /* vnode was not set yet. */
+                 (info->vnuma_vnodemap[i] == LIBXC_VNUMA_NO_NODE ) )
+            {
+                info->vnuma_vnodemap[i] = n;
+                nodes_claim[n] += mem_sz;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Builds vnode memory regions from configuration info
+ * for vnuma nodes with multiple regions.
+ */
+static int libxl__build_vnuma_ranges(libxl__gc *gc,
+                              uint32_t domid,
+                              /* IN: mem sizes in megabytes */
+                              libxl_domain_build_info *b_info,
+                              /* OUT: Linux NUMA block addresses */
+                              vmemrange_t **memrange)
+{
+    /*
+     * For non-PV domains, construction of the regions will
+     * need to have its own implementation.
+     */
+    if (b_info->type != LIBXL_DOMAIN_TYPE_PV) {
+        LOG(DETAIL, "vNUMA is only supported for PV guests now.\n");
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (b_info->vnodes == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    b_info->vmemranges = b_info->vnodes;
+
+    *memrange = libxl__calloc(gc, b_info->vnodes,
+                              sizeof(vmemrange_t));
+
+    /*
+     * For a PV domain, along with alignment, each region's nid will
+     * be set to the corresponding vNUMA node number and ignored
+     * later during allocation.
+     */
+
+    if (libxl__vnuma_align_mem(gc, domid, b_info, *memrange) < 0) {
+        LOG(DETAIL, "Failed to align memory map.\n");
+        errno = ERROR_INVAL;
+        return ERROR_FAIL;
+    }
+
+    return 0;
+}
+
 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
               libxl_domain_config *d_config, libxl__domain_build_state *state)
 {
     libxl_domain_build_info *const info = &d_config->b_info;
     libxl_ctx *ctx = libxl__gc_owner(gc);
     char *xs_domid, *con_domid;
+    struct vmemrange *memrange;
     int rc;
 
     if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
@@ -240,6 +343,20 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
         return ERROR_FAIL;
     }
 
+    if (libxl__build_vnuma_ranges(gc, domid, info, &memrange) != 0) {
+        LOG(DETAIL, "Failed to build vnuma nodes memory ranges.\n");
+        return ERROR_FAIL;
+
+    }
+
+    /*
+     * NUMA placement and vNUMA autoplacement handling:
+     * If numa_placement is left at its default, do not use the vnode
+     * to pnode mapping, as the automatic placement algorithm will find
+     * the best NUMA nodes. If numa_placement is not used, we can try
+     * to use the domain's vnode to pnode mask.
+     */
+
     /*
      * Check if the domain has any CPU or node affinity already. If not, try
      * to build up the latter via automatic NUMA placement. In fact, in case
@@ -298,7 +415,33 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
                                    NULL, &cpumap_soft);
 
         libxl_bitmap_dispose(&cpumap_soft);
+
+        /*
+         * If a vnode_to_pnode mask was defined, don't use it when we
+         * automatically place the domain on NUMA nodes; just warn.
+         */
+        if (!libxl_defbool_val(info->vnuma_autoplacement)) {
+            LOG(INFO, "Automatic NUMA placement for domain is turned on. \
+                vnode to physical nodes mapping will not be used.");
+        }
+        if (libxl__init_vnode_to_pnode(gc, domid, info) < 0) {
+            LOG(ERROR, "Failed to build vnode to pnode map\n");
+            return ERROR_FAIL;
+        }
+    } else {
+        if (!libxl_defbool_val(info->vnuma_autoplacement)) {
+            if (!libxl__vnodemap_is_usable(gc, info)) {
+                LOG(ERROR, "Defined vnode to pnode domain map cannot be used.");
+                return ERROR_FAIL;
+            }
+        } else {
+            if (libxl__init_vnode_to_pnode(gc, domid, info) < 0) {
+                LOG(ERROR, "Failed to build vnode to pnode map.\n");
+                return ERROR_FAIL;
+            }
+        }
     }
+
     if (info->nodemap.size)
         libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
     /* As mentioned in libxl.h, vcpu_hard_array takes precedence */
@@ -339,6 +482,22 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
         return ERROR_FAIL;
     }
 
+    /*
+     * The XEN_DOMCTL_setvnuma subop hypercall needs to know the max
+     * mem for the domain set by xc_domain_setmaxmem, so set vNUMA
+     * after maxmem has been set.
+     * memrange should contain regions if multi-region nodes are
+     * supported. For PV domains the regions are ignored.
+     */
+    if (xc_domain_setvnuma(ctx->xch, domid, info->vnodes,
+        info->vmemranges,
+        info->max_vcpus, memrange,
+        info->vdistance, info->vnuma_vcpumap,
+        info->vnuma_vnodemap) < 0) {
+        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't set vnuma topology");
+        return ERROR_FAIL;
+    }
+
     xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
     state->store_domid = xs_domid ? atoi(xs_domid) : 0;
     free(xs_domid);
@@ -434,6 +593,46 @@ retry_transaction:
     return 0;
 }
 
+/*
+ * Fills the xc_dom_image with memory sizes for later use in the
+ * domain memory allocator. No regions are filled in and used by the
+ * allocator here, as each region belongs to a single node.
+ */
+static int libxl__dom_vnuma_init(struct libxl_domain_build_info *info,
+                                 struct xc_dom_image *dom)
+{
+    errno = EINVAL;
+
+    if (info->vnodes == 0)
+        return -1;
+
+    info->vmemranges = info->vnodes;
+
+    dom->vnode_to_pnode = (unsigned int *)malloc(
+                            info->vnodes * sizeof(*info->vnuma_vnodemap));
+    dom->numa_memszs = (uint64_t *)malloc(
+                          info->vnodes * sizeof(*info->vnuma_mem));
+
+    errno = ENOMEM;
+    if ( dom->numa_memszs == NULL || dom->vnode_to_pnode == NULL ) {
+        info->vnodes = 0;
+        if (dom->vnode_to_pnode)
+            free(dom->vnode_to_pnode);
+        if (dom->numa_memszs)
+            free(dom->numa_memszs);
+        return -1;
+    }
+
+    memcpy(dom->numa_memszs, info->vnuma_mem,
+            sizeof(*info->vnuma_mem) * info->vnodes);
+    memcpy(dom->vnode_to_pnode, info->vnuma_vnodemap,
+            sizeof(*info->vnuma_vnodemap) * info->vnodes);
+
+    dom->vnodes = info->vnodes;
+
+    return 0;
+}
+
 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
              libxl_domain_build_info *info, libxl__domain_build_state *state)
 {
@@ -491,6 +690,11 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
     dom->xenstore_domid = state->store_domid;
     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
 
+    if ( (ret = libxl__dom_vnuma_init(info, dom)) != 0 ) {
+        LOGE(ERROR, "Failed to set doman vnuma");
+        goto out;
+    }
+
     if ( (ret = xc_dom_boot_xen_init(dom, ctx->xch, domid)) != 0 ) {
         LOGE(ERROR, "xc_dom_boot_xen_init failed");
         goto out;
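
Taken together, the new pieces slot into the domain build path in the
following order (a condensed sketch of the flow in the diff above, not
literal code; error handling omitted, and xc_domain_setvnuma() is the call
introduced in patch 4/9 of this series):

    /* libxl__build_pre(): */
    libxl__build_vnuma_ranges(gc, domid, info, &memrange); /* per-vnode ranges */
    libxl__init_vnode_to_pnode(gc, domid, info);            /* or the user's map */
    xc_domain_setmaxmem(ctx->xch, domid,
                        info->target_memkb + LIBXL_MAXMEM_CONSTANT);
    xc_domain_setvnuma(ctx->xch, domid, info->vnodes, info->vmemranges,
                       info->max_vcpus, memrange, info->vdistance,
                       info->vnuma_vcpumap, info->vnuma_vnodemap);

    /* libxl__build_pv(): */
    libxl__dom_vnuma_init(info, dom);  /* copy sizes and map into xc_dom_image */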
-- 
1.7.10.4
