All of lore.kernel.org
 help / color / mirror / Atom feed
From: Elena Ufimtseva <ufimtseva@gmail.com>
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
	msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
	ian.jackson@eu.citrix.com, JBeulich@suse.com,
	Elena Ufimtseva <ufimtseva@gmail.com>
Subject: [PATCH v10 6/9] libxl: build numa nodes memory blocks
Date: Wed,  3 Sep 2014 00:24:15 -0400	[thread overview]
Message-ID: <1409718258-3276-4-git-send-email-ufimtseva@gmail.com> (raw)
In-Reply-To: <1409718258-3276-1-git-send-email-ufimtseva@gmail.com>

Create the vmemrange structure based on the
PV guest's E820 map. Values are in Megabytes.
Also export the E820 filter code e820_sanitize
out to be available internally.
As Xen can support multi-range vNUMA nodes, for a
PV guest it is one range per one node. The other
domain types should have their building routines
implemented.

Changes since v8:
    - Added setting of the vnuma memory ranges node ids;

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxl/libxl_internal.h |    9 ++
 tools/libxl/libxl_numa.c     |  201 ++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_x86.c      |    3 +-
 3 files changed, 212 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index beb052e..63ccb5e 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3088,6 +3088,15 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
     libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap);
 }
 
+bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info);
+
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries,
+                  unsigned long map_limitkb, unsigned long balloon_kb);
+
+int libxl__vnuma_align_mem(libxl__gc *gc, uint32_t domid,
+                           struct libxl_domain_build_info *b_info,
+                           vmemrange_t *memblks);
+
 _hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid,
                                    const libxl_ms_vm_genid *id);
 
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
index 94ca4fe..c416faf 100644
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -19,6 +19,10 @@
 
 #include "libxl_internal.h"
 
+#include "libxl_vnuma.h"
+
+#include "xc_private.h"
+
 /*
  * What follows are helpers for generating all the k-combinations
  * without repetitions of a set S with n elements in it. Formally
@@ -508,6 +512,203 @@ int libxl__get_numa_candidate(libxl__gc *gc,
 }
 
 /*
+ * Check if we can fit vnuma nodes to numa pnodes
+ * from vnode_to_pnode array.
+ */
+bool libxl__vnodemap_is_usable(libxl__gc *gc,
+                            libxl_domain_build_info *info)
+{
+    unsigned int i;
+    libxl_numainfo *ninfo = NULL;
+    unsigned long long *claim;
+    unsigned int node;
+    uint64_t *sz_array;
+    int nr_nodes = 0;
+
+    /* Cannot use specified mapping if not NUMA machine. */
+    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
+    if (ninfo == NULL)
+        return false;
+
+    sz_array = info->vnuma_mem;
+    claim = libxl__calloc(gc, info->vnodes, sizeof(*claim));
+    /* Get total memory required on each physical node. */
+    for (i = 0; i < info->vnodes; i++)
+    {
+        node = info->vnuma_vnodemap[i];
+
+        if (node < nr_nodes)
+            claim[node] += (sz_array[i] << 20);
+        else
+            goto vnodemapout;
+   }
+   for (i = 0; i < nr_nodes; i++) {
+       if (claim[i] > ninfo[i].free)
+          /* Cannot complete user request, falling to default. */
+          goto vnodemapout;
+   }
+
+ vnodemapout:
+   return true;
+}
+
+/*
+ * Returns the number of absent (non-RAM) bytes within the e820 map
+ * between the start and end addresses passed. Needed to correctly
+ * set NUMA memory ranges for the domain.
+ */
+static unsigned long e820_memory_hole_size(unsigned long start,
+                                            unsigned long end,
+                                            struct e820entry e820[],
+                                            unsigned int nr)
+{
+    unsigned int i;
+    unsigned long absent, start_blk, end_blk;
+
+    /* Start with the whole range absent; subtract every RAM overlap. */
+    absent = end - start;
+    for (i = 0; i < nr; i++) {
+        /* if not E820_RAM region, skip it. */
+        if (e820[i].type != E820_RAM)
+            continue;
+
+        /*
+         * Clamp the RAM region to [start, end) and subtract the
+         * intersection. This handles every overlap case uniformly,
+         * including a RAM region fully contained inside the range
+         * (which a start-inside/end-inside two-case check misses).
+         */
+        start_blk = e820[i].addr;
+        end_blk = e820[i].addr + e820[i].size;
+        if (start_blk < start)
+            start_blk = start;
+        if (end_blk > end)
+            end_blk = end;
+        if (start_blk < end_blk)
+            absent -= end_blk - start_blk;
+    }
+    return absent;
+}
+
+/*
+ * For each vnode, build the memory block start and end addresses,
+ * subtracting any memory hole from the range found in the e820 map.
+ * vnode memory sizes are passed here in megabytes, the result is
+ * in memory block addresses.
+ * The Linux kernel will adjust NUMA memory block sizes on its own,
+ * but we want to provide the kernel with NUMA block addresses that
+ * will be the same in the kernel and the hypervisor.
+ * Returns 0 on success, -EINVAL on failure.
+ */
+int libxl__vnuma_align_mem(libxl__gc *gc,
+                            uint32_t domid,
+                            /* IN: mem sizes in megabytes */
+                            libxl_domain_build_info *b_info,
+                            /* OUT: linux NUMA blocks addresses */
+                            vmemrange_t *memblks)
+{
+    unsigned int i, j;
+    int rc;
+    uint64_t next_start_blk, end_max = 0, size;
+    uint32_t nr;
+    struct e820entry map[E820MAX];
+
+    /*
+     * Note: errno must not be clobbered with libxl/libxc error
+     * codes; just report the failure through the return value.
+     */
+    if (b_info->vnodes == 0 || !memblks || !b_info->vnuma_mem)
+        return -EINVAL;
+
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+
+    /* Retrieve e820 map for this host. */
+    rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
+    if (rc < 0)
+        return -EINVAL;
+
+    nr = rc;
+    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
+                       (b_info->max_memkb - b_info->target_memkb) +
+                       b_info->u.pv.slack_memkb);
+    if (rc)
+        return -EINVAL;
+
+    /* find max memory address for this host. */
+    for (j = 0; j < nr; j++) {
+        if (map[j].type == E820_RAM) {
+            end_max = max(end_max, map[j].addr + map[j].size);
+        }
+    }
+
+    memset(memblks, 0, sizeof(*memblks) * b_info->vnodes);
+    next_start_blk = 0;
+
+    /* NOTE(review): assumes map[0] is the lowest RAM region - confirm
+     * e820_sanitize guarantees a sorted map starting with RAM. */
+    memblks[0].start = map[0].addr;
+
+    for (i = 0; i < b_info->vnodes; i++) {
+
+        memblks[i].start += next_start_blk;
+        memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20);
+
+        if (memblks[i].end > end_max) {
+            /* Log before shrinking so the old value is visible. */
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                       "Shrunk vNUMA memory block %u address to max e820 "
+                       "address: %#010"PRIx64" -> %#010"PRIx64,
+                       i, memblks[i].end, end_max);
+            memblks[i].end = end_max;
+            break;
+        }
+
+        size = memblks[i].end - memblks[i].start;
+        /*
+         * For pv host with e820_host option turned on we need
+         * to take into account memory holes. For pv host with
+         * e820_host disabled or unset, the map is a contiguous
+         * RAM region.
+         */
+        if (libxl_defbool_val(b_info->u.pv.e820_host)) {
+            /* Grow the block until its usable (non-hole) size reaches
+             * the requested vnode size. */
+            while ((memblks[i].end - memblks[i].start -
+                    e820_memory_hole_size(memblks[i].start,
+                    memblks[i].end, map, nr)) < size) {
+
+                /* NOTE(review): MIN_VNODE_SIZE << 10 looks like KB, not
+                 * bytes (cf. the << 20 MB-to-bytes shifts above) -
+                 * confirm the intended units of MIN_VNODE_SIZE. */
+                memblks[i].end += MIN_VNODE_SIZE << 10;
+                if (memblks[i].end > end_max) {
+                    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                               "Shrunk vNUMA memory block %u address to max "
+                               "e820 address: %#010"PRIx64" -> %#010"PRIx64,
+                               i, memblks[i].end, end_max);
+                    memblks[i].end = end_max;
+                    break;
+                }
+            }
+        }
+        next_start_blk = memblks[i].end;
+        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                   "i %u, start = %#010"PRIx64", end = %#010"PRIx64,
+                   i, memblks[i].start, memblks[i].end);
+    }
+
+    /* Did not form memory addresses for every node? */
+    if (i != b_info->vnodes) {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+                   "Not all nodes were populated with block addresses, "
+                   "only %u out of %u", i, b_info->vnodes);
+        return -EINVAL;
+    }
+
+    for (i = 0; i < b_info->vnodes; i++) {
+        memblks[i].nid = i;
+        memblks[i].flags = 0;
+    }
+
+    return 0;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-basic-offset: 4
diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index 7589060..46e84e4 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -1,5 +1,6 @@
 #include "libxl_internal.h"
 #include "libxl_arch.h"
+#include "libxl_vnuma.h"
 
 static const char *e820_names(int type)
 {
@@ -14,7 +15,7 @@ static const char *e820_names(int type)
     return "Unknown";
 }
 
-static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
                          uint32_t *nr_entries,
                          unsigned long map_limitkb,
                          unsigned long balloon_kb)
-- 
1.7.10.4

  parent reply	other threads:[~2014-09-03  4:24 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-03  4:24 [PATCH v10 3/9] vnuma hook to debug-keys u Elena Ufimtseva
2014-09-03  4:24 ` [PATCH v10 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA Elena Ufimtseva
2014-09-03 14:53   ` Ian Campbell
2014-09-03  4:24 ` [PATCH v10 5/9] libxl: vnuma types declararion Elena Ufimtseva
2014-09-03 15:03   ` Ian Campbell
2014-09-03 16:04   ` Ian Campbell
2014-09-04  3:48     ` Elena Ufimtseva
     [not found]       ` <1409826927.15057.22.camel@kazak.uk.xensource.com>
2014-09-08 16:13         ` Dario Faggioli
2014-09-08 19:47           ` Elena Ufimtseva
2014-09-16  6:17         ` Elena Ufimtseva
2014-09-16  7:16           ` Dario Faggioli
2014-09-16 12:57             ` Elena Ufimtseva
2014-09-03  4:24 ` Elena Ufimtseva [this message]
2014-09-03 15:21   ` [PATCH v10 6/9] libxl: build numa nodes memory blocks Ian Campbell
2014-09-04  4:47     ` Elena Ufimtseva
2014-09-05  3:50     ` Elena Ufimtseva
2014-09-12 10:18   ` Dario Faggioli
2014-09-03  4:24 ` [PATCH v10 7/9] libxc: allocate domain memory for vnuma enabled Elena Ufimtseva
2014-09-03 15:26   ` Ian Campbell
2014-09-12 11:06   ` Dario Faggioli
2014-09-03  4:24 ` [PATCH v10 8/9] libxl: vnuma nodes placement bits Elena Ufimtseva
2014-09-03 15:50   ` Konrad Rzeszutek Wilk
2014-09-03 15:52   ` Ian Campbell
2014-09-12 16:51   ` Dario Faggioli
2014-09-03  4:24 ` [PATCH v10 9/9] libxl: vnuma topology configuration parser and doc Elena Ufimtseva
2014-09-03 15:42   ` Konrad Rzeszutek Wilk
2014-09-11 17:13   ` Dario Faggioli
2014-09-12  2:04     ` Elena Ufimtseva
2014-09-12  9:02       ` Dario Faggioli
2014-09-12  9:31         ` Wei Liu
2014-09-12  9:59           ` Dario Faggioli
2014-09-03 15:37 ` [PATCH v10 3/9] vnuma hook to debug-keys u Konrad Rzeszutek Wilk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409718258-3276-4-git-send-email-ufimtseva@gmail.com \
    --to=ufimtseva@gmail.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=dario.faggioli@citrix.com \
    --cc=george.dunlap@eu.citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=keir@xen.org \
    --cc=lccycc123@gmail.com \
    --cc=msw@linux.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.