* [PATCH v8 3/9] vnuma hook to debug-keys u
@ 2014-08-26  7:45 Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA Elena Ufimtseva
                   ` (7 more replies)
  0 siblings, 8 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26  7:45 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, JBeulich,
	Elena Ufimtseva

Add a debug-keys ('u') hook to display the vNUMA topology of guest domains.
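
With this hook in place, pressing the 'u' debug key appends per-domain
vNUMA information to the existing NUMA dump, roughly along these lines
(illustrative numbers only, following the printk formats below):

    Domain has 2 vnodes, 4 vcpus
        vnode   0 - pnode 0, 2048 MB, vcpu nrs: 0 1
        vnode   1 - pnode 1, 2048 MB, vcpu nrs: 2 3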

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 xen/arch/x86/numa.c |   35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index b141877..9efceac 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -347,7 +347,7 @@ EXPORT_SYMBOL(node_data);
 static void dump_numa(unsigned char key)
 {
 	s_time_t now = NOW();
-	int i;
+	int i, j, n, err;
 	struct domain *d;
 	struct page_info *page;
 	unsigned int page_num_node[MAX_NUMNODES];
@@ -389,6 +389,39 @@ static void dump_numa(unsigned char key)
 
 		for_each_online_node(i)
 			printk("    Node %u: %u\n", i, page_num_node[i]);
+
+		if ( d->vnuma )
+		{
+			printk("    Domain has %u vnodes, %u vcpus\n",
+				   d->vnuma->nr_vnodes, d->max_vcpus);
+			for ( i = 0; i < d->vnuma->nr_vnodes; i++ )
+			{
+				err = snprintf(keyhandler_scratch, 12, "%u",
+						       d->vnuma->vnode_to_pnode[i]);
+				if ( err < 0 || d->vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
+					snprintf(keyhandler_scratch, 3, "???");
+				printk("        vnode %3u - pnode %s,", i, keyhandler_scratch);
+
+				printk(" %"PRIu64" MB, ",
+					   (d->vnuma->vmemrange[i].end - d->vnuma->vmemrange[i].start) >> 20);
+
+				printk("vcpu nrs: ");
+				for ( j = 0, n = 0; j < d->max_vcpus; j++ )
+				{
+					if ( d->vnuma->vcpu_to_vnode[j] == i )
+					{
+						if ( ((n + 1) % 8) == 0 )
+							printk("%d\n", j);
+						else if ( !(n % 8) && n != 0 )
+							printk("%s%d ", "             ", j);
+						else
+							printk("%d ", j);
+						n++;
+					}
+				}
+				printk("\n");
+			}
+		}
 	}
 
 	rcu_read_unlock(&domlist_read_lock);
-- 
1.7.10.4


* [PATCH v8 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
@ 2014-08-26  7:45 ` Elena Ufimtseva
  2014-08-27 20:31   ` Konrad Rzeszutek Wilk
  2014-08-26  7:45 ` [PATCH v8 5/9] libxl: vnuma types declaration Elena Ufimtseva
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26  7:45 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, JBeulich,
	Elena Ufimtseva

With the introduction of XEN_DOMCTL_setvnumainfo in the patch
titled "xen: vnuma topology and subop hypercalls", this adds the
plumbing to use it from the toolstack. The caller is allowed to
invoke it multiple times if desired. It will error out if nr_vnodes
or nr_regions is zero, if nr_regions is smaller than nr_vnodes, or
if any of the array arguments is NULL.
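
As a rough usage sketch (not part of this patch; the helper name and the
single-vnode values are made up for illustration), a toolstack caller could
push a trivial one-vnode topology like this:

    #include <xenctrl.h>

    /* Illustrative only: one vnode covering all guest memory, every vcpu
     * on that vnode, and vnode 0 placed on pnode 0. */
    static int set_trivial_vnuma(xc_interface *xch, uint32_t domid,
                                 uint32_t nr_vcpus, uint64_t mem_bytes)
    {
        vmemrange_t range = { .start = 0, .end = mem_bytes };
        unsigned int dist = 10;                /* 1x1 distance matrix */
        unsigned int vnode_to_pnode = 0;
        unsigned int vcpu_to_vnode[nr_vcpus];
        unsigned int i;

        for ( i = 0; i < nr_vcpus; i++ )
            vcpu_to_vnode[i] = 0;

        return xc_domain_setvnuma(xch, domid, 1 /* nr_vnodes */,
                                  1 /* nr_regions */, nr_vcpus,
                                  &range, &dist,
                                  vcpu_to_vnode, &vnode_to_pnode);
    }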

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxc/xc_domain.c |   65 +++++++++++++++++++++++++++++++++++++++++++++++
 tools/libxc/xenctrl.h   |   10 ++++++++
 2 files changed, 75 insertions(+)

diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index c67ac9a..1708766 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -2124,6 +2124,71 @@ int xc_domain_set_max_evtchn(xc_interface *xch, uint32_t domid,
     return do_domctl(xch, &domctl);
 }
 
+/* Plumbing Xen with vNUMA topology */
+int xc_domain_setvnuma(xc_interface *xch,
+                       uint32_t domid,
+                       uint32_t nr_vnodes,
+                       uint32_t nr_regions,
+                       uint32_t nr_vcpus,
+                       vmemrange_t *vmemrange,
+                       unsigned int *vdistance,
+                       unsigned int *vcpu_to_vnode,
+                       unsigned int *vnode_to_pnode)
+{
+    int rc;
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(vmemrange, sizeof(*vmemrange) * nr_regions,
+                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    DECLARE_HYPERCALL_BOUNCE(vdistance, sizeof(*vdistance) *
+                             nr_vnodes * nr_vnodes,
+                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    DECLARE_HYPERCALL_BOUNCE(vcpu_to_vnode, sizeof(*vcpu_to_vnode) * nr_vcpus,
+                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    DECLARE_HYPERCALL_BOUNCE(vnode_to_pnode, sizeof(*vnode_to_pnode) *
+                             nr_vnodes,
+                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    errno = EINVAL;
+
+    if ( nr_vnodes == 0 || nr_regions == 0 || nr_regions < nr_vnodes )
+        return -1;
+
+    if ( !vdistance || !vcpu_to_vnode || !vmemrange || !vnode_to_pnode )
+    {
+        PERROR("%s: Can't set vnuma without initializing topology", __func__);
+        return -1;
+    }
+
+    if ( xc_hypercall_bounce_pre(xch, vmemrange)      ||
+         xc_hypercall_bounce_pre(xch, vdistance)      ||
+         xc_hypercall_bounce_pre(xch, vcpu_to_vnode)  ||
+         xc_hypercall_bounce_pre(xch, vnode_to_pnode) )
+    {
+        rc = -1;
+        goto vnumaset_fail;
+
+    }
+
+    set_xen_guest_handle(domctl.u.vnuma.vmemrange, vmemrange);
+    set_xen_guest_handle(domctl.u.vnuma.vdistance, vdistance);
+    set_xen_guest_handle(domctl.u.vnuma.vcpu_to_vnode, vcpu_to_vnode);
+    set_xen_guest_handle(domctl.u.vnuma.vnode_to_pnode, vnode_to_pnode);
+
+    domctl.cmd = XEN_DOMCTL_setvnumainfo;
+    domctl.domain = (domid_t)domid;
+    domctl.u.vnuma.nr_vnodes = nr_vnodes;
+    domctl.u.vnuma.nr_regions = nr_regions;
+
+    rc = do_domctl(xch, &domctl);
+
+ vnumaset_fail:
+    xc_hypercall_bounce_post(xch, vmemrange);
+    xc_hypercall_bounce_post(xch, vdistance);
+    xc_hypercall_bounce_post(xch, vcpu_to_vnode);
+    xc_hypercall_bounce_post(xch, vnode_to_pnode);
+
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 1c5d0db..1c8aa42 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1245,6 +1245,16 @@ int xc_domain_set_memmap_limit(xc_interface *xch,
                                uint32_t domid,
                                unsigned long map_limitkb);
 
+int xc_domain_setvnuma(xc_interface *xch,
+                        uint32_t domid,
+                        uint32_t nr_vnodes,
+                        uint32_t nr_regions,
+                        uint32_t nr_vcpus,
+                        vmemrange_t *vmemrange,
+                        unsigned int *vdistance,
+                        unsigned int *vcpu_to_vnode,
+                        unsigned int *vnode_to_pnode);
+
 #if defined(__i386__) || defined(__x86_64__)
 /*
  * PC BIOS standard E820 types and structure.
-- 
1.7.10.4


* [PATCH v8 5/9] libxl: vnuma types declaration
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA Elena Ufimtseva
@ 2014-08-26  7:45 ` Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 6/9] libxl: build numa nodes memory blocks Elena Ufimtseva
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26  7:45 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, JBeulich,
	Elena Ufimtseva

Add vNUMA topology type declarations to the libxl_domain_build_info
structure.
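
For orientation (hypothetical values, not part of this patch), a two-vnode,
four-vcpu guest would end up carrying data along these lines in
libxl_domain_build_info:

    vnodes = 2, vregions = 2
    vnuma_mem      = { 2048, 2048 }      /* MB, num_vnuma_mem = 2 */
    vnuma_vcpumap  = { 0, 0, 1, 1 }      /* vcpu -> vnode, num_vnuma_vcpumap = 4 */
    vdistance      = { 10, 20, 20, 10 }  /* 2x2 row-major, num_vdistance = 4 */
    vnuma_vnodemap = { 0, 1 }            /* vnode -> pnode, num_vnuma_vnodemap = 2 */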

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxl/libxl_types.idl |    8 +++++++-
 tools/libxl/libxl_vnuma.h   |   16 ++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 tools/libxl/libxl_vnuma.h

diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 0b3496f..ed48b80 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -333,7 +333,13 @@ libxl_domain_build_info = Struct("domain_build_info",[
     ("disable_migrate", libxl_defbool),
     ("cpuid",           libxl_cpuid_policy_list),
     ("blkdev_start",    string),
-    
+    ("vnodes",          uint32),
+    ("vregions",        uint32),
+    ("vnuma_mem",       Array(uint64, "num_vnuma_mem")),
+    ("vnuma_vcpumap",   Array(uint32, "num_vnuma_vcpumap")),
+    ("vdistance",       Array(uint32, "num_vdistance")),
+    ("vnuma_vnodemap",  Array(uint32, "num_vnuma_vnondemap")),
+    ("vnuma_autoplacement",  libxl_defbool),
     ("device_model_version", libxl_device_model_version),
     ("device_model_stubdomain", libxl_defbool),
     # if you set device_model you must set device_model_version too
diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h
new file mode 100644
index 0000000..2ec46db
--- /dev/null
+++ b/tools/libxl/libxl_vnuma.h
@@ -0,0 +1,16 @@
+#ifndef LIBXL_VNUMA_H
+#define LIBXL_VNUMA_H
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#define VNUMA_NO_NODE ~((unsigned int)0)
+
+/*
+ * Min vNUMA node size from Linux for x86 architecture.
+ * See linux source code arch/x86/include/asm/numa.h
+ */
+#define MIN_VNODE_SIZE  (4*1024*1024)
+
+#define MAX_VNUMA_NODES ((unsigned int)1 << 10)
+
+#endif
-- 
1.7.10.4


* [PATCH v8 6/9] libxl: build numa nodes memory blocks
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 5/9] libxl: vnuma types declaration Elena Ufimtseva
@ 2014-08-26  7:45 ` Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 7/9] libxc: allocate domain memory for vnuma enabled Elena Ufimtseva
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26  7:45 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, JBeulich,
	Elena Ufimtseva

Create the vmemrange structure based on the
PV guest's E820 map. Values are in megabytes.
Also export the E820 filter code e820_sanitize
so that it is available internally.
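
For a rough feel of what libxl__vnuma_align_mem() (added below) produces
when the sanitized e820 starts at address 0 and has no holes, consider a
guest with two 1024 MB vnodes (illustrative numbers only):

    vnuma_mem  = [1024, 1024]                        (MB, from the domain config)
    memblks[0] : start = 0x00000000, end = 0x40000000
    memblks[1] : start = 0x40000000, end = 0x80000000

With e820_host enabled, each range is instead widened until it contains the
requested amount of E820_RAM, so memory holes are skipped over.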

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxl/libxl_internal.h |    9 ++
 tools/libxl/libxl_numa.c     |  201 ++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_x86.c      |    3 +-
 3 files changed, 212 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index beb052e..63ccb5e 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3088,6 +3088,15 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
     libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap);
 }
 
+bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info);
+
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries,
+                  unsigned long map_limitkb, unsigned long balloon_kb);
+
+int libxl__vnuma_align_mem(libxl__gc *gc, uint32_t domid,
+                           struct libxl_domain_build_info *b_info,
+                           vmemrange_t *memblks);
+
 _hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid,
                                    const libxl_ms_vm_genid *id);
 
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
index 94ca4fe..c416faf 100644
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -19,6 +19,10 @@
 
 #include "libxl_internal.h"
 
+#include "libxl_vnuma.h"
+
+#include "xc_private.h"
+
 /*
  * What follows are helpers for generating all the k-combinations
  * without repetitions of a set S with n elements in it. Formally
@@ -508,6 +512,203 @@ int libxl__get_numa_candidate(libxl__gc *gc,
 }
 
 /*
+ * Check if we can fit vnuma nodes to numa pnodes
+ * from vnode_to_pnode array.
+ */
+bool libxl__vnodemap_is_usable(libxl__gc *gc,
+                            libxl_domain_build_info *info)
+{
+    unsigned int i;
+    libxl_numainfo *ninfo = NULL;
+    unsigned long long *claim;
+    unsigned int node;
+    uint64_t *sz_array;
+    int nr_nodes = 0;
+
+    /* Cannot use specified mapping if not NUMA machine. */
+    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
+    if (ninfo == NULL)
+        return false;
+
+    sz_array = info->vnuma_mem;
+    claim = libxl__calloc(gc, info->vnodes, sizeof(*claim));
+    /* Get total memory required on each physical node. */
+    for (i = 0; i < info->vnodes; i++)
+    {
+        node = info->vnuma_vnodemap[i];
+
+        if (node < nr_nodes)
+            claim[node] += (sz_array[i] << 20);
+        else
+            goto vnodemapout;
+   }
+   for (i = 0; i < nr_nodes; i++) {
+       if (claim[i] > ninfo[i].free)
+          /* Cannot complete user request, falling to default. */
+          goto vnodemapout;
+   }
+
+ vnodemapout:
+   return true;
+}
+
+/*
+ * Returns the amount of memory (in bytes) absent from the e820 map
+ * between the start and end addresses passed. Needed to correctly
+ * set NUMA memory ranges for the domain.
+ */
+static unsigned long e820_memory_hole_size(unsigned long start,
+                                            unsigned long end,
+                                            struct e820entry e820[],
+                                            unsigned int nr)
+{
+    unsigned int i;
+    unsigned long absent, start_blk, end_blk;
+
+    /* Initialize the absent amount to the full size of the range. */
+    absent = end - start;
+    for (i = 0; i < nr; i++) {
+        /* if not E820_RAM region, skip it. */
+        if (e820[i].type != E820_RAM)
+            continue;
+
+        start_blk = e820[i].addr;
+        end_blk = e820[i].addr + e820[i].size;
+        /* beginning address is within this region? */
+        if (start >= start_blk && start <= end_blk) {
+            if (end > end_blk)
+                absent -= end_blk - start;
+            else
+                /* fit the region? then no absent pages. */
+                absent -= end - start;
+            continue;
+        }
+        /* found the end of range in this region? */
+        if (end <= end_blk && end >= start_blk) {
+            absent -= end - start_blk;
+            /* no need to look for more ranges. */
+            break;
+        }
+    }
+    return absent;
+}
+
+/*
+ * For each node, build memory block start and end addresses.
+ * Substract any memory hole from the range found in e820 map.
+ * vnode memory size are passed here in megabytes, the result is
+ * in memory block addresses.
+ * Linux kernel will adjust numa memory block sizes on its own.
+ * But we want to provide to the kernel numa block addresses that
+ * will be the same in kernel and hypervisor.
+ */
+int libxl__vnuma_align_mem(libxl__gc *gc,
+                            uint32_t domid,
+                            /* IN: mem sizes in megabytes */
+                            libxl_domain_build_info *b_info,
+                            /* OUT: linux NUMA blocks addresses */
+                            vmemrange_t *memblks)
+{
+    unsigned int i;
+    int j, rc;
+    uint64_t next_start_blk, end_max = 0, size;
+    uint32_t nr;
+    struct e820entry map[E820MAX];
+
+    errno = ERROR_INVAL;
+    if (b_info->vnodes == 0)
+        return -EINVAL;
+
+    if (!memblks || !b_info->vnuma_mem)
+        return -EINVAL;
+
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+
+    /* Retrieve e820 map for this host. */
+    rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
+
+    if (rc < 0) {
+        errno = rc;
+        return -EINVAL;
+    }
+    nr = rc;
+    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
+                       (b_info->max_memkb - b_info->target_memkb) +
+                       b_info->u.pv.slack_memkb);
+    if (rc) {
+        errno = rc;
+        return -EINVAL;
+    }
+
+    /* find max memory address for this host. */
+    for (j = 0; j < nr; j++) {
+        if (map[j].type == E820_RAM) {
+            end_max = max(end_max, map[j].addr + map[j].size);
+        }
+    }
+
+    memset(memblks, 0, sizeof(*memblks) * b_info->vnodes);
+    next_start_blk = 0;
+
+    memblks[0].start = map[0].addr;
+
+    for (i = 0; i < b_info->vnodes; i++) {
+
+        memblks[i].start += next_start_blk;
+        memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20);
+
+        if (memblks[i].end > end_max) {
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                    "Shrunk vNUMA memory block %d address to max e820 address: \
+                    %#010lx -> %#010lx\n", i, memblks[i].end, end_max);
+            memblks[i].end = end_max;
+            break;
+        }
+
+        size = memblks[i].end - memblks[i].start;
+        /*
+         * For pv host with e820_host option turned on we need
+         * to take into account memory holes. For pv host with
+         * e820_host disabled or unset, the map is a contiguous
+         * RAM region.
+         */
+        if (libxl_defbool_val(b_info->u.pv.e820_host)) {
+            while((memblks[i].end - memblks[i].start -
+                   e820_memory_hole_size(memblks[i].start,
+                   memblks[i].end, map, nr)) < size ) {
+
+                memblks[i].end += MIN_VNODE_SIZE << 10;
+                if (memblks[i].end > end_max) {
+                    memblks[i].end = end_max;
+                    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                            "Shrunk vNUMA memory block %d address to max e820 \
+                            address: %#010lx -> %#010lx\n", i, memblks[i].end,
+                            end_max);
+                    break;
+                }
+            }
+        }
+        next_start_blk = memblks[i].end;
+        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,"i %d, start  = %#010lx, \
+                    end = %#010lx\n", i, memblks[i].start, memblks[i].end);
+    }
+
+    /* Did not form memory addresses for every node? */
+    if (i != b_info->vnodes)  {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Not all nodes were populated with \
+                block addresses, only %d out of %d", i, b_info->vnodes);
+        return -EINVAL;
+    }
+
+    for (i = 0; i < b_info->vnodes; i++) {
+        memblks[i].nid = i;
+        memblks[i].flags = 0;
+    }
+
+    return 0;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-basic-offset: 4
diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index 7589060..46e84e4 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -1,5 +1,6 @@
 #include "libxl_internal.h"
 #include "libxl_arch.h"
+#include "libxl_vnuma.h"
 
 static const char *e820_names(int type)
 {
@@ -14,7 +15,7 @@ static const char *e820_names(int type)
     return "Unknown";
 }
 
-static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
                          uint32_t *nr_entries,
                          unsigned long map_limitkb,
                          unsigned long balloon_kb)
-- 
1.7.10.4


* [PATCH v8 7/9] libxc: allocate domain memory for vnuma enabled
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
                   ` (2 preceding siblings ...)
  2014-08-26  7:45 ` [PATCH v8 6/9] libxl: build numa nodes memory blocks Elena Ufimtseva
@ 2014-08-26  7:45 ` Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 8/9] libxl: vnuma nodes placement bits Elena Ufimtseva
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26  7:45 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, JBeulich,
	Elena Ufimtseva

Make domain memory allocation vNUMA-aware, based on the provided
vnode to pnode map. If this map is not defined, use the default
allocation, which does not specify any physical node when
allocating memory.
Domain creation will fail unless at least one vNUMA node is defined.
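
As a rough illustration of the per-vnode arithmetic used by the allocator
added below (hypothetical numbers; the actual code is in arch_boot_alloc()):

    /* A 2048 MB vnode placed on pnode 1, with 4 KB x86 pages: */
    vnode_pages = (2048ULL << 20) >> PAGE_SHIFT_X86;   /* 524288 pages */
    memflags = XENMEMF_exact_node(1) | XENMEMF_exact_node_request;
    /* The pages are then populated in chunks of at most 1024*1024 via
     * xc_domain_populate_physmap_exact(), starting at this vnode's pfn base. */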

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxc/xc_dom.h     |   13 ++++++++
 tools/libxc/xc_dom_x86.c |   76 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/tools/libxc/xc_dom.h b/tools/libxc/xc_dom.h
index 6ae6a9f..61c2a06 100644
--- a/tools/libxc/xc_dom.h
+++ b/tools/libxc/xc_dom.h
@@ -164,6 +164,16 @@ struct xc_dom_image {
 
     /* kernel loader */
     struct xc_dom_arch *arch_hooks;
+
+    /*
+     * vNUMA topology and memory allocation structure.
+     * Defines how memory is allocated per physical NUMA
+     * node, as described by vnode_to_pnode.
+     */
+    uint32_t vnodes;
+    uint64_t *numa_memszs;
+    unsigned int *vnode_to_pnode;
+
     /* allocate up to virt_alloc_end */
     int (*allocate) (struct xc_dom_image * dom, xen_vaddr_t up_to);
 };
@@ -385,6 +395,9 @@ static inline xen_pfn_t xc_dom_p2m_guest(struct xc_dom_image *dom,
 int arch_setup_meminit(struct xc_dom_image *dom);
 int arch_setup_bootearly(struct xc_dom_image *dom);
 int arch_setup_bootlate(struct xc_dom_image *dom);
+int arch_boot_alloc(struct xc_dom_image *dom);
+
+#define LIBXC_VNUMA_NO_NODE ~((unsigned int)0)
 
 /*
  * Local variables:
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index bf06fe4..f2b4c98 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -759,7 +759,7 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
 int arch_setup_meminit(struct xc_dom_image *dom)
 {
     int rc;
-    xen_pfn_t pfn, allocsz, i, j, mfn;
+    xen_pfn_t pfn, i, j, mfn;
 
     rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
     if ( rc )
@@ -811,25 +811,77 @@ int arch_setup_meminit(struct xc_dom_image *dom)
         /* setup initial p2m */
         for ( pfn = 0; pfn < dom->total_pages; pfn++ )
             dom->p2m_host[pfn] = pfn;
+
+        /*
+         * Any PV domain should have at least one vNUMA node;
+         * if no config was provided, the toolstack is expected
+         * to have set up a single default vNUMA node.
+         */
+        if ( dom->vnodes == 0 ) {
+            xc_dom_printf(dom->xch,
+                         "%s: Cannot construct vNUMA topology with 0 vnodes\n",
+                         __FUNCTION__);
+            return -EINVAL;
+        }
         
         /* allocate guest memory */
-        for ( i = rc = allocsz = 0;
-              (i < dom->total_pages) && !rc;
-              i += allocsz )
-        {
-            allocsz = dom->total_pages - i;
-            if ( allocsz > 1024*1024 )
-                allocsz = 1024*1024;
-            rc = xc_domain_populate_physmap_exact(
-                dom->xch, dom->guest_domid, allocsz,
-                0, 0, &dom->p2m_host[i]);
-        }
+        rc = arch_boot_alloc(dom);
+        if ( rc )
+            return rc;
 
         /* Ensure no unclaimed pages are left unused.
          * OK to call if hadn't done the earlier claim call. */
         (void)xc_domain_claim_pages(dom->xch, dom->guest_domid,
                                     0 /* cancels the claim */);
     }
+    return rc;
+}
+
+/*
+ * Allocate domain memory taking the defined vNUMA topology
+ * and vnode_to_pnode map into account.
+ * Any PV guest will have at least one vNUMA node, with
+ * numa_memszs[0] = domain memory and the rest of the
+ * topology initialized with default values.
+ */
+int arch_boot_alloc(struct xc_dom_image *dom)
+{
+    int rc;
+    unsigned int n, memflags;
+    unsigned long long vnode_pages;
+    unsigned long long allocsz = 0, node_pfn_base, i;
+
+    rc = allocsz = node_pfn_base = n = 0;
+
+    for ( n = 0; n < dom->vnodes; n++ )
+    {
+        memflags = 0;
+        if ( dom->vnode_to_pnode[n] != LIBXC_VNUMA_NO_NODE )
+        {
+            memflags |= XENMEMF_exact_node(dom->vnode_to_pnode[n]);
+            memflags |= XENMEMF_exact_node_request;
+        }
+        /* numa_memszs is in megabytes; calculate the page count for this node. */
+        vnode_pages = (dom->numa_memszs[n] << 20) >> PAGE_SHIFT_X86;
+        for ( i = 0; i < vnode_pages; i += allocsz )
+        {
+            allocsz = vnode_pages - i;
+            if ( allocsz > 1024*1024 )
+                allocsz = 1024*1024;
+
+            rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
+                                            allocsz, 0, memflags,
+                                            &dom->p2m_host[node_pfn_base + i]);
+            if ( rc )
+            {
+                xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                        "%s: Failed allocation of %Lu pages for vnode %d on pnode %d out of %lu\n",
+                        __FUNCTION__, vnode_pages, n, dom->vnode_to_pnode[n], dom->total_pages);
+                return rc;
+            }
+        }
+        node_pfn_base += i;
+    }
 
     return rc;
 }
-- 
1.7.10.4


* [PATCH v8 8/9] libxl: vnuma nodes placement bits
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
                   ` (3 preceding siblings ...)
  2014-08-26  7:45 ` [PATCH v8 7/9] libxc: allocate domain memory for vnuma enabled Elena Ufimtseva
@ 2014-08-26  7:45 ` Elena Ufimtseva
  2014-08-26  7:45 ` [PATCH v8 9/9] libxl: vnuma topology configuration parser and doc Elena Ufimtseva
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26  7:45 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, JBeulich,
	Elena Ufimtseva

Automatic NUMA placement overrides the manual vnode placement
mechanism. If a manual vnode placement is explicitly specified,
try to fit the vnodes onto the requested physical nodes.
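
The resulting decision flow in libxl__build_pre() is roughly the following
(informal sketch, not literal code from this patch):

    if (automatic NUMA placement was applied) {
        /* A user-supplied vnuma_vnodemap is not honoured (a warning is
         * printed); libxl__init_vnode_to_pnode() derives the mapping. */
    } else if (vnuma_autoplacement is false) {
        /* The user's vnode-to-pnode map is checked with
         * libxl__vnodemap_is_usable() and domain creation fails if it
         * cannot be used. */
    } else {
        /* libxl__init_vnode_to_pnode() picks pnodes by free memory. */
    }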

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxl/libxl_create.c |    1 +
 tools/libxl/libxl_dom.c    |  204 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 205 insertions(+)

diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 9b66294..19023cf 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -203,6 +203,7 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc,
     }
 
     libxl_defbool_setdefault(&b_info->numa_placement, true);
+    libxl_defbool_setdefault(&b_info->vnuma_autoplacement, true);
 
     if (b_info->max_memkb == LIBXL_MEMKB_DEFAULT)
         b_info->max_memkb = 32 * 1024;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index c944804..5b059c2 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -23,6 +23,7 @@
 #include <xc_dom.h>
 #include <xen/hvm/hvm_info_table.h>
 #include <xen/hvm/hvm_xs_strings.h>
+#include <libxl_vnuma.h>
 
 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
 {
@@ -227,12 +228,114 @@ static void hvm_set_conf_params(xc_interface *handle, uint32_t domid,
                     libxl_defbool_val(info->u.hvm.nested_hvm));
 }
 
+/* sets vnode_to_pnode map. */
+static int libxl__init_vnode_to_pnode(libxl__gc *gc, uint32_t domid,
+                        libxl_domain_build_info *info)
+{
+    unsigned int i, n;
+    int nr_nodes = 0;
+    uint64_t *vnodes_mem;
+    unsigned long long *nodes_claim = NULL;
+    libxl_numainfo *ninfo = NULL;
+
+    if (info->vnuma_vnodemap == NULL) {
+        info->vnuma_vnodemap = libxl__calloc(gc, info->vnodes,
+                                      sizeof(*info->vnuma_vnodemap));
+    }
+
+    /* default setting. */
+    for (i = 0; i < info->vnodes; i++)
+        info->vnuma_vnodemap[i] = LIBXC_VNUMA_NO_NODE;
+
+    /* Get NUMA info. */
+    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
+    if (ninfo == NULL)
+        return ERROR_FAIL;
+    /* Nothing to see if only one NUMA node. */
+    if (nr_nodes <= 1)
+        return 0;
+
+    vnodes_mem = info->vnuma_mem;
+    /*
+     * TODO: change the algorithm. The current one simply fits the nodes
+     * by their memory sizes. If no pnode is found, the default value
+     * LIBXC_VNUMA_NO_NODE is used.
+     */
+    nodes_claim = libxl__calloc(gc, info->vnodes, sizeof(*nodes_claim));
+    if ( !nodes_claim )
+        return ERROR_FAIL;
+
+    libxl_for_each_set_bit(n, info->nodemap)
+    {
+        for (i = 0; i < info->vnodes; i++)
+        {
+            unsigned long mem_sz = vnodes_mem[i] << 20;
+            if ((nodes_claim[n] + mem_sz <= ninfo[n].free) &&
+                 /* vnode was not set yet. */
+                 (info->vnuma_vnodemap[i] == LIBXC_VNUMA_NO_NODE ) )
+            {
+                info->vnuma_vnodemap[i] = n;
+                nodes_claim[n] += mem_sz;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Builds vnode memory regions from configuration info
+ * for vnuma nodes with multiple regions.
+ */
+static int libxl__build_vnuma_ranges(libxl__gc *gc,
+                              uint32_t domid,
+                              /* IN: mem sizes in megabytes */
+                              libxl_domain_build_info *b_info,
+                              /* OUT: linux NUMA blocks addresses */
+                              vmemrange_t **memrange)
+{
+    /*
+     * For non-PV domains, construction of the regions will
+     * need its own implementation.
+     */
+    if (b_info->type != LIBXL_DOMAIN_TYPE_PV) {
+        LOG(DETAIL, "vNUMA is only supported for PV guests now.\n");
+        errno = EINVAL;
+        return -1;
+    }
+
+    if (b_info->vnodes == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    b_info->vregions = b_info->vnodes;
+
+    *memrange = libxl__calloc(gc, b_info->vnodes,
+                              sizeof(vmemrange_t));
+
+    /*
+     * For a PV domain, along with the alignment, each region's nid will
+     * be set to the corresponding vNUMA node number and ignored
+     * later during allocation.
+     */
+
+    if (libxl__vnuma_align_mem(gc, domid, b_info, *memrange) < 0) {
+        LOG(DETAIL, "Failed to align memory map.\n");
+        errno = ERROR_FAIL;
+        return ERROR_FAIL;
+    }
+
+    return 0;
+}
+
 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
               libxl_domain_config *d_config, libxl__domain_build_state *state)
 {
     libxl_domain_build_info *const info = &d_config->b_info;
     libxl_ctx *ctx = libxl__gc_owner(gc);
     char *xs_domid, *con_domid;
+    struct vmemrange *memrange;
     int rc;
 
     if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
@@ -240,6 +343,20 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
         return ERROR_FAIL;
     }
 
+    if (libxl__build_vnuma_ranges(gc, domid, info, &memrange) != 0) {
+        LOG(DETAIL, "Failed to build vnuma nodes memory ranges.\n");
+        return ERROR_FAIL;
+
+    }
+
+    /*
+     * NUMA placement and vNUMA autoplacement handling:
+     * If numa_placement is left at its default, do not use the vnode to
+     * pnode mapping, as the automatic placement algorithm will find the
+     * best NUMA nodes. If numa_placement is not used, we can try to use
+     * the domain's vnode to pnode mask.
+     */
+
     /*
      * Check if the domain has any CPU or node affinity already. If not, try
      * to build up the latter via automatic NUMA placement. In fact, in case
@@ -298,7 +415,33 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
                                    NULL, &cpumap_soft);
 
         libxl_bitmap_dispose(&cpumap_soft);
+
+        /*
+         * If a vnode_to_pnode mask was defined, don't use it when we
+         * automatically place the domain on NUMA nodes; just give a warning.
+         */
+        if (!libxl_defbool_val(info->vnuma_autoplacement)) {
+            LOG(INFO, "Automatic NUMA placement for domain is turned on. \
+                vnode to physical nodes mapping will not be used.");
+        }
+        if (libxl__init_vnode_to_pnode(gc, domid, info) < 0) {
+            LOG(ERROR, "Failed to build vnode to pnode map\n");
+            return ERROR_FAIL;
+        }
+    } else {
+        if (!libxl_defbool_val(info->vnuma_autoplacement)) {
+                if (!libxl__vnodemap_is_usable(gc, info)) {
+                    LOG(ERROR, "Defined vnode to pnode domain map cannot be used.\n");
+                    return ERROR_FAIL;
+                }
+        } else {
+            if (libxl__init_vnode_to_pnode(gc, domid, info) < 0) {
+                LOG(ERROR, "Failed to build vnode to pnode map.\n");
+                return ERROR_FAIL;
+            }
+        }
     }
+
     if (info->nodemap.size)
         libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
     /* As mentioned in libxl.h, vcpu_hard_array takes precedence */
@@ -339,6 +482,22 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
         return ERROR_FAIL;
     }
 
+    /*
+     * The XEN_DOMCTL_setvnuma subop hypercall needs to know the domain's
+     * maximum memory, which is set by xc_domain_setmaxmem, so set vNUMA
+     * after maxmem has been set.
+     * memrange should contain regions if multi-region nodes are
+     * supported. For PV domains the regions are ignored.
+     */
+    if (xc_domain_setvnuma(ctx->xch, domid, info->vnodes,
+        info->vregions,
+        info->max_vcpus, memrange,
+        info->vdistance, info->vnuma_vcpumap,
+        info->vnuma_vnodemap) < 0) {
+        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't set vnuma topology");
+        return ERROR_FAIL;
+    }
+
     xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
     state->store_domid = xs_domid ? atoi(xs_domid) : 0;
     free(xs_domid);
@@ -434,6 +593,46 @@ retry_transaction:
     return 0;
 }
 
+/*
+ * Fill the xc_dom_image with memory sizes for later use by the
+ * domain memory allocator. No regions are filled in or used by the
+ * allocator here, as each region belongs to exactly one node.
+ */
+static int libxl__dom_vnuma_init(struct libxl_domain_build_info *info,
+                                 struct xc_dom_image *dom)
+{
+    errno = ERROR_INVAL;
+
+    if (info->vnodes == 0)
+        return -1;
+
+    info->vregions = info->vnodes;
+
+    dom->vnode_to_pnode = (unsigned int *)malloc(
+                            info->vnodes * sizeof(*info->vnuma_vnodemap));
+    dom->numa_memszs = (uint64_t *)malloc(
+                          info->vnodes * sizeof(*info->vnuma_mem));
+
+    errno = ERROR_FAIL;
+    if ( dom->numa_memszs == NULL || dom->vnode_to_pnode == NULL ) {
+        info->vnodes = 0;
+        if (dom->vnode_to_pnode)
+            free(dom->vnode_to_pnode);
+        if (dom->numa_memszs)
+            free(dom->numa_memszs);
+        return -1;
+    }
+
+    memcpy(dom->numa_memszs, info->vnuma_mem,
+            sizeof(*info->vnuma_mem) * info->vnodes);
+    memcpy(dom->vnode_to_pnode, info->vnuma_vnodemap,
+            sizeof(*info->vnuma_vnodemap) * info->vnodes);
+
+    dom->vnodes = info->vnodes;
+
+    return 0;
+}
+
 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
              libxl_domain_build_info *info, libxl__domain_build_state *state)
 {
@@ -491,6 +690,11 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
     dom->xenstore_domid = state->store_domid;
     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
 
+    if ( (ret = libxl__dom_vnuma_init(info, dom)) != 0 ) {
+        LOGE(ERROR, "Failed to set doman vnuma");
+        goto out;
+    }
+
     if ( (ret = xc_dom_boot_xen_init(dom, ctx->xch, domid)) != 0 ) {
         LOGE(ERROR, "xc_dom_boot_xen_init failed");
         goto out;
-- 
1.7.10.4


* [PATCH v8 9/9] libxl: vnuma topology configuration parser and doc
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
                   ` (4 preceding siblings ...)
  2014-08-26  7:45 ` [PATCH v8 8/9] libxl: vnuma nodes placement bits Elena Ufimtseva
@ 2014-08-26  7:45 ` Elena Ufimtseva
  2014-08-26 15:29 ` [PATCH v8 3/9] vnuma hook to debug-keys u Jan Beulich
  2014-08-27 20:26 ` Konrad Rzeszutek Wilk
  7 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26  7:45 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, JBeulich,
	Elena Ufimtseva

Parse the vNUMA topology: the number of nodes and the memory
ranges. If not defined, initialize vNUMA with a single node and
a default topology: this one node covers all domain memory and
has all vcpus assigned to it.
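
As an illustrative example (all values made up), a PV guest config using
the options documented below could look like:

    memory = 4096
    vcpus  = 4
    vnodes = 2
    vnuma_mem       = [2048, 2048]
    vdistance       = [10, 20]
    vnuma_vcpumap   = [0, 0, 1, 1]
    vnuma_vnodemap  = [0, 1]
    vnuma_autoplacement = 0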

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 docs/man/xl.cfg.pod.5    |   77 ++++++++
 tools/libxl/xl_cmdimpl.c |  434 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 511 insertions(+)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 1e04eed..073664e 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -264,6 +264,83 @@ if the values of B<memory=> and B<maxmem=> differ.
 A "pre-ballooned" HVM guest needs a balloon driver, without a balloon driver
 it will crash.
 
+=item B<vnuma_nodes=N>
+
+Number of vNUMA nodes the guest will be initialized with on boot.
+A PV guest will have one vNUMA node by default.
+
+=item B<vnuma_mem=[vmem1, vmem2, ...]>
+
+List of memory sizes for each node, defined in MBytes. The number of items listed
+must match the number of vnodes. It will fail if the sum of all vnode memories
+does not match the domain memory or if nodes are missing.
+If not specified, memory will be split equally between vnodes. The current minimum
+memory size for one node is 32MB.
+
+Example: vnuma_mem=[1024, 1024, 2048, 2048]
+Total amount of memory in guest: 6GB
+
+=item B<vdistance=[d1, d2]>
+
+Defines the distance table for vNUMA nodes. NUMA topology distances are
+represented by a two-dimensional square matrix; element [i,j] is the
+distance between nodes i and j. In the trivial case all diagonal elements
+are equal and the matrix is symmetrical. The vdistance configuration option
+allows two values, d1 and d2, to be defined: d1 is used for all diagonal
+elements of the distance matrix and all other elements are set to d2.
+Distances are usually multiples of 10 in Linux and the same rule is used here.
+If not specified, default constant values are used for the distances,
+i.e. [10, 20]. For a single node the default distance is [10].
+
+Examples:
+vnodes = 3
+vdistance=[10, 20]
+will create this distance table (this is default setting as well):
+[10, 20, 20]
+[20, 10, 20]
+[20, 20, 10]
+
+=item B<vnuma_vcpumap=[node_nr, node_nr, ...]>
+
+Defines the vcpu to vnode mapping as a list of integers. The position in the list
+is a vcpu number, and the value is the vnode number to which that vcpu will be
+assigned.
+Current limitations:
+- every vNUMA node must have at least one vcpu, otherwise the default vcpu_to_vnode map will be used;
+- the total number of vnodes cannot be bigger than the number of vcpus.
+
+Example:
+Map of 4 vcpus to 2 vnodes:
+0,1 vcpu -> vnode0
+2,3 vcpu -> vnode1:
+
+vnuma_vcpumap = [0, 0, 1, 1]
+ 4 vcpus here -  0  1  2  3
+
+=item B<vnuma_vnodemap=[p1, p2, ..., pn]>
+
+List of physical node numbers; the position in the list represents the vnode number.
+Used for manual placement of vNUMA nodes onto physical NUMA nodes.
+Not used if automatic NUMA placement is active.
+
+Example:
+Assume a NUMA machine with 4 physical nodes. To place vnuma node 0 on pnode 2
+and vnuma node 1 on pnode 3:
+vnode0 -> pnode2
+vnode1 -> pnode3
+
+vnuma_vnodemap=[2, 3]
+The first vnode will be placed on node 2, the second on node 3.
+
+=item B<vnuma_autoplacement=[0|1]>
+
+If set to 1 and automatic NUMA placement is enabled, the best physical nodes to
+place the vnuma nodes on are found automatically and vnuma_vnodemap is ignored.
+Automatic NUMA placement is enabled if the domain has no pinned cpus.
+If vnuma_autoplacement is set to 0, the vnodes will be placed on the NUMA nodes set
+in vnuma_vnodemap, provided there is enough memory on those physical nodes. If not,
+the allocation will be made on any available node and may span multiple physical NUMA nodes.
+
 =back
 
 =head3 Event Actions
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index f1c136a..d9299e1 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -40,6 +40,7 @@
 #include "libxl_json.h"
 #include "libxlutil.h"
 #include "xl.h"
+#include "libxl_vnuma.h"
 
 /* For calls which return an errno on failure */
 #define CHK_ERRNOVAL( call ) ({                                         \
@@ -766,6 +767,432 @@ static void parse_vcpu_affinity(libxl_domain_build_info *b_info,
     }
 }
 
+static unsigned int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
+{
+    const char *buf;
+    char *ep;
+    unsigned long ul;
+    int rc = -EINVAL;
+
+    buf = xlu_cfg_get_listitem(list, i);
+    if (!buf)
+        return rc;
+    ul = strtoul(buf, &ep, 10);
+    if (ep == buf)
+        return rc;
+    if (ul >= UINT16_MAX)
+        return rc;
+    return (unsigned int)ul;
+}
+
+static void vdistance_set(unsigned int *vdistance,
+                                unsigned int nr_vnodes,
+                                unsigned int samenode,
+                                unsigned int othernode)
+{
+    unsigned int idx, slot;
+    for (idx = 0; idx < nr_vnodes; idx++)
+        for (slot = 0; slot < nr_vnodes; slot++)
+            *(vdistance + slot * nr_vnodes + idx) =
+                idx == slot ? samenode : othernode;
+}
+
+static void vcputovnode_default(unsigned int *cpu_to_node,
+                                unsigned int nr_vnodes,
+                                unsigned int max_vcpus)
+{
+    unsigned int cpu;
+    for (cpu = 0; cpu < max_vcpus; cpu++)
+        cpu_to_node[cpu] = cpu % nr_vnodes;
+}
+
+/* Split domain memory between vNUMA nodes equally. */
+static int split_vnumamem(libxl_domain_build_info *b_info)
+{
+    unsigned long long vnodemem = 0;
+    unsigned long n;
+    unsigned int i;
+
+    if (b_info->vnodes == 0)
+        return -1;
+
+    vnodemem = (b_info->max_memkb >> 10) / b_info->vnodes;
+    if (vnodemem < MIN_VNODE_SIZE)
+        return -1;
+    /* remainder in MBytes. */
+    n = (b_info->max_memkb >> 10) % b_info->vnodes;
+    /* get final sizes in MBytes. */
+    for (i = 0; i < (b_info->vnodes - 1); i++)
+        b_info->vnuma_mem[i] = vnodemem;
+    /* add the remainder to the last node. */
+    b_info->vnuma_mem[i] = vnodemem + n;
+    return 0;
+}
+
+static void vnuma_vnodemap_default(unsigned int *vnuma_vnodemap,
+                                   unsigned int nr_vnodes)
+{
+    unsigned int i;
+    for (i = 0; i < nr_vnodes; i++)
+        vnuma_vnodemap[i] = VNUMA_NO_NODE;
+}
+
+/*
+ * init vNUMA to "zero config" with one node and all other
+ * topology parameters set to default.
+ */
+static int vnuma_default_config(libxl_domain_build_info *b_info)
+{
+    b_info->vnodes = 1;
+    /* all memory goes to this one vnode, as well as vcpus. */
+    if (!(b_info->vnuma_mem = (uint64_t *)calloc(b_info->vnodes,
+                                sizeof(*b_info->vnuma_mem))))
+        goto bad_vnumazerocfg;
+
+    if (!(b_info->vnuma_vcpumap = (unsigned int *)calloc(b_info->max_vcpus,
+                                sizeof(*b_info->vnuma_vcpumap))))
+        goto bad_vnumazerocfg;
+
+    if (!(b_info->vdistance = (unsigned int *)calloc(b_info->vnodes *
+                                b_info->vnodes, sizeof(*b_info->vdistance))))
+        goto bad_vnumazerocfg;
+
+    if (!(b_info->vnuma_vnodemap = (unsigned int *)calloc(b_info->vnodes,
+                                sizeof(*b_info->vnuma_vnodemap))))
+        goto bad_vnumazerocfg;
+
+    b_info->vnuma_mem[0] = b_info->max_memkb >> 10;
+
+    /* all vcpus assigned to this vnode. */
+    vcputovnode_default(b_info->vnuma_vcpumap, b_info->vnodes,
+                        b_info->max_vcpus);
+
+    /* default vdistance is 10. */
+    vdistance_set(b_info->vdistance, b_info->vnodes, 10, 10);
+
+    /* VNUMA_NO_NODE for vnode_to_pnode. */
+    vnuma_vnodemap_default(b_info->vnuma_vnodemap, b_info->vnodes);
+
+    /*
+     * will be placed to some physical nodes defined by automatic
+     * numa placement or VNUMA_NO_NODE will not request exact node.
+     */
+    libxl_defbool_set(&b_info->vnuma_autoplacement, true);
+    return 0;
+
+ bad_vnumazerocfg:
+    return -1;
+}
+
+static void free_vnuma_info(libxl_domain_build_info *b_info)
+{
+    free(b_info->vnuma_mem);
+    free(b_info->vdistance);
+    free(b_info->vnuma_vcpumap);
+    free(b_info->vnuma_vnodemap);
+
+    b_info->vnuma_mem = NULL;
+    b_info->vdistance = NULL;
+    b_info->vnuma_vcpumap = NULL;
+    b_info->vnuma_vnodemap = NULL;
+
+    b_info->vnodes = 0;
+    b_info->vregions = 0;
+}
+
+static int parse_vnuma_mem(XLU_Config *config,
+                            libxl_domain_build_info **b_info)
+{
+    libxl_domain_build_info *dst;
+    XLU_ConfigList *vnumamemcfg;
+    int nr_vnuma_regions, i;
+    unsigned long long vnuma_memparsed = 0;
+    unsigned long ul;
+    const char *buf;
+    char *ep;
+
+    dst = *b_info;
+    if (!xlu_cfg_get_list(config, "vnuma_mem",
+                          &vnumamemcfg, &nr_vnuma_regions, 0)) {
+
+        if (nr_vnuma_regions != dst->vnodes) {
+            fprintf(stderr, "Number of numa regions (vnumamem = %d) is \
+                    incorrect (should be %d).\n", nr_vnuma_regions,
+                    dst->vnodes);
+            goto bad_vnuma_mem;
+        }
+
+        dst->vnuma_mem = calloc(dst->vnodes,
+                                 sizeof(*dst->vnuma_mem));
+        if (dst->vnuma_mem == NULL) {
+            fprintf(stderr, "Unable to allocate memory for vnuma ranges.\n");
+            goto bad_vnuma_mem;
+        }
+
+        /*
+         * Parse only nr_vnodes entries, even if more or fewer regions are given.
+         * Fewer entries are handled later; extra regions are discarded.
+         */
+        for (i = 0; i < dst->vnodes; i++) {
+            buf = xlu_cfg_get_listitem(vnumamemcfg, i);
+            if (!buf) {
+                fprintf(stderr,
+                        "xl: Unable to get element %d in vnuma memory list.\n", i);
+                goto bad_vnuma_mem;
+            }
+
+            ul = strtoul(buf, &ep, 10);
+            if (ep == buf) {
+                fprintf(stderr, "xl: Invalid argument parsing vnumamem: %s.\n", buf);
+                goto bad_vnuma_mem;
+            }
+
+            /* 32MB is the minimum size for a node, taken from Linux. */
+            if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
+                fprintf(stderr, "xl: vnuma memory %lu is not within %u - %u range.\n",
+                        ul, MIN_VNODE_SIZE, UINT32_MAX);
+                goto bad_vnuma_mem;
+            }
+
+            /* memory in MBytes */
+            dst->vnuma_mem[i] = ul;
+        }
+
+        /* Total memory for vNUMA parsed to verify */
+        for (i = 0; i < nr_vnuma_regions; i++)
+            vnuma_memparsed = vnuma_memparsed + (dst->vnuma_mem[i]);
+
+        /* Amount of memory for vnodes same as total? */
+        if ((vnuma_memparsed << 10) != (dst->max_memkb)) {
+            fprintf(stderr, "xl: vnuma memory is not the same as domain \
+                    memory size.\n");
+            goto bad_vnuma_mem;
+        }
+    } else {
+        dst->vnuma_mem = calloc(dst->vnodes,
+                                      sizeof(*dst->vnuma_mem));
+        if (dst->vnuma_mem == NULL) {
+            fprintf(stderr, "Unable to allocate memory for vnuma ranges.\n");
+            goto bad_vnuma_mem;
+        }
+
+        fprintf(stderr, "WARNING: vNUMA memory ranges were not specified.\n");
+        fprintf(stderr, "Using default equal vnode memory size %lu Kbytes \
+                to cover %lu Kbytes.\n",
+                dst->max_memkb / dst->vnodes, dst->max_memkb);
+
+        if (split_vnumamem(dst) < 0) {
+            fprintf(stderr, "Could not split vnuma memory into equal chunks.\n");
+            goto bad_vnuma_mem;
+        }
+    }
+    return 0;
+
+ bad_vnuma_mem:
+    return -1;
+}
+
+static int parse_vnuma_distance(XLU_Config *config,
+                                libxl_domain_build_info **b_info)
+{
+    libxl_domain_build_info *dst;
+    XLU_ConfigList *vdistancecfg;
+    int nr_vdist;
+
+    dst = *b_info;
+    dst->vdistance = calloc(dst->vnodes * dst->vnodes,
+                               sizeof(*dst->vdistance));
+    if (dst->vdistance == NULL)
+        goto bad_distance;
+
+    if (!xlu_cfg_get_list(config, "vdistance", &vdistancecfg, &nr_vdist, 0)) {
+        int d1, d2, i;
+        /*
+         * The first value is the same-node distance; the second is used for
+         * all other distances. This is currently required to avoid a
+         * non-symmetrical distance table, as that may break recent kernels.
+         * TODO: a better way to analyze an extended distance table, possibly
+         * OS specific.
+         */
+
+        for (i = 0; i < nr_vdist; i++) {
+            d1 = get_list_item_uint(vdistancecfg, i);
+        }
+
+        d1 = get_list_item_uint(vdistancecfg, 0);
+        if (dst->vnodes > 1)
+           d2 = get_list_item_uint(vdistancecfg, 1);
+        else
+           d2 = d1;
+
+        if (d1 >= 0 && d2 >= 0) {
+            if (d1 < d2)
+                fprintf(stderr, "WARNING: vnuma distance d1 < d2, %u < %u\n", d1, d2);
+            vdistance_set(dst->vdistance, dst->vnodes, d1, d2);
+        } else {
+            fprintf(stderr, "WARNING: vnuma distance values are incorrect.\n");
+            goto bad_distance;
+        }
+    } else {
+        fprintf(stderr, "Could not parse vnuma distances.\n");
+        vdistance_set(dst->vdistance, dst->vnodes, 10, 20);
+    }
+    return 0;
+
+ bad_distance:
+    return -1;
+}
+
+static int parse_vnuma_vcpumap(XLU_Config *config,
+                                libxl_domain_build_info **b_info)
+{
+    libxl_domain_build_info *dst;
+    XLU_ConfigList *vcpumap;
+    int nr_vcpumap, i;
+
+    dst = *b_info;
+    dst->vnuma_vcpumap = (unsigned int *)calloc(dst->max_vcpus,
+                                     sizeof(*dst->vnuma_vcpumap));
+    if (dst->vnuma_vcpumap == NULL)
+        goto bad_vcpumap;
+
+    if (!xlu_cfg_get_list(config, "vnuma_vcpumap",
+                          &vcpumap, &nr_vcpumap, 0)) {
+        if (nr_vcpumap == dst->max_vcpus) {
+            unsigned int  vnode, vcpumask = 0, vmask;
+
+            vmask = ~(~0 << nr_vcpumap);
+            for (i = 0; i < nr_vcpumap; i++) {
+                vnode = get_list_item_uint(vcpumap, i);
+                if (vnode >= 0 && vnode < dst->vnodes) {
+                    vcpumask |= (1 << i);
+                    dst->vnuma_vcpumap[i] = vnode;
+                }
+            }
+
+            /* Were all vcpus covered by the vcpu mask? */
+            if ( !(((vmask & vcpumask) + 1) == (1 << nr_vcpumap)) ) {
+                fprintf(stderr, "WARNING: Not all vnodes were covered \
+                        in numa_cpumask.\n");
+                goto bad_vcpumap;
+            }
+        } else {
+            fprintf(stderr, "WARNING:  Bad vnuma_vcpumap.\n");
+            goto bad_vcpumap;
+        }
+    }
+    else
+        vcputovnode_default(dst->vnuma_vcpumap,
+                            dst->vnodes,
+                            dst->max_vcpus);
+    return 0;
+
+ bad_vcpumap:
+    return -1;
+}
+
+static int parse_vnuma_vnodemap(XLU_Config *config,
+                                libxl_domain_build_info **b_info)
+{
+    libxl_domain_build_info *dst;
+    XLU_ConfigList *vnodemap;
+    int nr_vnodemap, i;
+
+    dst = *b_info;
+
+    /* Is there a mapping to physical NUMA nodes? */
+    dst->vnuma_vnodemap = (unsigned int *)calloc(dst->vnodes,
+                           sizeof(*dst->vnuma_vnodemap));
+    if (dst->vnuma_vnodemap == NULL)
+        goto bad_vnodemap;
+
+    if (!xlu_cfg_get_list(config, "vnuma_vnodemap",
+                          &vnodemap, &nr_vnodemap, 0)) {
+        /*
+         * If not specified or incorrect, it will be defined
+         * later based on the machine architecture, configuration
+         * and memory available when creating the domain.
+         */
+        libxl_defbool_set(&dst->vnuma_autoplacement, false);
+        if (nr_vnodemap == dst->vnodes) {
+            unsigned int vnodemask = 0, pnode, smask;
+            smask = ~(~0 << dst->vnodes);
+            for (i = 0; i < dst->vnodes; i++) {
+                pnode = get_list_item_uint(vnodemap, i);
+                if (pnode >= 0) {
+                    vnodemask |= (1 << i);
+                    dst->vnuma_vnodemap[i] = pnode;
+                }
+            }
+
+            /* Did it cover all vnodes in the mask? */
+            if ( !(((vnodemask & smask) + 1) == (1 << nr_vnodemap)) ) {
+                fprintf(stderr, "WARNING: Not all vnodes were covered \
+                        vnuma_vnodemap.\n");
+                fprintf(stderr, "Automatic placement will be used for vnodes.\n");
+                libxl_defbool_set(&dst->vnuma_autoplacement, true);
+                vnuma_vnodemap_default(dst->vnuma_vnodemap, dst->vnodes);
+            }
+        }
+        else {
+            fprintf(stderr, "WARNING: Incorrect vnuma_vnodemap.\n");
+            fprintf(stderr, "Automatic placement will be used for vnodes.\n");
+            libxl_defbool_set(&dst->vnuma_autoplacement, true);
+            vnuma_vnodemap_default(dst->vnuma_vnodemap, dst->vnodes);
+        }
+    }
+    else {
+        fprintf(stderr, "WARNING: Missing vnuma_vnodemap.\n");
+        fprintf(stderr, "Automatic placement will be used for vnodes.\n");
+        libxl_defbool_set(&dst->vnuma_autoplacement, true);
+        vnuma_vnodemap_default(dst->vnuma_vnodemap, dst->vnodes);
+    }
+    return 0;
+
+ bad_vnodemap:
+    return -1;
+
+}
+
+static void parse_vnuma_config(XLU_Config *config,
+                               libxl_domain_build_info *b_info)
+{
+    long l;
+
+    if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) {
+        if (l > MAX_VNUMA_NODES) {
+            fprintf(stderr, "Too many vnuma nodes, max %d is allowed.\n",
+                    MAX_VNUMA_NODES);
+            goto bad_vnuma_config;
+        }
+        b_info->vnodes = l;
+
+        if (!xlu_cfg_get_defbool(config, "vnuma_autoplacement",
+                    &b_info->vnuma_autoplacement, 0))
+            libxl_defbool_set(&b_info->vnuma_autoplacement, false);
+
+        /* Only construct nodes with at least one vcpu. */
+        if (b_info->vnodes != 0 && b_info->max_vcpus >= b_info->vnodes) {
+            if (parse_vnuma_mem(config, &b_info) ||
+                parse_vnuma_distance(config, &b_info) ||
+                parse_vnuma_vcpumap(config, &b_info) ||
+                parse_vnuma_vnodemap(config, &b_info))
+                goto bad_vnuma_config;
+        }
+        else if (vnuma_default_config(b_info))
+            goto bad_vnuma_config;
+    }
+    /* If vnuma topology is not defined for domain, init one node */
+    else if (vnuma_default_config(b_info))
+            goto bad_vnuma_config;
+    return;
+
+ bad_vnuma_config:
+    fprintf(stderr, "Failed to parse vnuma config or set default vnuma config.\n");
+    free_vnuma_info(b_info);
+    exit(1);
+}
+
 static void parse_config_data(const char *config_source,
                               const char *config_data,
                               int config_len,
@@ -1063,6 +1490,13 @@ static void parse_config_data(const char *config_source,
             exit(1);
         }
 
+
+        /*
+         * If there is no vnuma in config, "zero" vnuma config
+         * will be initialized with one node and other defaults.
+         */
+        parse_vnuma_config(config, b_info);
+
         xlu_cfg_replace_string (config, "bootloader", &b_info->u.pv.bootloader, 0);
         switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args",
                                       &b_info->u.pv.bootloader_args, 1))
-- 
1.7.10.4


* Re: [PATCH v8 3/9] vnuma hook to debug-keys u
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
                   ` (5 preceding siblings ...)
  2014-08-26  7:45 ` [PATCH v8 9/9] libxl: vnuma topology configuration parser and doc Elena Ufimtseva
@ 2014-08-26 15:29 ` Jan Beulich
  2014-08-26 15:36   ` Elena Ufimtseva
  2014-08-27 20:26 ` Konrad Rzeszutek Wilk
  7 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2014-08-26 15:29 UTC (permalink / raw)
  To: Elena Ufimtseva
  Cc: keir, Ian.Campbell, lccycc123, george.dunlap, msw,
	dario.faggioli, stefano.stabellini, ian.jackson, xen-devel

>>> On 26.08.14 at 09:45, <ufimtseva@gmail.com> wrote:
> @@ -389,6 +389,39 @@ static void dump_numa(unsigned char key)
>  
>  		for_each_online_node(i)
>  			printk("    Node %u: %u\n", i, page_num_node[i]);
> +
> +		if ( d->vnuma )
> +		{
> +			printk("    Domain has %u vnodes, %u vcpus\n",
> +				   d->vnuma->nr_vnodes, d->max_vcpus);
> +			for ( i = 0; i < d->vnuma->nr_vnodes; i++ )
> +			{
> +				err = snprintf(keyhandler_scratch, 12, "%u",
> +						       d->vnuma->vnode_to_pnode[i]);
> +				if ( err < 0 || d->vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
> +					snprintf(keyhandler_scratch, 3, "???");
> +				printk("        vnode %3u - pnode %s,", i, keyhandler_scratch);
> +
> +				printk(" %"PRIu64" MB, ",
> +					   (d->vnuma->vmemrange[i].end - d->vnuma->vmemrange[i].start) >> 20);
> +
> +				printk("vcpu nrs: ");
> +				for ( j = 0, n = 0; j < d->max_vcpus; j++ )
> +				{
> +					if ( d->vnuma->vcpu_to_vnode[j] == i )
> +					{
> +						if ( ((n + 1) % 8) == 0 )
> +							printk("%d\n", j);
> +						else if ( !(n % 8) && n != 0 )
> +							printk("%s%d ", "             ", j);
> +						else
> +							printk("%d ", j);
> +						n++;
> +					}
> +				}
> +				printk("\n");
> +			}
> +		}
>  	}
>  
>  	rcu_read_unlock(&domlist_read_lock);

Actually I'm afraid Konrad's comment (and me agreeing to it)
misguided you here: The function is currently written in Linux
style, so Linux style is what the changes should be in. I.e.
opening curly braces on the same line as the end of their
control construct, and no blanks inside outermost parentheses
of control constructs.

However, the comment regarding overly long lines was correct
(and sadly still stands), and there are indentation issues too.

Jan
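
As a sketch of the two conventions being contrasted here (not code taken
from the patch):

    /* Linux style, which numa.c currently uses: brace on the same
     * line, no blanks inside the outermost parentheses. */
    if (d->vnuma) {
        printk("    Domain has %u vnodes\n", d->vnuma->nr_vnodes);
    }

    /* The style the hunk above uses instead: blanks inside the
     * parentheses, brace on its own line. */
    if ( d->vnuma )
    {
        printk("    Domain has %u vnodes\n", d->vnuma->nr_vnodes);
    }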

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v8 3/9] vnuma hook to debug-keys u
  2014-08-26 15:29 ` [PATCH v8 3/9] vnuma hook to debug-keys u Jan Beulich
@ 2014-08-26 15:36   ` Elena Ufimtseva
  2014-08-26 15:46     ` Jan Beulich
  0 siblings, 1 reply; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26 15:36 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Keir Fraser, Ian Campbell, Li Yechen, George Dunlap, Matt Wilson,
	Dario Faggioli, Stefano Stabellini, Ian Jackson, xen-devel

On Tue, Aug 26, 2014 at 11:29 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 26.08.14 at 09:45, <ufimtseva@gmail.com> wrote:
>> @@ -389,6 +389,39 @@ static void dump_numa(unsigned char key)
>>
>>               for_each_online_node(i)
>>                       printk("    Node %u: %u\n", i, page_num_node[i]);
>> +
>> +             if ( d->vnuma )
>> +             {
>> +                     printk("    Domain has %u vnodes, %u vcpus\n",
>> +                                d->vnuma->nr_vnodes, d->max_vcpus);
>> +                     for ( i = 0; i < d->vnuma->nr_vnodes; i++ )
>> +                     {
>> +                             err = snprintf(keyhandler_scratch, 12, "%u",
>> +                                                    d->vnuma->vnode_to_pnode[i]);
>> +                             if ( err < 0 || d->vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
>> +                                     snprintf(keyhandler_scratch, 3, "???");
>> +                             printk("        vnode %3u - pnode %s,", i, keyhandler_scratch);
>> +
>> +                             printk(" %"PRIu64" MB, ",
>> +                                        (d->vnuma->vmemrange[i].end - d->vnuma->vmemrange[i].start) >> 20);
>> +
>> +                             printk("vcpu nrs: ");
>> +                             for ( j = 0, n = 0; j < d->max_vcpus; j++ )
>> +                             {
>> +                                     if ( d->vnuma->vcpu_to_vnode[j] == i )
>> +                                     {
>> +                                             if ( ((n + 1) % 8) == 0 )
>> +                                                     printk("%d\n", j);
>> +                                             else if ( !(n % 8) && n != 0 )
>> +                                                     printk("%s%d ", "             ", j);
>> +                                             else
>> +                                                     printk("%d ", j);
>> +                                             n++;
>> +                                     }
>> +                             }
>> +                             printk("\n");
>> +                     }
>> +             }
>>       }
>>
>>       rcu_read_unlock(&domlist_read_lock);
>
> Actually I'm afraid Konrad's comment (and me agreeing to it)
> misguided you here: The function is currently written in Linux
> style, so Linux style is what the changes should be in. I.e.
> opening curly braces on the same line as the end of their
> control construct, and no blanks inside outermost parentheses
> of control constructs.
>
> However, the comment regarding overly long lines was correct
> (and sadly still stands), and there are indentation issues too.
>
> Jan
>

Hi Jan

This file has both coding styles, and every attempt of mine to match it fails :)

Can I make changes to numa.c so it is formatted like any other file,
if there is no reason to have it formatted this way?
The indentation characters are inconsistent there as well. Reformatting it
would help me tremendously, and maybe others too.




-- 
Elena

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v8 3/9] vnuma hook to debug-keys u
  2014-08-26 15:36   ` Elena Ufimtseva
@ 2014-08-26 15:46     ` Jan Beulich
  2014-08-26 15:56       ` Elena Ufimtseva
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2014-08-26 15:46 UTC (permalink / raw)
  To: Elena Ufimtseva
  Cc: Keir Fraser, Ian Campbell, Li Yechen, George Dunlap, Matt Wilson,
	Dario Faggioli, Stefano Stabellini, Ian Jackson, xen-devel

>>> On 26.08.14 at 17:36, <ufimtseva@gmail.com> wrote:
> On Tue, Aug 26, 2014 at 11:29 AM, Jan Beulich <JBeulich@suse.com> wrote:
>> Actually I'm afraid Konrad's comment (and me agreeing to it)
>> misguided you here: The function is currently written in Linux
>> style, so Linux style is what the changes should be in. I.e.
>> opening curly braces on the same line as the end of their
>> control construct, and no blanks inside outermost parentheses
>> of control constructs.
>>
>> However, the comment regarding overly long lines was correct
>> (and sadly still stands), and there are indentation issues too.
> 
> This file has both coding styles, and every attempt of mine to match it fails :)

Not really, it's Linux style with perhaps a few mistakes.

> Can I make changes to numa.c so it is formatted like any other file,
> if there is no reason to have it formatted this way?

If you feel up to making it have an entirely uniform style, just go for it.

Jan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v8 3/9] vnuma hook to debug-keys u
  2014-08-26 15:46     ` Jan Beulich
@ 2014-08-26 15:56       ` Elena Ufimtseva
  0 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-26 15:56 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Keir Fraser, Ian Campbell, Li Yechen, George Dunlap, Matt Wilson,
	Dario Faggioli, Stefano Stabellini, Ian Jackson, xen-devel

On Tue, Aug 26, 2014 at 11:46 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 26.08.14 at 17:36, <ufimtseva@gmail.com> wrote:
>> On Tue, Aug 26, 2014 at 11:29 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>> Actually I'm afraid Konrad's comment (and me agreeing to it)
>>> misguided you here: The function is currently written in Linux
>>> style, so Linux style is what the changes should be in. I.e.
>>> opening curly braces on the same line as the end of their
>>> control construct, and no blanks inside outermost parentheses
>>> of control constructs.
>>>
>>> However, the comment regarding overly long lines was correct
>>> (and sadly still stands), and there are indentation issues too.
>>
>> This file has both coding styles, and every attempt of mine to match it fails :)
>
> Not really, it's Linux style with perhaps a few mistakes.
>
>> Can I make changes to numa.c so it is formatted like any other file,
>> if there is no reason to have it formatted this way?
>
> If you feel up to making it have an entirely uniform style, just go for it.

Yes, I'd prefer this.

Thank you Jan.



>
> Jan
>



-- 
Elena

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v8 3/9] vnuma hook to debug-keys u
  2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
                   ` (6 preceding siblings ...)
  2014-08-26 15:29 ` [PATCH v8 3/9] vnuma hook to debug-keys u Jan Beulich
@ 2014-08-27 20:26 ` Konrad Rzeszutek Wilk
  2014-08-27 20:33   ` Elena Ufimtseva
  7 siblings, 1 reply; 14+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-08-27 20:26 UTC (permalink / raw)
  To: Elena Ufimtseva
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, xen-devel, JBeulich

On Tue, Aug 26, 2014 at 03:45:00AM -0400, Elena Ufimtseva wrote:
> Add debug-keys hook to display vnuma topology.
> 
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>

Now that I had led you astray on the style (truly sorry,
and I am really happy Jan spotted it): when that gets fixed
to the right style you can add 'Reviewed-by: Konrad Rzeszutek
Wilk <konrad.wilk@oracle.com>'.

Or, if there is not enough time, there could be a cleanup
patch to fix those style issues later on.

> ---
>  xen/arch/x86/numa.c |   35 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 34 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
> index b141877..9efceac 100644
> --- a/xen/arch/x86/numa.c
> +++ b/xen/arch/x86/numa.c
> @@ -347,7 +347,7 @@ EXPORT_SYMBOL(node_data);
>  static void dump_numa(unsigned char key)
>  {
>  	s_time_t now = NOW();
> -	int i;
> +	int i, j, n, err;
>  	struct domain *d;
>  	struct page_info *page;
>  	unsigned int page_num_node[MAX_NUMNODES];
> @@ -389,6 +389,39 @@ static void dump_numa(unsigned char key)
>  
>  		for_each_online_node(i)
>  			printk("    Node %u: %u\n", i, page_num_node[i]);
> +
> +		if ( d->vnuma )
> +		{
> +			printk("    Domain has %u vnodes, %u vcpus\n",
> +				   d->vnuma->nr_vnodes, d->max_vcpus);
> +			for ( i = 0; i < d->vnuma->nr_vnodes; i++ )
> +			{
> +				err = snprintf(keyhandler_scratch, 12, "%u",
> +						       d->vnuma->vnode_to_pnode[i]);
> +				if ( err < 0 || d->vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
> +					snprintf(keyhandler_scratch, 3, "???");
> +				printk("        vnode %3u - pnode %s,", i, keyhandler_scratch);
> +
> +				printk(" %"PRIu64" MB, ",
> +					   (d->vnuma->vmemrange[i].end - d->vnuma->vmemrange[i].start) >> 20);
> +
> +				printk("vcpu nrs: ");
> +				for ( j = 0, n = 0; j < d->max_vcpus; j++ )
> +				{
> +					if ( d->vnuma->vcpu_to_vnode[j] == i )
> +					{
> +						if ( ((n + 1) % 8) == 0 )
> +							printk("%d\n", j);
> +						else if ( !(n % 8) && n != 0 )
> +							printk("%s%d ", "             ", j);
> +						else
> +							printk("%d ", j);
> +						n++;
> +					}
> +				}
> +				printk("\n");
> +			}
> +		}
>  	}
>  
>  	rcu_read_unlock(&domlist_read_lock);
> -- 
> 1.7.10.4
> 
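
For reference, given the printk format strings in the hunk above, the new
'u' debug-key output for a vNUMA-enabled domain would look roughly like
this (node, size and vcpu numbers are purely illustrative):

        Domain has 2 vnodes, 4 vcpus
            vnode   0 - pnode 0, 2048 MB, vcpu nrs: 0 1
            vnode   1 - pnode 1, 2048 MB, vcpu nrs: 2 3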

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v8 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA
  2014-08-26  7:45 ` [PATCH v8 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA Elena Ufimtseva
@ 2014-08-27 20:31   ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 14+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-08-27 20:31 UTC (permalink / raw)
  To: Elena Ufimtseva
  Cc: keir, Ian.Campbell, stefano.stabellini, george.dunlap, msw,
	dario.faggioli, lccycc123, ian.jackson, xen-devel, JBeulich

On Tue, Aug 26, 2014 at 03:45:01AM -0400, Elena Ufimtseva wrote:
> With the introduction of the XEN_DOMCTL_setvnumainfo
> in patch titled: "xen: vnuma topology and subop hypercalls"
> we put in the plumbing here to use from the toolstack. The user
> is allowed to call this multiple times if they wish so.
> It will error out if the nr_vnodes or nr_vcpus is zero.
> 
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

> ---
>  tools/libxc/xc_domain.c |   65 +++++++++++++++++++++++++++++++++++++++++++++++
>  tools/libxc/xenctrl.h   |   10 ++++++++
>  2 files changed, 75 insertions(+)
> 
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index c67ac9a..1708766 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -2124,6 +2124,71 @@ int xc_domain_set_max_evtchn(xc_interface *xch, uint32_t domid,
>      return do_domctl(xch, &domctl);
>  }
>  
> +/* Plumbing Xen with vNUMA topology */
> +int xc_domain_setvnuma(xc_interface *xch,
> +                       uint32_t domid,
> +                       uint32_t nr_vnodes,
> +                       uint32_t nr_regions,
> +                       uint32_t nr_vcpus,
> +                       vmemrange_t *vmemrange,
> +                       unsigned int *vdistance,
> +                       unsigned int *vcpu_to_vnode,
> +                       unsigned int *vnode_to_pnode)
> +{
> +    int rc;
> +    DECLARE_DOMCTL;
> +    DECLARE_HYPERCALL_BOUNCE(vmemrange, sizeof(*vmemrange) * nr_regions,
> +                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> +    DECLARE_HYPERCALL_BOUNCE(vdistance, sizeof(*vdistance) *
> +                             nr_vnodes * nr_vnodes,
> +                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> +    DECLARE_HYPERCALL_BOUNCE(vcpu_to_vnode, sizeof(*vcpu_to_vnode) * nr_vcpus,
> +                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> +    DECLARE_HYPERCALL_BOUNCE(vnode_to_pnode, sizeof(*vnode_to_pnode) *
> +                             nr_vnodes,
> +                             XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> +    errno = EINVAL;
> +
> +    if ( nr_vnodes == 0 || nr_regions == 0 || nr_regions < nr_vnodes )
> +        return -1;
> +
> +    if ( !vdistance || !vcpu_to_vnode || !vmemrange || !vnode_to_pnode )
> +    {
> +        PERROR("%s: Cant set vnuma without initializing topology", __func__);
> +        return -1;
> +    }
> +
> +    if ( xc_hypercall_bounce_pre(xch, vmemrange)      ||
> +         xc_hypercall_bounce_pre(xch, vdistance)      ||
> +         xc_hypercall_bounce_pre(xch, vcpu_to_vnode)  ||
> +         xc_hypercall_bounce_pre(xch, vnode_to_pnode) )
> +    {
> +        rc = -1;
> +        goto vnumaset_fail;
> +
> +    }
> +
> +    set_xen_guest_handle(domctl.u.vnuma.vmemrange, vmemrange);
> +    set_xen_guest_handle(domctl.u.vnuma.vdistance, vdistance);
> +    set_xen_guest_handle(domctl.u.vnuma.vcpu_to_vnode, vcpu_to_vnode);
> +    set_xen_guest_handle(domctl.u.vnuma.vnode_to_pnode, vnode_to_pnode);
> +
> +    domctl.cmd = XEN_DOMCTL_setvnumainfo;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.vnuma.nr_vnodes = nr_vnodes;
> +    domctl.u.vnuma.nr_regions = nr_regions;
> +
> +    rc = do_domctl(xch, &domctl);
> +
> + vnumaset_fail:
> +    xc_hypercall_bounce_post(xch, vmemrange);
> +    xc_hypercall_bounce_post(xch, vdistance);
> +    xc_hypercall_bounce_post(xch, vcpu_to_vnode);
> +    xc_hypercall_bounce_post(xch, vnode_to_pnode);
> +
> +    return rc;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index 1c5d0db..1c8aa42 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1245,6 +1245,16 @@ int xc_domain_set_memmap_limit(xc_interface *xch,
>                                 uint32_t domid,
>                                 unsigned long map_limitkb);
>  
> +int xc_domain_setvnuma(xc_interface *xch,
> +                        uint32_t domid,
> +                        uint32_t nr_vnodes,
> +                        uint32_t nr_regions,
> +                        uint32_t nr_vcpus,
> +                        vmemrange_t *vmemrange,
> +                        unsigned int *vdistance,
> +                        unsigned int *vcpu_to_vnode,
> +                        unsigned int *vnode_to_pnode);
> +
>  #if defined(__i386__) || defined(__x86_64__)
>  /*
>   * PC BIOS standard E820 types and structure.
> -- 
> 1.7.10.4
> 
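
As a usage sketch (illustrative only, not part of the series): a caller in
the toolstack would fill the arrays and invoke the new wrapper roughly as
below. The start/end field names of vmemrange_t are assumed from the
hypervisor-side dump code earlier in the series; the domid, sizes and
distances are made up.

    #include <xenctrl.h>

    /* Sketch: describe a two-vnode topology for a 4-vcpu domain. */
    static int set_two_vnodes(xc_interface *xch, uint32_t domid)
    {
        vmemrange_t vmemrange[2] = {
            { .start = 0,          .end = 1ULL << 30 }, /* vnode 0: 0 - 1G */
            { .start = 1ULL << 30, .end = 2ULL << 30 }, /* vnode 1: 1G - 2G */
        };
        unsigned int vdistance[4]      = { 10, 20, 20, 10 }; /* 2x2 matrix */
        unsigned int vcpu_to_vnode[4]  = { 0, 0, 1, 1 };
        unsigned int vnode_to_pnode[2] = { 0, 1 };

        return xc_domain_setvnuma(xch, domid,
                                  2 /* nr_vnodes */, 2 /* nr_regions */,
                                  4 /* nr_vcpus */,
                                  vmemrange, vdistance,
                                  vcpu_to_vnode, vnode_to_pnode);
    }

On failure the wrapper returns -1 with errno set (EINVAL for the zero-count
and NULL-pointer cases checked at the top of the function).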

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v8 3/9] vnuma hook to debug-keys u
  2014-08-27 20:26 ` Konrad Rzeszutek Wilk
@ 2014-08-27 20:33   ` Elena Ufimtseva
  0 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2014-08-27 20:33 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Keir Fraser, Ian Campbell, Stefano Stabellini, George Dunlap,
	Matt Wilson, Dario Faggioli, Li Yechen, Ian Jackson, xen-devel,
	Jan Beulich

On Wed, Aug 27, 2014 at 4:26 PM, Konrad Rzeszutek Wilk
<konrad.wilk@oracle.com> wrote:
> On Tue, Aug 26, 2014 at 03:45:00AM -0400, Elena Ufimtseva wrote:
>> Add debug-keys hook to display vnuma topology.
>>
>> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
>
> Now that I had led you astray on the style (truly sorry,
> and I am really happy Jan spotted it): when that gets fixed
> to the right style you can add 'Reviewed-by: Konrad Rzeszutek
> Wilk <konrad.wilk@oracle.com>'.
>
> Or, if there is not enough time, there could be a cleanup
> patch to fix those style issues later on.
>
>> ---
>>  xen/arch/x86/numa.c |   35 ++++++++++++++++++++++++++++++++++-
>>  1 file changed, 34 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
>> index b141877..9efceac 100644
>> --- a/xen/arch/x86/numa.c
>> +++ b/xen/arch/x86/numa.c
>> @@ -347,7 +347,7 @@ EXPORT_SYMBOL(node_data);
>>  static void dump_numa(unsigned char key)
>>  {
>>       s_time_t now = NOW();
>> -     int i;
>> +     int i, j, n, err;
>>       struct domain *d;
>>       struct page_info *page;
>>       unsigned int page_num_node[MAX_NUMNODES];
>> @@ -389,6 +389,39 @@ static void dump_numa(unsigned char key)
>>
>>               for_each_online_node(i)
>>                       printk("    Node %u: %u\n", i, page_num_node[i]);
>> +
>> +             if ( d->vnuma )
>> +             {
>> +                     printk("    Domain has %u vnodes, %u vcpus\n",
>> +                                d->vnuma->nr_vnodes, d->max_vcpus);
>> +                     for ( i = 0; i < d->vnuma->nr_vnodes; i++ )
>> +                     {
>> +                             err = snprintf(keyhandler_scratch, 12, "%u",
>> +                                                    d->vnuma->vnode_to_pnode[i]);
>> +                             if ( err < 0 || d->vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
>> +                                     snprintf(keyhandler_scratch, 3, "???");
>> +                             printk("        vnode %3u - pnode %s,", i, keyhandler_scratch);
>> +
>> +                             printk(" %"PRIu64" MB, ",
>> +                                        (d->vnuma->vmemrange[i].end - d->vnuma->vmemrange[i].start) >> 20);
>> +
>> +                             printk("vcpu nrs: ");
>> +                             for ( j = 0, n = 0; j < d->max_vcpus; j++ )
>> +                             {
>> +                                     if ( d->vnuma->vcpu_to_vnode[j] == i )
>> +                                     {
>> +                                             if ( ((n + 1) % 8) == 0 )
>> +                                                     printk("%d\n", j);
>> +                                             else if ( !(n % 8) && n != 0 )
>> +                                                     printk("%s%d ", "             ", j);
>> +                                             else
>> +                                                     printk("%d ", j);
>> +                                             n++;
>> +                                     }
>> +                             }
>> +                             printk("\n");
>> +                     }
>> +             }
>>       }
>>
>>       rcu_read_unlock(&domlist_read_lock);
>> --
>> 1.7.10.4
>>


Thanks Konrad!

Will be posting the next version tonight.


-- 
Elena

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2014-08-27 20:33 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-26  7:45 [PATCH v8 3/9] vnuma hook to debug-keys u Elena Ufimtseva
2014-08-26  7:45 ` [PATCH v8 4/9] libxc: Introduce xc_domain_setvnuma to set vNUMA Elena Ufimtseva
2014-08-27 20:31   ` Konrad Rzeszutek Wilk
2014-08-26  7:45 ` [PATCH v8 5/9] libxl: vnuma types declararion Elena Ufimtseva
2014-08-26  7:45 ` [PATCH v8 6/9] libxl: build numa nodes memory blocks Elena Ufimtseva
2014-08-26  7:45 ` [PATCH v8 7/9] libxc: allocate domain memory for vnuma enabled Elena Ufimtseva
2014-08-26  7:45 ` [PATCH v8 8/9] libxl: vnuma nodes placement bits Elena Ufimtseva
2014-08-26  7:45 ` [PATCH v8 9/9] libxl: vnuma topology configuration parser and doc Elena Ufimtseva
2014-08-26 15:29 ` [PATCH v8 3/9] vnuma hook to debug-keys u Jan Beulich
2014-08-26 15:36   ` Elena Ufimtseva
2014-08-26 15:46     ` Jan Beulich
2014-08-26 15:56       ` Elena Ufimtseva
2014-08-27 20:26 ` Konrad Rzeszutek Wilk
2014-08-27 20:33   ` Elena Ufimtseva
