All of lore.kernel.org
 help / color / mirror / Atom feed
* about the guest(Redhat6.3) shows white screen, but the suse, ubuntu is ok
@ 2013-07-21 14:41 butine
  2013-07-22 11:32 ` Dario Faggioli
  0 siblings, 1 reply; 3+ messages in thread
From: butine @ 2013-07-21 14:41 UTC (permalink / raw)
  To: dario.faggioli; +Cc: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 210 bytes --]

hello,Dario,

I have implemented the HVM guest NUMA.After I created the guest(Redhat6.3),it would show white screen when guest started.But the suse,ubuntu is ok.why??

 

Thanks

Regard,
Butine Huang
2013-07-15

[-- Attachment #1.2: Type: text/html, Size: 257 bytes --]

[-- Attachment #2: guest_numa.patch --]
[-- Type: application/octet-stream, Size: 98304 bytes --]

diff --git a/tools/firmware/hvmloader/acpi/acpi2_0.h b/tools/firmware/hvmloader/acpi/acpi2_0.h
index 9ea356b..36a813b 100644
--- a/tools/firmware/hvmloader/acpi/acpi2_0.h
+++ b/tools/firmware/hvmloader/acpi/acpi2_0.h
@@ -283,8 +283,68 @@ struct acpi_20_madt {
     uint32_t flags;
 };
 
+/*
+ * System Resource Affinity Table header definition (SRAT) (Version 3.0)
+ * X2APIC_CPU_AFFINITY is defined in version 4.0
+ */
+struct acpi_30_srat {
+	struct acpi_header header;	/* Common ACPI table header */
+	uint32_t table_revision;	/* Must be value '1' */
+	uint32_t reserved[2];		/* Reserved, must be zero */
+};
+#define ACPI_30_SRAT_TABLE_REVISION    0x1
+
+/* Values for type (in SRAT subtables) */
+enum acpi_30_srat_type {
+	ACPI_30_SRAT_TYPE_CPU_AFFINITY = 0,
+	ACPI_30_SRAT_TYPE_MEMORY_AFFINITY = 1,
+	ACPI_30_SRAT_TYPE_RESERVED = 2	/* 2 and greater are reserved */
+};
+
+/* type(0) : Processor Local APIC/SAPIC Affinity */
+struct acpi_30_srat_cpu_affinity {
+    uint8_t type;
+    uint8_t length;
+    uint8_t proximity_domain_lo;
+    uint8_t apic_id;
+    uint32_t flags;
+    uint8_t local_sapic_eid;
+    uint8_t proximity_domain_hi[3];
+    uint32_t reserved;		/* Reserved, must be zero */
+};
+
+/* Flags */
+#define ACPI_30_SRAT_CPU_USE_AFFINITY  (1)	/* 00: Use affinity structure */
+
+/* 1: Memory Affinity */
+
+struct acpi_30_srat_mem_affinity {
+    uint8_t type;
+    uint8_t length;
+    uint32_t proximity_domain;
+    uint16_t reserved;		/* Reserved, must be zero */
+    uint64_t base_address;
+    uint64_t size;
+    uint32_t reserved1;
+    uint32_t flags;
+    uint64_t reserved2;	    /* Reserved, must be zero */
+};
+
+/* Flags */
+#define ACPI_30_SRAT_MEM_ENABLED       (1)	/* 00: Use affinity structure */
+#define ACPI_30_SRAT_MEM_HOT_PLUGGABLE (1<<1)	/* 01: Mem is hot pluggable */
+#define ACPI_30_SRAT_MEM_NON_VOLATILE  (1<<2)	/* 02: Mem is non-volatile */
 
 /*
+ * System Locality Information Table header definition (SLIT) (Version 1.0)
+ */
+struct acpi_10_slit {
+    struct acpi_header header;
+    uint64_t locality_count;
+    uint8_t entry[1];
+};
+ 
+/*
  * HPET Description Table
  */
 struct acpi_20_hpet {
@@ -367,6 +427,9 @@ struct acpi_20_madt_intsrcovr {
 #define ACPI_2_0_XSDT_SIGNATURE ASCII32('X','S','D','T')
 #define ACPI_2_0_TCPA_SIGNATURE ASCII32('T','C','P','A')
 #define ACPI_2_0_HPET_SIGNATURE ASCII32('H','P','E','T')
+#define ACPI_3_0_SRAT_SIGNATURE ASCII32('S','R','A','T')
+#define ACPI_1_0_SLIT_SIGNATURE ASCII32('S','L','I','T')
+
 
 /*
  * Table revision numbers.
@@ -379,6 +442,8 @@ struct acpi_20_madt_intsrcovr {
 #define ACPI_2_0_TCPA_REVISION 0x02
 #define ACPI_2_0_HPET_REVISION 0x01
 #define ACPI_1_0_FADT_REVISION 0x01
+#define ACPI_3_0_SRAT_REVISION 0x01
+#define ACPI_1_0_SLIT_REVISION 0x01
 
 #pragma pack ()
 
diff --git a/tools/firmware/hvmloader/acpi/build.c b/tools/firmware/hvmloader/acpi/build.c
index dc38c73..0403e75 100644
--- a/tools/firmware/hvmloader/acpi/build.c
+++ b/tools/firmware/hvmloader/acpi/build.c
@@ -149,6 +149,114 @@ static int construct_madt(struct acpi_20_madt *madt)
     return align16(offset);
 }
 
+static int 
+construct_srat_cpu_affinity(struct acpi_30_srat_cpu_affinity *cpu_srat)
+{
+    struct acpi_30_srat_cpu_affinity *cpu_srat_iter;
+    int vnode, vcpu;
+    struct xen_domain_numa_info *numa_info = &hvm_info->numa_info[0];
+    uint8_t *numa_vcpu_to_vnode = NUMA_INFO_VCPU_TO_VNODE(numa_info);
+
+    for ( vnode = 0, cpu_srat_iter = cpu_srat; 
+                            vnode < numa_info->nr_vnodes; vnode++ )
+    {
+        for ( vcpu = 0 ; vcpu < numa_info->nr_vcpus; vcpu++ )
+        {
+            if (numa_vcpu_to_vnode[vcpu] == vnode)
+            {
+                memset(cpu_srat_iter, 0, sizeof(*cpu_srat_iter));
+                cpu_srat_iter->type = ACPI_30_SRAT_TYPE_CPU_AFFINITY;
+                cpu_srat_iter->length = sizeof(*cpu_srat);
+                cpu_srat_iter->proximity_domain_lo = vnode;
+                cpu_srat_iter->apic_id = LAPIC_ID(vcpu);
+                cpu_srat_iter->flags = ACPI_30_SRAT_CPU_USE_AFFINITY;
+                cpu_srat_iter++;
+            }
+        }
+    }
+    /* return length of the sub-table */
+    return ((uint8_t *)cpu_srat_iter-(uint8_t *)cpu_srat);
+}
+
+static int 
+construct_srat_mem_affinity(struct acpi_30_srat_mem_affinity *mem_srat)
+{
+    int vnode;
+    struct acpi_30_srat_mem_affinity *mem_srat_iter = mem_srat;
+    struct xen_domain_numa_info *numa_info = &hvm_info->numa_info[0];
+    struct xen_vnode_info *numa_vnode_info = NUMA_INFO_VNODE_INFO(numa_info);
+
+    for ( vnode = 0; vnode < numa_info->nr_vnodes; vnode++ )
+    {
+        struct xen_vnode_info *vnode_info = &numa_vnode_info[vnode];
+        memset(mem_srat_iter, 0, sizeof(*mem_srat_iter));
+        mem_srat_iter->type = ACPI_30_SRAT_TYPE_MEMORY_AFFINITY;
+        mem_srat_iter->length = sizeof(*mem_srat_iter);
+        mem_srat_iter->proximity_domain = vnode;
+        mem_srat_iter->base_address = (uint64_t)vnode_info->start << PAGE_SHIFT;
+        mem_srat_iter->size = 
+				(uint64_t)(vnode_info->end - vnode_info->start) << PAGE_SHIFT;
+        mem_srat_iter->flags = ACPI_30_SRAT_MEM_ENABLED;
+        mem_srat_iter++;
+    }
+    /* return length of the sub-table */
+    return ((uint8_t *)mem_srat_iter-(uint8_t *)mem_srat);
+}
+
+static int construct_srat(struct acpi_30_srat *srat)
+{
+    int offset;
+
+    memset(srat, 0, sizeof(*srat));
+    srat->header.signature    = ACPI_3_0_SRAT_SIGNATURE;
+    srat->header.revision     = ACPI_3_0_SRAT_REVISION;
+    fixed_strcpy(srat->header.oem_id, ACPI_OEM_ID);
+    fixed_strcpy(srat->header.oem_table_id, ACPI_OEM_TABLE_ID);
+    srat->header.oem_revision = ACPI_OEM_REVISION;
+    srat->header.creator_id   = ACPI_CREATOR_ID;
+    srat->header.creator_revision = ACPI_CREATOR_REVISION;
+    srat->table_revision = ACPI_30_SRAT_TABLE_REVISION;
+    offset = sizeof(*srat);
+
+    offset += construct_srat_cpu_affinity((struct acpi_30_srat_cpu_affinity *)
+                                                ((uint8_t *)srat + offset));
+
+    offset += construct_srat_mem_affinity((struct acpi_30_srat_mem_affinity *)
+                                                ((uint8_t *)srat + offset));
+
+    srat->header.length = offset;
+    set_checksum(srat, offsetof(struct acpi_header, checksum), offset);
+
+    return offset;
+}
+
+static int construct_slit(struct acpi_10_slit *slit)
+{
+    int offset, i, nr_vnodes;
+    struct xen_domain_numa_info *numa_info = &hvm_info->numa_info[0];
+    uint8_t *numa_vnode_distance = NUMA_INFO_VNODE_DISTANCE(numa_info);
+
+    memset(slit, 0, sizeof(*slit));
+    slit->header.signature    = ACPI_1_0_SLIT_SIGNATURE;
+    slit->header.revision     = ACPI_1_0_SLIT_REVISION;
+    fixed_strcpy(slit->header.oem_id, ACPI_OEM_ID);
+    fixed_strcpy(slit->header.oem_table_id, ACPI_OEM_TABLE_ID);
+    slit->header.oem_revision = ACPI_OEM_REVISION;
+    slit->header.creator_id   = ACPI_CREATOR_ID;
+    slit->header.creator_revision = ACPI_CREATOR_REVISION;
+    slit->locality_count = numa_info->nr_vnodes;
+
+    nr_vnodes = numa_info->nr_vnodes;
+    for (i=0; i<(nr_vnodes*nr_vnodes); i++)
+        slit->entry[i] = numa_vnode_distance[i];
+
+    offset = sizeof(*slit)+(nr_vnodes*nr_vnodes)-1;
+    slit->header.length = offset;
+    set_checksum(slit, offsetof(struct acpi_header, checksum), offset);
+
+    return offset;
+}
+
 static int construct_hpet(struct acpi_20_hpet *hpet)
 {
     int offset;
@@ -177,6 +285,8 @@ static int construct_secondary_tables(uint8_t *buf, unsigned long *table_ptrs)
     struct acpi_20_madt *madt;
     struct acpi_20_hpet *hpet;
     struct acpi_20_tcpa *tcpa;
+    struct acpi_30_srat *srat;
+    struct acpi_10_slit *slit;
     static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
     uint16_t *tis_hdr;
     void *lasa;
@@ -189,6 +299,18 @@ static int construct_secondary_tables(uint8_t *buf, unsigned long *table_ptrs)
         table_ptrs[nr_tables++] = (unsigned long)madt;
     }
 
+	 /* SRAT/SLIT. */
+	 if ( hvm_info->numa_enabled &&
+			 hvm_info->numa_info[0].version == XEN_DOM_NUMA_INTERFACE_VERSION )
+	 {
+		 srat = (struct acpi_30_srat *)&buf[offset];
+		 offset += construct_srat(srat);
+		 table_ptrs[nr_tables++] = (unsigned long)srat;
+		 slit = (struct acpi_10_slit *)&buf[offset];
+		 offset += construct_slit(slit);
+		 table_ptrs[nr_tables++] = (unsigned long)slit;
+	 }
+
     /* HPET. */
     if ( hpet_exists(ACPI_HPET_ADDRESS) )
     {
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index 9942c3a..eef9b44 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -32,6 +32,8 @@ CTRL_SRCS-y       += xc_mem_access.c
 CTRL_SRCS-y       += xc_memshr.c
 CTRL_SRCS-y       += xc_hcall_buf.c
 CTRL_SRCS-y       += xc_foreign_memory.c
+CTRL_SRCS-y       += xc_cpumap.c
+CTRL_SRCS-y       += xc_dom_numa.c
 CTRL_SRCS-y       += xtl_core.c
 CTRL_SRCS-y       += xtl_logger_stdio.c
 CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c
diff --git a/tools/libxc/ia64/xc_ia64_hvm_build.c b/tools/libxc/ia64/xc_ia64_hvm_build.c
index 18be616..bc8358e 100644
--- a/tools/libxc/ia64/xc_ia64_hvm_build.c
+++ b/tools/libxc/ia64/xc_ia64_hvm_build.c
@@ -1119,6 +1119,7 @@ int xc_hvm_build_target_mem(xc_interface *xch,
                             uint32_t domid,
                             int memsize,
                             int target,
+                            xc_domain_numa_config_t *numa_config,
                             const char *image_name)
 {
     /* XXX:PoD isn't supported yet */
diff --git a/tools/libxc/xc_cpumap.c b/tools/libxc/xc_cpumap.c
new file mode 100755
index 0000000..66e41fc
--- /dev/null
+++ b/tools/libxc/xc_cpumap.c
@@ -0,0 +1,104 @@
+#include "xc_cpumap.h"
+#include <stdio.h>
+
+/* Author : Lab309 */
+
+uint32_t xc_cpumap_next(int cpu, struct xenctl_cpumap *srcp)
+{
+    uint8_t *p, pos;
+    uint8_t *addr = xc_cpumap_bits(srcp);
+    uint32_t size = xc_cpumap_len(srcp);
+    uint32_t offset = cpu+1; /* Find the next set cpu */
+
+    if (offset >= size)
+        return size;
+
+    p = addr + XC_BITMAP_BYTE(offset);
+    pos = XC_BITMAP_BYTE_OFFSET(offset);
+
+    do {
+        for (; (pos < XC_BITS_PER_BYTE) && !((*p)&(1<<pos)); pos++);
+        if (pos < XC_BITS_PER_BYTE)
+            break;
+        pos = 0; p++;
+    } while (p < (addr+size));
+
+    return (((p-addr)*XC_BITS_PER_BYTE) + pos);
+}
+
+void __xc_cpumap_or(struct xenctl_cpumap *dstp,
+        struct xenctl_cpumap *src1p, struct xenctl_cpumap *src2p)
+{
+    uint8_t *dp = xc_cpumap_bits(dstp);
+    uint8_t *s1p = xc_cpumap_bits(src1p);
+    uint8_t *s2p = xc_cpumap_bits(src2p);
+    int nr = XC_BITS_TO_BYTES(xc_cpumap_len(dstp));
+    int k;
+    for (k=0; k<nr; k++)
+        dp[k] = s1p[k] | s2p[k];
+}
+
+static inline uint8_t hweight8(uint8_t w)
+{
+    uint8_t res = (w & 0x55) + ((w >> 1) & 0x55);
+    res = (res & 0x33) + ((res >> 2) & 0x33);
+    return (res & 0x0F) + ((res >> 4) & 0x0F);
+}
+
+int __xc_cpumap_weight(struct xenctl_cpumap *srcp)
+{
+    const uint8_t *sp = xc_cpumap_bits(srcp);
+    int k, w = 0, lim = XC_BITS_TO_BYTES(xc_cpumap_len(srcp));
+    for (k=0; k <lim; k++)
+        w += hweight8(sp[k]);
+    return w;
+}
+
+/* xenctl_cpumap print function */
+#define CHUNKSZ	8
+#define roundup_power2(val,modulus)	(((val) + (modulus) - 1) & ~((modulus) - 1))
+
+int __xc_cpumap_snprintf(char *buf, unsigned int buflen,
+                                        const struct xenctl_cpumap *cpumap)
+{
+    const uint8_t *maskp = xc_cpumap_bits(cpumap);
+    int nmaskbits = xc_cpumap_len(cpumap);
+	int i, word, bit, len = 0;
+	unsigned long val;
+	const char *sep = "";
+	int chunksz;
+	uint8_t chunkmask;
+
+	chunksz = nmaskbits & (CHUNKSZ - 1);
+	if (chunksz == 0)
+		chunksz = CHUNKSZ;
+
+	i = roundup_power2(nmaskbits, CHUNKSZ) - CHUNKSZ;
+	for (; i >= 0; i -= CHUNKSZ) {
+		chunkmask = ((1ULL << chunksz) - 1);
+		word = i / XC_BITS_PER_BYTE;
+		bit = i % XC_BITS_PER_BYTE;
+		val = (maskp[word] >> bit) & chunkmask;
+		len += snprintf(buf+len, buflen-len, "%s%0*lx", sep,
+			(chunksz+3)/4, val);
+		chunksz = CHUNKSZ;
+		sep = ",";
+	}
+	return len;
+}
+
+int xc_cpumap_printf(const struct xenctl_cpumap *cpumap)
+{
+	char buffer[1024];
+	int ret =\r__xc_cpumap_snprintf(buffer,1024,cpumap);
+	if(ret>=0)
+		printf("cpumap:%s\n",buffer);
+	return ret;
+}
+
+int xc_bitmap_printf(const struct xenctl_bitmap *bitmap)
+{
+	struct xenctl_cpumap cpumap ={bitmap->bitmap,bitmap->nr_elems};
+	return xc_cpumap_printf(&cpumap);
+}
+
diff --git a/tools/libxc/xc_cpumap.h b/tools/libxc/xc_cpumap.h
new file mode 100755
index 0000000..5cd4cda
--- /dev/null
+++ b/tools/libxc/xc_cpumap.h
@@ -0,0 +1,137 @@
+#ifndef __XENCTL_CPUMAP_H
+#define __XENCTL_CPUMAP_H
+
+#include "xc_private.h"
+#include <stdint.h>
+#include <string.h>
+
+#define XC_BITS_PER_BYTE 8
+#define XC_BITS_TO_BYTES(bits) \
+    (((bits)+XC_BITS_PER_BYTE-1)/XC_BITS_PER_BYTE)
+#define XC_BITMAP_BIT(nr)   (1 << (nr))
+#define XC_BITMAP_BIT_MASK(nr)  (1 << ((nr) % XC_BITS_PER_BYTE))
+#define XC_BITMAP_BYTE(nr)  ((nr) / XC_BITS_PER_BYTE)
+#define XC_BITMAP_BYTE_OFFSET(nr)  ((nr) % XC_BITS_PER_BYTE)
+#define XC_BITMAP_BYTE_MASK (0xFF)
+#define XC_BITMAP_LAST_BYTE_MASK(nbits)                             \
+            (((nbits) % XC_BITS_PER_BYTE) ?                         \
+                       ((1<<((nbits) % XC_BITS_PER_BYTE))-1) :      \
+                                            XC_BITMAP_BYTE_MASK)
+
+#define xc_cpumap_bits(maskp)                                           \
+                    ({  uint8_t *bitmap;                                \
+                        get_xen_guest_handle(bitmap, (maskp)->bitmap);  \
+                        bitmap; })
+#define xc_cpumap_len(maskp) ((maskp)->nr_cpus)
+
+/* For iterating over the cpus set in the cpumap */
+#define xc_for_each_cpu(cpu, mask)				\
+            __xc_for_each_cpu(cpu, &(mask))
+#define __xc_for_each_cpu(cpu, mask)            \
+	for ((cpu) = -1;				            \
+		(cpu) = xc_cpumap_next((cpu), (mask)),	\
+		(cpu) < xc_cpumap_len(mask);)
+extern uint32_t xc_cpumap_next(int n, struct xenctl_cpumap *srcp);
+
+#define xc_cpumap_set_cpu(cpu, dst) __xc_cpumap_set_cpu(cpu, &(dst))
+static inline void __xc_cpumap_set_cpu(int cpu, struct xenctl_cpumap *dstp)
+{
+    uint8_t mask = XC_BITMAP_BIT_MASK(cpu);
+    uint8_t *p = ((uint8_t *)xc_cpumap_bits(dstp)) + XC_BITMAP_BYTE(cpu);
+    *p |= mask;
+}
+
+#define xc_cpumap_clear_cpu(cpu, dst) __xc_cpumap_clear_cpu(cpu, &(dst))
+static inline void __xc_cpumap_clear_cpu(int cpu, struct xenctl_cpumap *dstp)
+{
+    uint8_t mask = XC_BITMAP_BIT_MASK(cpu);
+    uint8_t *p = ((uint8_t *)xc_cpumap_bits(dstp)) + XC_BITMAP_BYTE(cpu);
+    *p &= ~mask;
+}
+
+#define xc_cpumap_test_cpu(cpu, dst) __xc_cpumap_test_cpu(cpu, &(dst))
+static inline int __xc_cpumap_test_cpu(int cpu, struct xenctl_cpumap *dstp)
+{
+    uint8_t mask = XC_BITMAP_BIT_MASK(cpu);
+    uint8_t *p = ((uint8_t *)xc_cpumap_bits(dstp)) + XC_BITMAP_BYTE(cpu);
+    return *p & mask;
+}
+
+#define xc_cpumap_setall(dst) __xc_cpumap_setall(&(dst))
+static inline void __xc_cpumap_setall(struct xenctl_cpumap *dstp)
+{
+    uint8_t *dp = xc_cpumap_bits(dstp);
+    int nbits = xc_cpumap_len(dstp);
+    size_t nbytes = XC_BITS_TO_BYTES(nbits);
+    if (nbytes > 1)
+        memset(dp, 0xff, nbytes);
+    dp[nbytes-1] = XC_BITMAP_LAST_BYTE_MASK(nbits);
+}
+
+#define xc_cpumap_clearall(dst) __xc_cpumap_clearall(&(dst))
+static inline void __xc_cpumap_clearall(struct xenctl_cpumap *dstp)
+{
+    size_t nbytes = XC_BITS_TO_BYTES(xc_cpumap_len(dstp));
+    if (nbytes > 1)
+        memset(xc_cpumap_bits(dstp), 0x00, nbytes); 
+}
+
+#define xc_cpumap_or(dst, src1, src2) \
+                        __xc_cpumap_or(&(dst), &(src1), &(src2))
+extern void __xc_cpumap_or(struct xenctl_cpumap *dstp,
+        struct xenctl_cpumap *src1p, struct xenctl_cpumap *src2p);
+
+#define xc_cpumap_weight(src) __xc_cpumap_weight(&(src))
+extern int __xc_cpumap_weight(struct xenctl_cpumap *srcp);
+
+#define xc_cpumap_snprintf(buf, len, src) \
+			__xc_cpumap_snprintf((buf), (len), &(src))
+extern int __xc_cpumap_snprintf(char *buf, unsigned int len,
+					        const struct xenctl_cpumap *srcp);
+
+/***********************************************************************/
+static inline int lock_pages(void *addr, size_t len)
+{
+      int e;
+      void *laddr = (void *)((unsigned long)addr & PAGE_MASK);
+      size_t llen = (len + ((unsigned long)addr - (unsigned long)laddr) +
+                     PAGE_SIZE - 1) & PAGE_MASK;
+      e = mlock(laddr, llen);
+      return e;
+}
+
+static inline void unlock_pages(void *addr, size_t len)
+{
+    void *laddr = (void *)((unsigned long)addr & PAGE_MASK);
+    size_t llen = (len + ((unsigned long)addr - (unsigned long)laddr) +
+                   PAGE_SIZE - 1) & PAGE_MASK;
+	int saved_errno = errno;
+    munlock(laddr, llen);
+	errno = saved_errno;
+}
+
+static inline int
+xc_cpumap_lock_pages(struct xenctl_cpumap *map)
+{
+    uint8_t *bitmap;
+    uint32_t nr_bytes = XC_BITS_TO_BYTES(map->nr_cpus);
+    get_xen_guest_handle(bitmap, map->bitmap);
+    if (lock_pages(bitmap, nr_bytes))
+        return -1;
+    return 0;
+}
+
+static inline void
+xc_cpumap_unlock_pages(struct xenctl_cpumap *map)
+{
+    uint8_t *bitmap;
+    uint32_t nr_bytes = XC_BITS_TO_BYTES(map->nr_cpus);
+    get_xen_guest_handle(bitmap, map->bitmap);
+    unlock_pages(bitmap, nr_bytes);
+}
+
+int xc_cpumap_printf(const struct xenctl_cpumap *cpumap);
+
+int xc_bitmap_printf(const struct xenctl_bitmap *bitmap);
+
+#endif /* __XENCTL_CPUMAP_H */
diff --git a/tools/libxc/xc_dom_numa.c b/tools/libxc/xc_dom_numa.c
new file mode 100755
index 0000000..4a346e4
--- /dev/null
+++ b/tools/libxc/xc_dom_numa.c
@@ -0,0 +1,976 @@
+/* XEN Guest NUMA support
+ * Author : Lab309 */
+
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include "xg_private.h"
+#include "xc_dom_numa.h"
+#include "xc_cpumap.h"
+
+#ifdef __DOM_NUMA_DEBUG__
+#undef DBGPRINTF
+#define DBGPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_INFO,0, _f , ## _a)
+#endif
+
+#ifdef set_xen_guest_handle
+#undef set_xen_guest_handle
+#endif
+#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
+
+void print_numa_info(xc_interface *xch)
+{
+	DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, memsize);
+    DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, memfree);
+    DECLARE_HYPERCALL_BUFFER(uint32_t, node_dists);
+	xc_numainfo_t ninfo;
+	int max_nodes=xc_get_max_nodes(xch),i,j;
+	memsize = xc_hypercall_buffer_alloc
+        (xch, memsize, sizeof(*memsize) * max_nodes);
+    memfree = xc_hypercall_buffer_alloc
+        (xch, memfree, sizeof(*memfree) * max_nodes);
+    node_dists = xc_hypercall_buffer_alloc
+		(xch, node_dists, sizeof(*node_dists) * max_nodes * max_nodes);
+	set_xen_guest_handle(ninfo.node_to_memsize, memsize);
+    set_xen_guest_handle(ninfo.node_to_memfree, memfree);
+    set_xen_guest_handle(ninfo.node_to_node_distance, node_dists);
+    ninfo.max_node_index = max_nodes - 1;
+	xc_numainfo(xch, &ninfo);
+	 if (ninfo.max_node_index < max_nodes - 1)
+        max_nodes = ninfo.max_node_index + 1;
+
+	
+	printf("numa_info              :\n");
+    printf("node:    memsize    memfree    distances\n");
+
+    for (i = 0; i < max_nodes; i++) 
+	{
+    	printf("%4d:    %6"PRIu64"     %6"PRIu64"      %d", i,
+        memsize[i] >> 20, memfree[i] >> 20,
+                   node_dists[i * max_nodes + 0]);
+    	for (j = 1; j < max_nodes; j++)
+        	printf(",%d", node_dists[i * max_nodes + j]);
+        printf("\n");
+    }
+}
+
+void print_machine_layout(xc_machine_numa_layout_t *layout)
+{
+	printf("size_page:%"PRIu64"\n",layout->size_pages);
+	printf("free_pages:%"PRIu64"\n",layout->free_pages);
+	printf("nr_nodes:%u\n",layout->nr_nodes);
+	printf("node_distance:\n");
+	for(int i=0;i<layout->nr_nodes;i++)
+	{
+		for(int j=0;j<layout->nr_nodes;j++)
+			printf("%u ",layout->node_distance[i+j*layout->nr_nodes]);
+		printf("\n");
+	}
+}
+
+/* XXX: Move all sanity checks to this funtion */
+#define XC_DOM_NUMA_MIN_UNIT  256
+
+static char *numa_val_to_str(uint32_t val)
+{
+    switch (val)
+    {
+        case XC_DOM_NUMA_AUTO:
+                return "AUTO";
+        case XC_DOM_NUMA_CLUSTER:
+                return "CLUSTER";
+        case XC_DOM_NUMA_GUEST_NUMA:
+                return "GUEST NUMA";
+        case XC_DOM_NUMA_CROSS:
+                return "CROSS";
+        default:
+                return "NONE";
+    }
+}
+
+void print_layout(xc_domain_numa_layout_t *layout)
+{
+	if(layout==NULL)
+		printf("Error:Empty Point!\n");
+	printf("version:%u\n",layout->version);
+	printf("type:%u\n",layout->type);
+	printf("nr_vcpus:%u\n",layout->nr_vcpus);
+	printf("nr_vnodes:%u\n",layout->nr_vnodes);
+	printf("nr_pages:%u\n",layout->nr_pages);
+	printf("domid:%u\n",layout->domid);
+	printf("strategy:%s\n",numa_val_to_str(layout->strategy));
+	printf("unit_size:%u\n",layout->unit_size);
+	printf("node_distance:\n");
+	for(int i=0;i<layout->nr_vnodes;i++)
+	{
+		for(int j=0;j<layout->nr_vnodes;j++)
+			printf("%u ",(uint32_t)layout->vnode_distance[i+j*layout->nr_vnodes]);
+		printf("\n");
+	}
+}
+
+xc_domain_numa_layout_t * xc_dom_alloc_numa_layout(xc_interface *xch, 
+        uint32_t domid, uint64_t nr_pages, xc_domain_numa_config_t *config)
+{
+    xc_domain_numa_layout_t *dom_layout;
+
+    if (config->strategy == XC_DOM_NUMA_NONE)
+    {
+        IPRINTF("%s: NUMA memory allocation disabled\n", __FUNCTION__);
+        return 0;
+    }
+    if (!(dom_layout = (xc_domain_numa_layout_t *)malloc(sizeof(*dom_layout))))
+    {
+        ERROR("%s: dom_layout allocation failed\n", __FUNCTION__);
+        return dom_layout;
+    }
+
+    DBGPRINTF("%s: dom_layout allocated\n", __FUNCTION__);
+    memset(dom_layout, 0, sizeof(*dom_layout));
+
+    dom_layout->version = XEN_DOM_NUMA_INTERFACE_VERSION;
+    dom_layout->nr_pages = nr_pages;
+    dom_layout->nr_vnodes = config->nr_nodes;
+
+    /* Internal data */
+    dom_layout->domid = domid;
+    dom_layout->strategy = config->strategy;
+    dom_layout->unit_size = config->unit_size;
+    if (dom_layout->unit_size && 
+                        (dom_layout->unit_size < XC_DOM_NUMA_MIN_UNIT))
+    {
+        dom_layout->unit_size = XC_DOM_NUMA_MIN_UNIT;
+        IPRINTF("%s: Min cross unit size is %d pages\n", 
+                                        __FUNCTION__, dom_layout->unit_size);
+    }
+    return dom_layout;
+}
+
+void
+xc_dom_free_numa_layout(xc_interface *xch, xc_domain_numa_layout_t *dom_layout)
+{
+    DBGPRINTF("%s: dom_layout freed\n", __FUNCTION__);
+    free(dom_layout);
+}
+
+#define XC_DUMP_STR_SZ  (8192)
+static void
+xc_dump_dom_numa_layout(xc_interface *xch, xc_domain_numa_layout_t *layout)
+{
+    unsigned int i, j;
+    char *xc_dump_str, *dumpstr;
+    if (!(xc_dump_str = malloc(XC_DUMP_STR_SZ)))
+    {
+        DBGPRINTF("%s : dump_str allocation failed", __FUNCTION__);
+        return;
+    }
+    dumpstr = xc_dump_str;
+    dumpstr += sprintf(dumpstr, 
+                        "NUMA-LAYOUT(Dom %d) : vcpus(%u), vnodes(%u)",
+                        layout->domid, layout->nr_vcpus, layout->nr_vnodes);
+    switch (layout->type)
+    {
+        case XEN_DOM_NUMA_CLUSTER:
+            dumpstr += sprintf(dumpstr, ", type(CLUSTER)\n");
+            break;
+        case XEN_DOM_NUMA_GUSET_NUMA:
+            dumpstr += sprintf(dumpstr, ", type(GUSET_NUMA)\n");
+            break;
+        case XEN_DOM_NUMA_CROSS:
+            dumpstr += sprintf(dumpstr, ", type(CROSS)\n");
+            break;
+        case XEN_DOM_NUMA_DONTCARE:
+            dumpstr += sprintf(dumpstr, ", type(DONTCARE)\n");
+            break;
+        default:
+            dumpstr += sprintf(dumpstr, ", type(UNDEFINED)\n");
+    }
+    for (i = 0; i < layout->nr_vnodes; i++)
+    {
+        xc_vnode_data_t *vnode_data = &layout->vnode_data[i];
+        dumpstr += sprintf(dumpstr, "vnode[%u]:mnode(%u), node_nr_pages(%x)", 
+                vnode_data->vnode_id, vnode_data->mnode_id,
+                vnode_data->nr_pages);
+        if (layout->type == XEN_DOM_NUMA_GUSET_NUMA)
+        {
+            char mapstr[128] = "";
+            struct xenctl_cpumap cpumap;
+            xc_cpumap_from_cpumask(&cpumap, &vnode_data->vcpu_mask);
+            xc_cpumap_snprintf(mapstr, sizeof(mapstr), cpumap);
+            dumpstr += sprintf(dumpstr, ", vcpu_mask(%s)", mapstr);
+        }
+        dumpstr += sprintf(dumpstr, "\n");
+    }
+
+    if (layout->type == XEN_DOM_NUMA_CLUSTER)
+        goto done;
+    dumpstr += sprintf(dumpstr, "vnode distances :\n");
+    for (i = 0; i < layout->nr_vnodes; i++)
+        dumpstr += sprintf(dumpstr, "\tvnode[%u]", i);
+    for (i = 0; i < layout->nr_vnodes; i++)
+    {
+        dumpstr += sprintf(dumpstr, "\nvnode[%u]", i);
+        for (j = 0; j < layout->nr_vnodes; j++)
+            dumpstr += sprintf(dumpstr, "\t%u",
+                            layout->vnode_distance[i*layout->nr_vnodes + j]);
+        dumpstr += sprintf(dumpstr, "\n");
+    }
+done:
+    IPRINTF("%s", xc_dump_str);
+    free(xc_dump_str);
+    return;
+}
+
+
+int xc_get_machine_numa_layout(xc_interface *xch, xc_machine_numa_layout_t *layout)
+{
+    uint32_t i, nr_nodes, nr_cpus;
+    xc_numainfo_t ninfo = { 0 };
+    uint64_t node_memsize[XC_MAX_NODES];
+    uint64_t node_memfree[XC_MAX_NODES];
+    xc_topologyinfo_t tinfo = { 0 };
+    uint32_t cpu_to_node[XC_CPUMASK_NR_CPUS];
+
+	memset(layout, 0, sizeof(*layout));
+	memset(node_memsize, 0, sizeof(uint64_t)*XC_MAX_NODES);
+	memset(node_memfree, 0, sizeof(uint64_t)*XC_MAX_NODES);
+
+    set_xen_guest_handle(ninfo.node_to_memsize, node_memsize);
+    set_xen_guest_handle(ninfo.node_to_memfree, node_memfree);
+    /* Read directly into layout's structure */
+    set_xen_guest_handle(ninfo.node_to_node_distance, layout->node_distance);
+    ninfo.max_node_index = XC_MAX_NODES-1;
+    if (xc_numainfo(xch, &ninfo))
+    {
+        ERROR("%s: xc_numainfo failed", __FUNCTION__);
+        return -1;
+    }
+    /* No need to check if a node is invalid, as in that case
+     * the size would be zero and it would never get selected*/
+    nr_nodes = ninfo.max_node_index + 1;
+    if ( nr_nodes > XC_MAX_NODES )
+        nr_nodes = XC_MAX_NODES;
+
+
+    set_xen_guest_handle(tinfo.cpu_to_core, NULL);
+    set_xen_guest_handle(tinfo.cpu_to_socket, NULL);
+    set_xen_guest_handle(tinfo.cpu_to_node, cpu_to_node);
+    tinfo.max_cpu_index = XC_CPUMASK_NR_CPUS-1;
+
+    if (xc_topologyinfo(xch, &tinfo))
+    {
+        ERROR("%s: xc_topologyinfo failed", __FUNCTION__);
+        return -1;
+    }
+
+    nr_cpus = tinfo.max_cpu_index+1;
+    if (nr_cpus > XC_CPUMASK_NR_CPUS)
+        nr_cpus = XC_CPUMASK_NR_CPUS;
+
+    layout->nr_nodes = nr_nodes;
+    for (i=0; i<nr_nodes; i++)
+    {
+        uint64_t size_pages, free_pages;
+        layout->node_data[i].node_id = i;
+        size_pages = (node_memsize[i] >> PAGE_SHIFT);
+        free_pages = (node_memfree[i] >> PAGE_SHIFT);
+        layout->node_data[i].size_pages = size_pages;
+        layout->node_data[i].free_pages = free_pages;
+        layout->size_pages += size_pages;
+        layout->free_pages += free_pages;
+    }
+
+    for (i=0; i<nr_cpus; i++)
+    {
+        struct xenctl_cpumap cpumap;
+        xc_cpumask_t *cpumask;
+
+        if (cpu_to_node[i] == INVALID_TOPOLOGY_ID)
+            continue;
+        cpumask = &(layout->node_data[(cpu_to_node[i])].cpu_mask);
+        xc_cpumap_from_cpumask(&cpumap, cpumask);
+        xc_cpumap_set_cpu(i, cpumap);
+    }
+    return 0;
+}
+
+static int
+xc_get_max_vcpus(xc_interface *xch, uint32_t domid)
+{
+    DECLARE_DOMCTL;
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)domid;
+    return ((do_domctl(xch, &domctl) < 0)
+            ? 0 : (domctl.u.getdomaininfo.max_vcpu_id+1));
+}
+
+/* The function makes a (greedy) best fit selection of num_vnodes of
+ * vnode_size each. The number of pages selected from each node are returned
+ * in the node_pages_selected array.
+ * The best_fit ranking is based on the fraction(up to 1024 parts) of node
+ * memory occupied, if the node is selected.
+ * Returns 0 on success and 1 if selection fails. */
+/* XXX: Node selection needs more research/experience. */
+static int xc_select_best_fit_nodes(
+        xc_interface *xch, xc_machine_numa_layout_t *phys_layout,
+        uint32_t num_vnodes, uint64_t vnode_pages, uint64_t *nodes_pages)
+{
+    int i, num_nodes_selected;
+    uint64_t best_fit_rank;
+
+    DBGPRINTF("%s: called\n", __FUNCTION__);
+#define INVALID_NODE (~0)
+#define NODE_FIT_RANK_SHIFT (10)
+    best_fit_rank = 0;
+	num_nodes_selected = 0;
+
+    do {
+        int selected_node = INVALID_NODE;
+        for (i=0; i<phys_layout->nr_nodes; i++)
+        {
+            xc_node_data_t *node_data;
+            uint64_t node_sizepages, node_freepages;
+            uint64_t node_fit_rank;
+
+            /* Node is already selected */
+            if (nodes_pages[i])
+                continue;
+
+            node_data = &phys_layout->node_data[i];
+            node_sizepages = node_data->size_pages;
+            node_freepages = node_data->free_pages;
+
+            if (node_freepages < vnode_pages)
+                continue;
+
+            node_fit_rank = ((node_sizepages-node_freepages-vnode_pages)
+                                    << NODE_FIT_RANK_SHIFT) / node_sizepages;
+
+            if (node_fit_rank > best_fit_rank)
+                selected_node = i;
+        }
+
+        /* Nodes could not be selected. Bail out ! */
+        if (selected_node == INVALID_NODE)
+            return -1;
+
+        nodes_pages[selected_node] = vnode_pages;
+        num_nodes_selected++;
+    } while(num_nodes_selected < num_vnodes);
+#undef NODE_FIT_RANK_SHIFT
+#undef INVALID_NODE
+    return 0;
+}
+
+/* Sort the phys nodes in the decreasing order of free node memory */
+static void xc_sort_nodeload(xc_machine_numa_layout_t *phys_layout)
+{
+    int i, j;
+    uint32_t nr_nodes;
+
+    nr_nodes = phys_layout->nr_nodes;
+
+    for (i = 0; i < nr_nodes; i++)
+    {
+        uint64_t i_node_free = phys_layout->node_data[i].free_pages; 
+        for (j = i+1; j < nr_nodes; j++)
+        {
+            uint64_t j_node_free = phys_layout->node_data[j].free_pages; 
+            if (i_node_free > j_node_free)
+            {
+                xc_node_data_t tmp_node_data;
+                tmp_node_data = phys_layout->node_data[i];
+                phys_layout->node_data[i] = phys_layout->node_data[j];
+                phys_layout->node_data[j] = tmp_node_data;
+            }
+        }
+    }
+
+    return;
+}
+
+/* The function selects the nodes in the increasing order of free node memory,
+ * and fills them. The physical memory map for such a domain is distrubuted 
+ * across all the selected nodes. 
+ * The phys_layout node_data structures could be sorted inplace. So, we 
+ * should always use node_data->node_id while using the node_distance array. 
+ * Returns the number of nodes selected. */
+static int xc_select_max_fit_nodes(
+        xc_interface *xch, xc_machine_numa_layout_t *phys_layout,
+                                    uint64_t dom_pages, uint64_t *node_pages)
+{
+    int i;
+    uint64_t dom_alloc_pages;
+
+    DBGPRINTF("%s: called\n", __FUNCTION__);
+    xc_sort_nodeload(phys_layout);
+
+    dom_alloc_pages = 0;
+    for (i=0; i<phys_layout->nr_nodes; i++)
+    {
+        xc_node_data_t *node_data;
+        uint64_t node_freepages;
+
+        node_data = &phys_layout->node_data[i];
+
+        /* In max-fit, if we try to pack the nodes too aggressively
+         * we might fail on any small allocation (from xen node heaps).
+		 * That's why, with DEFAULT, we don't use exact_node flag. */
+        node_freepages = node_data->free_pages;
+        if (!node_freepages)
+            continue;
+
+        if (node_freepages > (dom_pages-dom_alloc_pages))
+            node_freepages = (dom_pages-dom_alloc_pages);
+
+        node_pages[i] = node_freepages;
+        dom_alloc_pages += node_freepages;
+    }
+    if (dom_alloc_pages != dom_pages)
+    {
+        ERROR(
+                "%s: Failed to allocate memory. Maybe had to balloon more\n",
+                __FUNCTION__);
+        return -1;
+    }
+    return (i+1);
+}
+
+static int xc_setup_vnode_vcpu_masks(xc_domain_numa_layout_t *dom_layout)
+{
+    int vcpu;
+    for (vcpu=0; vcpu<dom_layout->nr_vcpus; vcpu++)
+    {
+        struct xenctl_cpumap vcpumap;
+        xc_cpumask_t *vcpumask;
+        int vnode = vcpu/(dom_layout->nr_vcpus/dom_layout->nr_vnodes);
+
+        vcpumask = &dom_layout->vnode_data[vnode].vcpu_mask;
+        xc_cpumap_from_cpumask(&vcpumap, vcpumask);
+        xc_cpumap_set_cpu(vcpu, vcpumap);
+    } 
+    return 0;    
+}
+
+static int xc_setup_vnode_distances(xc_machine_numa_layout_t *phys_layout, 
+                                        xc_domain_numa_layout_t *dom_layout)
+{
+    int vn1, vn2;
+    for (vn1=0; vn1<dom_layout->nr_vnodes; vn1++)
+    {
+        int n1 = dom_layout->vnode_data[vn1].mnode_id;
+        for (vn2=0; vn2<dom_layout->nr_vnodes; vn2++)
+        {
+            int n2 = dom_layout->vnode_data[vn2].mnode_id;
+            dom_layout->vnode_distance[(vn1*dom_layout->nr_vnodes)+vn2] =
+                phys_layout->node_distance[(n1*phys_layout->nr_nodes)+n2];
+        
+        }
+    }
+    return 0;
+}
+
+/* We require the vnodes to be aligned to 1GB 
+ * SHIFT values for 4K pages */
+#define XC_VNODE_MIN_SHIFT   (XEN_MIN_VNODE_SHIFT-PAGE_SHIFT)
+#define XC_VNODE_MIN_SIZE   (1UL << XC_VNODE_MIN_SHIFT)
+#define XC_VNODE_MIN_MASK ~(XC_VNODE_MIN_SIZE-1)
+/* Because we are strict with the alignment, we boost the size 
+ * to account for the pages not seen in physmap (by 16MB for now). */
+#define XC_VNODE_BOOST_SIZE (4096)
+#define XC_VCPUS_PER_VNODE (1)
+#define XC_POWER_OF_2(x) (((x) & ((x) - 1)) == 0)
+
+static int xc_setup_domain_vnodes(xc_interface *xch,
+    xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *dom_layout,
+	uint64_t *node_pages_selected)
+{
+	int i;
+    uint32_t vnode_id;
+
+    for (i=0, vnode_id=0; i<phys_layout->nr_nodes; i++)
+    {
+        xc_node_data_t *node_data;
+        xc_vnode_data_t *vnode_data;
+
+        if (!node_pages_selected[i])
+            continue;
+
+        node_data = &phys_layout->node_data[i];
+        vnode_data = &dom_layout->vnode_data[vnode_id];
+        vnode_data->vnode_id = vnode_id;
+        vnode_data->nr_pages = node_pages_selected[i];
+        vnode_data->mnode_id = node_data->node_id;
+        vnode_id++;
+    }
+    if (vnode_id != dom_layout->nr_vnodes)
+    {
+        ERROR("%s: Internal Error(vnode count mismatch) (%d/%d) !\n", 
+                                __FUNCTION__, vnode_id, dom_layout->nr_vnodes);
+        return -1;
+    }
+    /* vnodes are exposed to the guest only for GUEST NUMA. */
+    if (xc_setup_vnode_vcpu_masks(dom_layout) || 
+            (xc_setup_vnode_distances(phys_layout, dom_layout)))
+    {
+        ERROR("%s: vnode setup failed !\n", __FUNCTION__);
+        return -1;
+    }
+
+    return 0;
+}
+
+static int xc_select_domain_prep(xc_interface *xch,
+    xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *dom_layout)
+{
+    if (!dom_layout->nr_vnodes)
+    {
+        ERROR("%s: VM nr_vnodes configured incorrectly !\n", __FUNCTION__);
+        return -1; 
+    }
+
+    if (dom_layout->nr_pages > phys_layout->free_pages)
+    {
+        ERROR(
+            "%s: Not enough memory for pv (unlikely after balloon checks)\n",
+                __FUNCTION__);
+        return -1;
+    }
+
+    if (!(dom_layout->nr_vcpus = xc_get_max_vcpus(xch, dom_layout->domid)))
+    {
+        ERROR("%s: xc_get_max_vcpus failed !\n", __FUNCTION__);
+        return -1; 
+    }
+
+    if (dom_layout->nr_vcpus > XC_CPUMASK_NR_CPUS)
+    {
+        ERROR("%s: Failed - More than %d vcpus!\n",
+                                            __FUNCTION__,  XC_CPUMASK_NR_CPUS);
+        return -1; 
+    }
+
+    if (dom_layout->nr_vcpus < dom_layout->nr_vnodes )
+    {
+        ERROR("%s: VM (%d) - more vcpus(%d) than vnodes(%d)!\n",
+                __FUNCTION__, dom_layout->domid, dom_layout->nr_vcpus,
+                dom_layout->nr_vnodes);
+        return -1; 
+    }
+
+    return 0;
+}
+
+static int xc_select_domain_cluster(xc_interface *xch,
+    xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *dom_layout)
+{
+    uint64_t *node_pages_selected = 0;
+    int rc;
+
+    DBGPRINTF("%s: Called for VM %d\n", __FUNCTION__, dom_layout->domid);
+    if ((rc = xc_select_domain_prep(xch, phys_layout, dom_layout)))
+        return -1;
+
+    if (!(node_pages_selected = 
+                (uint64_t *)calloc(XC_MAX_NODES, sizeof(uint64_t))))
+    {
+        rc = -1;
+        ERROR("%s: node_pages allocation failed\n", __FUNCTION__);
+    	goto failed;
+    }
+	if ((rc = xc_select_best_fit_nodes(xch, phys_layout, 1, 
+                    dom_layout->nr_pages, node_pages_selected)))
+    {
+        ERROR("%s: Not enough memory for CLUSTER (Had to balloon more ?)\n",
+                                                            __FUNCTION__);
+    	goto failed;
+    }
+
+    dom_layout->type = XEN_DOM_NUMA_CLUSTER;
+    rc = xc_setup_domain_vnodes(xch, phys_layout, dom_layout, 
+                                                    node_pages_selected);
+    if (!rc)
+        DBGPRINTF("%s: Selected CLUSTER for VM %d\n", 
+                                    __FUNCTION__, dom_layout->domid);
+failed:
+    if (node_pages_selected)
+        free(node_pages_selected);
+    return rc;
+}
+
+/* For the numa guests, we construct a symmetrical topology (wrt the 
+ * distribution of vcpus over vnodes).
+ * We require the numa guests to have (2^n) vcpus and (2^k) vnodes.
+ * Each vnode is then assigned 2^(n-k) vcpus, where (n>=k).
+ */
+static int xc_select_domain_guest_numa(xc_interface *xch,
+    xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *dom_layout)
+{
+    uint64_t vnode_nr_pages, *node_pages_selected = 0;
+    int rc;
+
+    DBGPRINTF("%s: Called for VM %d\n", __FUNCTION__, dom_layout->domid);
+    if ((rc = xc_select_domain_prep(xch, phys_layout, dom_layout)))
+        return -1;
+
+    if (!XC_POWER_OF_2(dom_layout->nr_vcpus))
+    {
+        ERROR("%s: #vcpus != 2^n (disable guest numa)\n", __FUNCTION__);
+		return -1;
+    }
+    if (!XC_POWER_OF_2(dom_layout->nr_vnodes))
+    {
+        ERROR("%s: #vnodes != 2^n (disable guest numa)\n", __FUNCTION__);
+		return -1;
+    }
+	if (dom_layout->nr_vcpus < (dom_layout->nr_vnodes*XC_VCPUS_PER_VNODE))
+	{
+        ERROR("%s: Failed - Not enough vcpus (%d on %d)!\n",
+				__FUNCTION__, dom_layout->nr_vcpus, dom_layout->nr_vnodes);
+        return -1; 
+	}
+
+	vnode_nr_pages = 
+        (dom_layout->nr_pages+XC_VNODE_BOOST_SIZE)/dom_layout->nr_vnodes;
+    vnode_nr_pages &= XC_VNODE_MIN_MASK;
+	if (vnode_nr_pages < XC_VNODE_MIN_SIZE)
+	{
+        ERROR("%s: vnode_size(%lx)<min(%lx), nr_pages(%lx), nr_vnodes(%d)!\n",
+				__FUNCTION__, vnode_nr_pages, XC_VNODE_MIN_SIZE,
+                dom_layout->nr_pages, dom_layout->nr_vnodes);
+        return -1; 
+	}
+    dom_layout->nr_pages = vnode_nr_pages*dom_layout->nr_vnodes;
+
+    if (!(node_pages_selected = 
+                (uint64_t *)calloc(XC_MAX_NODES, sizeof(uint64_t))))
+    {
+        rc = -1;
+        ERROR("%s: node_pages allocation failed\n", __FUNCTION__);
+    	goto failed;
+    }
+	if ((rc = xc_select_best_fit_nodes(xch, phys_layout, dom_layout->nr_vnodes, 
+                    vnode_nr_pages, node_pages_selected)) != 0)
+    {
+        ERROR("%s: Not enough memory for GUSET_NUMA (Had to balloon more ?)\n",
+                                                            __FUNCTION__);
+    	goto failed;
+    }
+
+    dom_layout->nr_pages = dom_layout->nr_vnodes*vnode_nr_pages;
+    dom_layout->type = XEN_DOM_NUMA_GUSET_NUMA;
+    if ((rc = xc_setup_domain_vnodes(xch, phys_layout, dom_layout, 
+                                                    node_pages_selected)))
+        goto failed;
+
+    if ((rc = xc_domain_setmaxmem(xch, dom_layout->domid, 
+            (dom_layout->nr_pages+XC_VNODE_BOOST_SIZE)<<(PAGE_SHIFT-10))))
+        goto failed;
+
+    DBGPRINTF("%s: Selected GUEST_NUMA for VM %d\n", 
+                                    __FUNCTION__, dom_layout->domid);
+failed:
+    if (node_pages_selected)
+        free(node_pages_selected);
+    return rc;
+}
+
+static int xc_select_domain_cross(xc_interface *xch,
+    xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *dom_layout)
+{
+    uint64_t vnode_nr_pages, *node_pages_selected = 0;
+    int rc;
+
+    DBGPRINTF("%s: Called for VM %d\n", __FUNCTION__, dom_layout->domid);
+    if ((rc = xc_select_domain_prep(xch, phys_layout, dom_layout)))
+        return -1;
+
+	vnode_nr_pages = dom_layout->nr_pages/dom_layout->nr_vnodes;
+
+    if (!(node_pages_selected = 
+                (uint64_t *)calloc(XC_MAX_NODES, sizeof(uint64_t))))
+    {
+        rc = -1;
+        ERROR("%s: node_pages allocation failed\n", __FUNCTION__);
+    	goto failed;
+    }
+	if ((rc = xc_select_best_fit_nodes(xch, phys_layout, dom_layout->nr_vnodes, 
+                    vnode_nr_pages, node_pages_selected)) != 0)
+    {
+        ERROR("%s: Not enough memory for CROSS (Had to balloon more ?)\n",
+                                                            __FUNCTION__);
+    	goto failed;
+    }
+
+    dom_layout->nr_pages = dom_layout->nr_vnodes*vnode_nr_pages;
+    dom_layout->type = XEN_DOM_NUMA_CROSS;
+    rc = xc_setup_domain_vnodes(xch, phys_layout, dom_layout, 
+                                                    node_pages_selected);
+    if (!rc)
+        DBGPRINTF("%s: Selected CROSS for VM %d\n", 
+                                    __FUNCTION__, dom_layout->domid);
+failed:
+    if (node_pages_selected)
+        free(node_pages_selected);
+    return rc;
+}
+
+static int xc_select_domain_dontcare(xc_interface *xch,
+    xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *dom_layout)
+{
+    uint64_t *node_pages_selected = 0;
+    int rc;
+
+    DBGPRINTF("%s: Called for VM %d\n", __FUNCTION__, dom_layout->domid);
+    if ((rc = xc_select_domain_prep(xch, phys_layout, dom_layout)))
+        return -1;
+
+    if (!(node_pages_selected = 
+                (uint64_t *)calloc(XC_MAX_NODES, sizeof(uint64_t))))
+    {
+        rc = -1;
+        ERROR("%s: node_pages allocation failed\n", __FUNCTION__);
+    	goto failed;
+    }
+	if ((rc = xc_select_max_fit_nodes(xch, phys_layout, dom_layout->nr_pages, 
+                                    node_pages_selected)) < 0)
+    {
+        ERROR("%s: Not enough memory for CLUSTER (Had to balloon more ?)\n",
+                                                            __FUNCTION__);
+    	goto failed;
+    }
+
+    dom_layout->type = XEN_DOM_NUMA_DONTCARE;
+    dom_layout->nr_vnodes = rc;
+    rc = xc_setup_domain_vnodes(xch, phys_layout, dom_layout, 
+                                                    node_pages_selected);
+    if (!rc)
+        DBGPRINTF("%s: Selected DONTCARE for VM %d\n", 
+                                    __FUNCTION__, dom_layout->domid);
+failed:
+    if (node_pages_selected)
+        free(node_pages_selected);
+    return rc;
+}
+
+#define XC_DOM_IS_NUMA_GUEST(n) (0)
+
+static int xc_select_domain_auto(xc_interface *xch,
+    xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *dom_layout)
+{
+	int i;
+
+    /* Attempt to cluster the VM */
+    DBGPRINTF("%s: Selecting allocation strategy for (VM %d)\n", 
+                                    __FUNCTION__, dom_layout->domid);
+
+    dom_layout->nr_vnodes = 1;
+	if (!xc_select_domain_cluster(xch, phys_layout, dom_layout))
+		return 0;
+
+    if (!XC_DOM_IS_NUMA_GUEST(dom_layout))
+        DBGPRINTF("%s: Image doesn't support numa (VM %d)\n", 
+                                    __FUNCTION__, dom_layout->domid);
+    else
+	{
+    	/* Attempt to show guest numa to the VM */
+    	for (i = 2; i <= phys_layout->nr_nodes; i<<=1)
+        {
+            dom_layout->nr_vnodes = i;
+			if (!xc_select_domain_guest_numa(xch, phys_layout, dom_layout))
+				return 0;
+        }
+	}
+
+  	/* Attempt to make the VM cross echo node*/
+   	for (i = 2; i <= phys_layout->nr_nodes; i++)
+    {
+        dom_layout->nr_vnodes = i;
+		if (!xc_select_domain_cross(xch, phys_layout, dom_layout))
+			return 0;
+    }
+
+	if (!xc_select_domain_dontcare(xch, phys_layout, dom_layout))
+		return 0;
+
+    ERROR("%s: Failed to allocate memory for the VM (Had to balloon more ?)\n",
+                                                            __FUNCTION__);
+    return -1;
+}
+
+int xc_setup_numa_domain(xc_interface *xch, xc_domain_numa_layout_t *dom_layout)
+{
+    int rc;
+    xc_machine_numa_layout_t *phys_layout;
+
+    DBGPRINTF("%s: called (mem_strategy:%d)\n",
+                                    __FUNCTION__, dom_layout->strategy);
+
+    if (!(phys_layout = malloc(sizeof(*phys_layout))))
+    {
+        ERROR( "%s: phys_layout allocation failed\n", __FUNCTION__);
+        return -1;
+    }
+
+    if ((rc = xc_get_machine_numa_layout(xch, phys_layout)))
+    {
+        ERROR( "%s: xc_get_machine_numa_layout failed\n", __FUNCTION__);
+        goto done;
+    }
+
+	switch (dom_layout->strategy)
+	{
+		case XC_DOM_NUMA_AUTO:
+			rc = xc_select_domain_auto(xch, phys_layout, dom_layout);
+			break;
+		case XC_DOM_NUMA_CLUSTER:
+            dom_layout->nr_vnodes = 1; /* In case configured bad */
+			rc = xc_select_domain_cluster(xch, phys_layout, dom_layout);
+			break;
+		case XC_DOM_NUMA_GUEST_NUMA:
+			rc = xc_select_domain_guest_numa(xch, phys_layout, dom_layout);
+			break;
+		case XC_DOM_NUMA_CROSS:
+			rc = xc_select_domain_cross(xch, phys_layout, dom_layout);
+			break;
+		default:
+			rc = -1;
+        	ERROR("%s: Unknown memory allocation strategy (%d)\n",
+								__FUNCTION__, dom_layout->strategy);
+	}
+
+	if (rc)
+ 	{
+       	ERROR("%s: xc_select_domain failed for (%d)\n", 
+				__FUNCTION__, dom_layout->strategy);
+       	goto done;
+   	}
+
+    xc_dump_dom_numa_layout(xch, dom_layout);
+done:
+    free(phys_layout);
+    return rc;
+}
+
+static int
+xc_domain_numa_vcpu_setaffinity(xc_interface *xch, uint32_t domid,
+                                int vcpu, struct xenctl_cpumap *cpumap)
+{
+    DECLARE_DOMCTL;
+    int ret = -1;
+
+    domctl.cmd = XEN_DOMCTL_setvcpuaffinity;
+    domctl.domain = (domid_t)domid;
+    domctl.u.vcpuaffinity.vcpu = vcpu;
+    domctl.u.vcpuaffinity.cpumap.bitmap = cpumap->bitmap;
+	domctl.u.vcpuaffinity.cpumap.nr_elems = cpumap->nr_cpus;
+
+    if ( xc_cpumap_lock_pages(cpumap) != 0 )
+    {
+        PERROR("Could not lock memory for Xen hypercall");
+        goto out;
+    }
+
+    ret = do_domctl(xch, &domctl);
+    xc_cpumap_unlock_pages(cpumap);
+ out:
+    return ret;
+}
+
+static int
+xc_domain_numa_pinvcpus_guest_numa(xc_interface *xch,
+                                xc_domain_numa_layout_t *dom_layout,
+                                xc_machine_numa_layout_t *phys_layout)
+{
+    int vnode;
+
+    for (vnode = 0; vnode < dom_layout->nr_vnodes; vnode++)
+    {
+        int vcpu;
+        int mnode = dom_layout->vnode_data[vnode].mnode_id;
+        xc_cpumask_t *node_cpumask =
+                    &phys_layout->node_data[mnode].cpu_mask;
+        xc_cpumask_t *vnode_vcpumask =
+                    &dom_layout->vnode_data[vnode].vcpu_mask;
+        struct xenctl_cpumap node_cpumap, vnode_vcpumap;
+
+        xc_cpumap_from_cpumask(&node_cpumap, node_cpumask);
+        xc_cpumap_from_cpumask(&vnode_vcpumap, vnode_vcpumask);
+        xc_for_each_cpu(vcpu, vnode_vcpumap)
+        {
+            if (xc_domain_numa_vcpu_setaffinity(
+                        xch, dom_layout->domid, vcpu, &node_cpumap)) 
+            {
+                ERROR( "%s:xc_vcpu_setaffinity failed\n", __FUNCTION__);
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
+
+static int
+xc_domain_numa_pinvcpus_cross(xc_interface *xch,
+                                xc_domain_numa_layout_t *dom_layout,
+                                xc_machine_numa_layout_t *phys_layout)
+{
+    int vnode, vcpu;
+    xc_cpumask_t cross_cpumask;
+    struct xenctl_cpumap cross_cpumap;
+
+    xc_cpumap_from_cpumask(&cross_cpumap, &cross_cpumask);
+    xc_cpumap_clearall(cross_cpumap);
+
+    for (vnode = 0; vnode < dom_layout->nr_vnodes; vnode++)
+    {
+        int mnode = dom_layout->vnode_data[vnode].mnode_id;
+        xc_cpumask_t *node_cpumask =
+                    &phys_layout->node_data[mnode].cpu_mask;
+        struct xenctl_cpumap node_cpumap;
+
+        xc_cpumap_from_cpumask(&node_cpumap, node_cpumask);
+        xc_cpumap_or(cross_cpumap, cross_cpumap, node_cpumap);
+    }
+
+    for (vcpu = 0; vcpu < dom_layout->nr_vcpus; vcpu++)
+    {
+        if (xc_domain_numa_vcpu_setaffinity(
+                    xch, dom_layout->domid, vcpu, &cross_cpumap)) 
+        {
+            ERROR( "%s:xc_scpu_getaffinity failed\n", __FUNCTION__);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+int
+xc_domain_numa_pinvcpus(xc_interface *xch, xc_domain_numa_layout_t *dom_layout)
+{
+    int rc;
+
+    xc_machine_numa_layout_t *phys_layout;
+    if (!(phys_layout = malloc(sizeof(*phys_layout))))
+    {
+        ERROR( "%s: layout allocation failed\n", __FUNCTION__);
+        return -1;
+    }
+
+    if ((rc = xc_get_machine_numa_layout(xch, phys_layout)))
+    {
+        ERROR( "%s: xc_get_machine_numa_layout failed\n",
+                                                            __FUNCTION__);
+        goto done;
+    }
+
+    if ((dom_layout->type == XEN_DOM_NUMA_CROSS) || 
+						(dom_layout->type == XEN_DOM_NUMA_DONTCARE))
+        rc = xc_domain_numa_pinvcpus_cross(xch, dom_layout, phys_layout);
+    else
+        rc = xc_domain_numa_pinvcpus_guest_numa(xch, dom_layout, phys_layout);
+done:
+    free(phys_layout);
+    return rc;
+}
+
+#undef set_xen_guest_handle
diff --git a/tools/libxc/xc_dom_numa.h b/tools/libxc/xc_dom_numa.h
new file mode 100755
index 0000000..764dcba
--- /dev/null
+++ b/tools/libxc/xc_dom_numa.h
@@ -0,0 +1,115 @@
+#ifndef __XC_DOM_NUMA_H
+#define __XC_DOM_NUMA_H
+
+#include "xenctrl.h"
+#include <xen/dom_numa.h>
+
+#define XC_CPUMASK_NR_CPUS XEN_MAX_VCPUS
+#define XC_MAX_VNODES 8
+
+#define XC_CPUMASK_BITS_PER_BYTE 8
+#define XC_CPUMASK_BITS_TO_BYTES(bits) \
+    (((bits)+XC_CPUMASK_BITS_PER_BYTE-1)/XC_CPUMASK_BITS_PER_BYTE)
+#define XC_CPUMASK_DECLARE_BITMAP(name,bits) \
+    uint8_t name[XC_CPUMASK_BITS_TO_BYTES(bits)]
+
+struct xc_cpumask{ XC_CPUMASK_DECLARE_BITMAP(bits, XC_CPUMASK_NR_CPUS); };
+typedef struct xc_cpumask xc_cpumask_t;
+
+/* Construct a xenctl_cpumap structure using buffer from the xc_cpumask
+ * structure */
+#define xc_cpumap_from_cpumask(map, mask)               \
+do {                                                    \
+    (map)->nr_cpus = XC_CPUMASK_NR_CPUS;                    \
+    set_xen_guest_handle((map)->bitmap, (mask)->bits);  \
+}while(0)
+
+
+struct xc_vnode_data {
+    uint8_t vnode_id;
+    uint8_t mnode_id;
+    uint32_t nr_pages;
+    xc_cpumask_t vcpu_mask; /* vnode_to_vcpumask */
+};
+typedef struct xc_vnode_data xc_vnode_data_t;
+
+struct xc_domain_numa_layout {
+    uint8_t version;
+    uint8_t type;
+
+    uint8_t nr_vcpus;
+    uint8_t nr_vnodes;
+
+    uint32_t nr_pages;
+    /* Only (nr_vnodes) entries are filled */
+    xc_vnode_data_t vnode_data[XC_MAX_VNODES];
+    /* Only (nr_vnodes*nr_vnodes) entries are filled */
+    uint8_t vnode_distance[XC_MAX_VNODES*XC_MAX_VNODES];
+
+    /* For Internal USE only */
+    uint32_t domid;
+    uint16_t strategy;
+    uint16_t unit_size;
+};
+typedef struct xc_domain_numa_layout xc_domain_numa_layout_t;
+
+#define XC_MAX_NODES 16
+struct xc_node_data {
+    uint32_t node_id;
+    uint64_t size_pages;
+    uint64_t free_pages;
+    xc_cpumask_t cpu_mask; /* node_to_cpumask */
+};
+typedef struct xc_node_data xc_node_data_t;
+
+struct xc_machine_numa_layout {
+    uint64_t size_pages;
+    uint64_t free_pages;
+
+    uint32_t nr_nodes;
+
+    /* Only (nr_nodes*nr_nodes) entries are filled */
+    uint32_t node_distance[XC_MAX_NODES*XC_MAX_NODES];
+    /* Only (nr_nodes) entries are filled */
+    xc_node_data_t node_data[XC_MAX_NODES];
+};
+typedef struct xc_machine_numa_layout xc_machine_numa_layout_t;
+
+#define DEBUG_LAYOUT(layout) \
+{\
+	DEBUG_PRINT("layout(%s):\n",#layout);\
+	print_layout(layout);\
+}
+
+void print_layout(xc_domain_numa_layout_t *layout);
+
+#define DEBUG_M_LAYOUT(layout) \
+{\
+	DEBUG_PRINT("layout(%s):\n",#layout);\
+	print_machine_layout(layout);\
+}
+	
+void print_machine_layout(xc_machine_numa_layout_t *layout);
+
+extern xc_domain_numa_layout_t * xc_dom_alloc_numa_layout(xc_interface *xch, 
+        uint32_t domid, uint64_t nr_pages, xc_domain_numa_config_t *config);
+extern void xc_dom_free_numa_layout(xc_interface *xch, 
+                                        xc_domain_numa_layout_t *dom_layout);
+
+extern int 
+xc_setup_numa_domain(xc_interface *xch, xc_domain_numa_layout_t *dom_layout);
+extern int
+xc_domain_numa_pinvcpus(xc_interface *xch, xc_domain_numa_layout_t *dom_layout);
+
+static inline int xc_domain_nr_vnodes(xc_domain_numa_layout_t * dom_layout)
+{
+    if (!dom_layout || (dom_layout->type != XEN_DOM_NUMA_GUSET_NUMA))
+        return 0;
+    return dom_layout->nr_vnodes;
+}
+
+int xc_get_machine_numa_layout(xc_interface *xch, xc_machine_numa_layout_t *layout);
+
+void print_numa_info(xc_interface *xch);
+
+#endif
diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
index d619f88..aff7f08 100644
--- a/tools/libxc/xc_hvm_build.c
+++ b/tools/libxc/xc_hvm_build.c
@@ -24,6 +24,8 @@
 
 #include "xg_private.h"
 #include "xc_private.h"
+#include "xc_dom_numa.h"
+#include "xc_cpumap.h"
 
 #include <xen/foreign/x86_32.h>
 #include <xen/foreign/x86_64.h>
@@ -46,7 +48,67 @@
 #define NR_SPECIAL_PAGES     5
 #define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))
 
-static void build_hvm_info(void *hvm_info_page, uint64_t mem_size)
+#ifdef set_xen_guest_handle
+#undef set_xen_guest_handle
+#endif
+#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
+
+static int build_hvm_numa_info(struct hvm_info_table *hvm_info, 
+                                        xc_domain_numa_layout_t *dlayout)
+{
+    int i, j;
+    uint64_t vnode_pgstart;
+    struct xen_domain_numa_info *ninfo;
+    struct xen_vnode_info *ninfo_vnode_info;
+    uint8_t *ninfo_vcpu_to_vnode, *ninfo_vnode_distance;
+
+    ninfo = &hvm_info->numa_info[0];
+    ninfo->version = dlayout->version;
+    ninfo->type = dlayout->type;
+    ninfo->nr_vcpus = dlayout->nr_vcpus;
+    ninfo->nr_vnodes = dlayout->nr_vnodes;
+
+    ninfo_vnode_info = NUMA_INFO_VNODE_INFO(ninfo);
+    ninfo_vcpu_to_vnode = NUMA_INFO_VCPU_TO_VNODE(ninfo);
+    ninfo_vnode_distance = NUMA_INFO_VNODE_DISTANCE(ninfo);
+
+	for (i=0; i<ninfo->nr_vcpus; i++)
+		ninfo_vcpu_to_vnode[i] = XEN_INVALID_NODE;
+
+    for (i=0, vnode_pgstart=0; i<dlayout->nr_vnodes; i++)
+    {
+        uint64_t vnode_pgend;
+		struct xenctl_cpumap vnode_vcpumap;
+        xc_vnode_data_t *vnode_data = &dlayout->vnode_data[i];
+		xc_cpumask_t *vnode_vcpumask = &vnode_data->vcpu_mask;
+        struct xen_vnode_info *vnode_info = &ninfo_vnode_info[i];
+
+        vnode_info->mnode_id = vnode_data->mnode_id;
+        vnode_pgend = vnode_pgstart + vnode_data->nr_pages;
+        /* Account for hole in the memory map */
+        if ( (vnode_pgstart < hvm_info->low_mem_pgend) && 
+                            (vnode_pgend >= hvm_info->low_mem_pgend) )
+                vnode_pgend += ((1ull<<32) - HVM_BELOW_4G_RAM_END)>>PAGE_SHIFT;
+       
+        vnode_info->start = vnode_pgstart;
+        vnode_info->end = vnode_pgend;
+        vnode_pgstart = vnode_pgend;
+
+        xc_cpumap_from_cpumask(&vnode_vcpumap, vnode_vcpumask);
+        xc_for_each_cpu(j, vnode_vcpumap)
+            ninfo_vcpu_to_vnode[j] = i;
+    }
+
+    for (i=0; i<ninfo->nr_vnodes; i++)
+        for (j=0; j<ninfo->nr_vnodes; j++)
+            ninfo_vnode_distance[(i*ninfo->nr_vnodes)+j] =
+                    dlayout->vnode_distance[(i*ninfo->nr_vnodes)+j];
+
+    return NUMA_INFO_SIZE(ninfo);
+}
+
+static void build_hvm_info(void *hvm_info_page, uint64_t mem_size, 
+                                        xc_domain_numa_layout_t *dom_layout)
 {
     struct hvm_info_table *hvm_info = (struct hvm_info_table *)
         (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
@@ -77,6 +139,12 @@ static void build_hvm_info(void *hvm_info_page, uint64_t mem_size)
     hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
     hvm_info->reserved_mem_pgstart = special_pfn(0);
 
+	 if ( dom_layout && ( dom_layout->type == XEN_DOM_NUMA_GUSET_NUMA))
+	 {
+		 hvm_info->numa_enabled = 1;
+		hvm_info->length += build_hvm_numa_info(hvm_info, dom_layout);
+	 }
+
     /* Finish with the checksum. */
     for ( i = 0, sum = 0; i < hvm_info->length; i++ )
         sum += ((uint8_t *)hvm_info)[i];
@@ -131,206 +199,295 @@ static int check_mmio_hole(uint64_t start, uint64_t memsize)
         return 1;
 }
 
-static int setup_guest(xc_interface *xch,
-                       uint32_t dom, int memsize, int target,
-                       char *image, unsigned long image_size)
+#define INVALID_NODE (~0)
+static int __setup_guest_memory(xc_interface *xch, uint32_t dom, 
+                        unsigned long nr_pages, unsigned long target_pages, 
+                        unsigned long cur_pages, xen_pfn_t *page_array, 
+                        int vga_hole, int node, int exact_node)
 {
-    xen_pfn_t *page_array = NULL;
-    unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
-    unsigned long target_pages = (unsigned long)target << (20 - PAGE_SHIFT);
-    unsigned long entry_eip, cur_pages, cur_pfn;
-    void *hvm_info_page;
-    uint32_t *ident_pt;
-    struct elf_binary elf;
-    uint64_t v_start, v_end;
-    int rc;
-    xen_capabilities_info_t caps;
-    unsigned long stat_normal_pages = 0, stat_2mb_pages = 0, 
-        stat_1gb_pages = 0;
+    unsigned long i,cur_pfn,rc = 0;
+    unsigned long stat_normal_pages = 0, stat_2mb_pages = 0, stat_1gb_pages = 0;
+	unsigned int mem_flags = 0;
     int pod_mode = 0;
 
-    /* An HVM guest must be initialised with at least 2MB memory. */
-    if ( memsize < 2 || target < 2 )
-        goto error_out;
-
-    if ( memsize > target )
+    if ( nr_pages > target_pages )
+    {
         pod_mode = 1;
+        mem_flags |= XENMEMF_populate_on_demand;
+    }
+	if (node != INVALID_NODE)
+        mem_flags |= exact_node?XENMEMF_exact_node(node):XENMEMF_node(node);
+	
+	/*
+	* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
+	*
+	* We attempt to allocate 1GB pages if possible. It falls back on 2MB
+	* pages if 1GB allocation fails. 4KB pages will be used eventually if
+	* both fail.
+	* 
+	* Under 2MB mode, we allocate pages in batches of no more than 8MB to 
+	* ensure that we can be preempted and hence dom0 remains responsive.
+	*/
+		 
+	if (vga_hole)
+    {
+		xc_domain_populate_physmap_exact(
+			xch, dom, 0xa0, mem_flags , 0, &page_array[0x00]);
+		cur_pages = 0xc0;
+		stat_normal_pages = 0xc0;
+	}
+		while ( (rc == 0) && (nr_pages > cur_pages) )
+		{
+			/* Clip count to maximum 1GB extent. */
+			unsigned long count = nr_pages - cur_pages;
+			unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
+	
+			if ( count > max_pages )
+				count = max_pages;
+	
+			cur_pfn = page_array[cur_pages];
+	
+			/* Take care the corner cases of super page tails */
+			if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
+				 (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
+				count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
+			else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
+					  (count > SUPERPAGE_1GB_NR_PFNS) )
+				count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);
+	
+			/* Attemp to allocate 1GB super page. Because in each pass we only
+			 * allocate at most 1GB, we don't have to clip super page boundaries.
+			 */
+			if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
+				 /* Check if there exists MMIO hole in the 1GB memory range */
+				 !check_mmio_hole(cur_pfn << PAGE_SHIFT,
+								  SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT) )
+			{
+				long done;
+				unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
+				xen_pfn_t sp_extents[nr_extents];
+	
+				for ( i = 0; i < nr_extents; i++ )
+					sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_1GB_SHIFT)];
+	
+				done = xc_domain_populate_physmap(xch, dom, nr_extents, SUPERPAGE_1GB_SHIFT,
+												  mem_flags,sp_extents);
+	
+				if ( done > 0 )
+				{
+					stat_1gb_pages += done;
+					done <<= SUPERPAGE_1GB_SHIFT;
+					cur_pages += done;
+					count -= done;
+				}
+			}
+	
+			if ( count != 0 )
+			{
+				/* Clip count to maximum 8MB extent. */
+				max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
+				if ( count > max_pages )
+					count = max_pages;
+				
+				/* Clip partial superpage extents to superpage boundaries. */
+				if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
+					 (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
+					count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
+				else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
+						  (count > SUPERPAGE_2MB_NR_PFNS) )
+					count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p. tail */
+	
+				/* Attempt to allocate superpage extents. */
+				if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
+				{
+					long done;
+					unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
+					xen_pfn_t sp_extents[nr_extents];
+	
+					for ( i = 0; i < nr_extents; i++ )
+						sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_2MB_SHIFT)];
+	
+					done = xc_domain_populate_physmap(xch, dom, nr_extents, SUPERPAGE_2MB_SHIFT,
+													  mem_flags, sp_extents);
+	
+					if ( done > 0 )
+					{
+						stat_2mb_pages += done;
+						done <<= SUPERPAGE_2MB_SHIFT;
+						cur_pages += done;
+						count -= done;
+					}
+				}
+			}
+	
+			/* Fall back to 4kB extents. */
+			if ( count != 0 )
+			{
+				rc = xc_domain_populate_physmap_exact(
+					xch, dom, count, 0, exact_node?XENMEMF_exact_node(node):XENMEMF_node(node), &page_array[cur_pages]);
+				cur_pages += count;
+				stat_normal_pages += count;
+			}
+		}
+	
+		/* Subtract 0x20 from target_pages for the VGA "hole".	Xen will
+		 * adjust the PoD cache size so that domain tot_pages will be
+		 * target_pages - 0x20 after this call. */
+		if ( pod_mode )
+			rc = xc_domain_set_pod_target(xch, dom, target_pages - 0x20,
+										  NULL, NULL, NULL);
+	
+		if ( rc != 0 )
+		{
+			PERROR("Could not allocate memory for HVM guest.");
+			goto error_out;
+		}
+	
+		IPRINTF("PHYSICAL MEMORY ALLOCATION (NODE %d):\n"
+				"  4KB PAGES: 0x%016lx\n"
+				"  2MB PAGES: 0x%016lx\n"
+				"  1GB PAGES: 0x%016lx\n",
+				node, stat_normal_pages, stat_2mb_pages, stat_1gb_pages);
+		
+		return 0;
+		
+error_out:
+		if ( page_array )
+			free(page_array);
+		return -1;
+		
+}
 
-    memset(&elf, 0, sizeof(elf));
-    if ( elf_init(&elf, image, image_size) != 0 )
-        goto error_out;
-    elf_parse_binary(&elf);
-    v_start = 0;
-    v_end = (unsigned long long)memsize << 20;
-
-    if ( xc_version(xch, XENVER_capabilities, &caps) != 0 )
+static int setup_guest_numa_cross(xc_interface *xch,
+                xc_domain_numa_layout_t *dom_layout, xen_pfn_t *page_array)
+{
+    int vnode, rc=0;
+    unsigned long cur_pages, nr_pages;
+    /* Make a private copy for cross iterations */
+    xc_domain_numa_layout_t *layout;
+    if (!(layout = malloc(sizeof(*layout))))
     {
-        PERROR("Could not get Xen capabilities");
-        goto error_out;
+        PERROR("%s : Failed malloc.", __FUNCTION__);
+        return -1;
     }
+    memcpy(layout, dom_layout, sizeof(*layout));
 
-    if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 )
+    for (vnode=0, cur_pages=0, nr_pages=0; 
+                            cur_pages<layout->nr_pages && !rc; vnode++)
     {
-        PERROR("Guest OS must load to a page boundary.");
-        goto error_out;
+        unsigned long allocsz;
+        xc_vnode_data_t *vnode_data;
+        while (!layout->vnode_data[vnode].nr_pages)
+        {
+            vnode++;
+            if (vnode >= layout->nr_vnodes)
+                vnode = 0;
+        }
+        vnode_data = &layout->vnode_data[vnode];
+        allocsz = layout->unit_size;
+        if (allocsz > vnode_data->nr_pages)
+            allocsz = vnode_data->nr_pages;
+
+        nr_pages = cur_pages + allocsz;
+        rc = __setup_guest_memory(xch, layout->domid, nr_pages, nr_pages, 
+                cur_pages, page_array, !cur_pages, vnode_data->mnode_id, 1);
+        vnode_data->nr_pages -= allocsz;
+        cur_pages = nr_pages;
     }
+    free(layout);
+    return rc;
+}
 
-    IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
-            "  Loader:        %016"PRIx64"->%016"PRIx64"\n"
-            "  TOTAL:         %016"PRIx64"->%016"PRIx64"\n"
-            "  ENTRY ADDRESS: %016"PRIx64"\n",
-            elf.pstart, elf.pend,
-            v_start, v_end,
-            elf_uval(&elf, elf.ehdr, e_entry));
+static int setup_guest_numa_memory(xc_interface *xch, 
+                xc_domain_numa_layout_t *dom_layout, xen_pfn_t *page_array)
+{
+    int vnode, rc;
+    unsigned long cur_pages, nr_pages;
 
-    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
+    if ((rc = xc_setup_numa_domain(xch, dom_layout)))
+        goto setup_done;
+
+    if (dom_layout->type == XEN_DOM_NUMA_CROSS)
     {
-        PERROR("Could not allocate memory.");
-        goto error_out;
+        rc = setup_guest_numa_cross(xch, dom_layout, page_array);
+        goto setup_done;
     }
 
-    for ( i = 0; i < nr_pages; i++ )
-        page_array[i] = i;
-    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
-        page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
-
-    /*
-     * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
-     *
-     * We attempt to allocate 1GB pages if possible. It falls back on 2MB
-     * pages if 1GB allocation fails. 4KB pages will be used eventually if
-     * both fail.
-     * 
-     * Under 2MB mode, we allocate pages in batches of no more than 8MB to 
-     * ensure that we can be preempted and hence dom0 remains responsive.
-     */
-    rc = xc_domain_populate_physmap_exact(
-        xch, dom, 0xa0, 0, 0, &page_array[0x00]);
-    cur_pages = 0xc0;
-    stat_normal_pages = 0xc0;
-    while ( (rc == 0) && (nr_pages > cur_pages) )
+    /* XXX: pod is turned off with NUMA allocation for now */
+    for (vnode=0, cur_pages=0, nr_pages=0; 
+                            vnode<dom_layout->nr_vnodes && !rc; vnode++)
     {
-        /* Clip count to maximum 1GB extent. */
-        unsigned long count = nr_pages - cur_pages;
-        unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
-
-        if ( count > max_pages )
-            count = max_pages;
-
-        cur_pfn = page_array[cur_pages];
-
-        /* Take care the corner cases of super page tails */
-        if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
-             (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
-            count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
-        else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
-                  (count > SUPERPAGE_1GB_NR_PFNS) )
-            count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);
-
-        /* Attemp to allocate 1GB super page. Because in each pass we only
-         * allocate at most 1GB, we don't have to clip super page boundaries.
-         */
-        if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
-             /* Check if there exists MMIO hole in the 1GB memory range */
-             !check_mmio_hole(cur_pfn << PAGE_SHIFT,
-                              SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT) )
-        {
-            long done;
-            unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
-            xen_pfn_t sp_extents[nr_extents];
-
-            for ( i = 0; i < nr_extents; i++ )
-                sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_1GB_SHIFT)];
-
-            done = xc_domain_populate_physmap(xch, dom, nr_extents, SUPERPAGE_1GB_SHIFT,
-                                              pod_mode ? XENMEMF_populate_on_demand : 0,
-                                              sp_extents);
-
-            if ( done > 0 )
-            {
-                stat_1gb_pages += done;
-                done <<= SUPERPAGE_1GB_SHIFT;
-                cur_pages += done;
-                count -= done;
-            }
-        }
+        xc_vnode_data_t *vnode_data = &dom_layout->vnode_data[vnode];
 
-        if ( count != 0 )
-        {
-            /* Clip count to maximum 8MB extent. */
-            max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
-            if ( count > max_pages )
-                count = max_pages;
-            
-            /* Clip partial superpage extents to superpage boundaries. */
-            if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
-                 (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
-                count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
-            else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
-                      (count > SUPERPAGE_2MB_NR_PFNS) )
-                count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p. tail */
-
-            /* Attempt to allocate superpage extents. */
-            if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
-            {
-                long done;
-                unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
-                xen_pfn_t sp_extents[nr_extents];
-
-                for ( i = 0; i < nr_extents; i++ )
-                    sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_2MB_SHIFT)];
-
-                done = xc_domain_populate_physmap(xch, dom, nr_extents, SUPERPAGE_2MB_SHIFT,
-                                                  pod_mode ? XENMEMF_populate_on_demand : 0,
-                                                  sp_extents);
-
-                if ( done > 0 )
-                {
-                    stat_2mb_pages += done;
-                    done <<= SUPERPAGE_2MB_SHIFT;
-                    cur_pages += done;
-                    count -= done;
-                }
-            }
-        }
-
-        /* Fall back to 4kB extents. */
-        if ( count != 0 )
-        {
-            rc = xc_domain_populate_physmap_exact(
-                xch, dom, count, 0, 0, &page_array[cur_pages]);
-            cur_pages += count;
-            stat_normal_pages += count;
-        }
+        nr_pages = cur_pages + vnode_data->nr_pages;
+        rc = __setup_guest_memory(xch, dom_layout->domid, nr_pages, nr_pages,
+                    cur_pages, page_array, (vnode == 0), vnode_data->mnode_id, 
+                    (dom_layout->type != XEN_DOM_NUMA_DONTCARE));
+        cur_pages = nr_pages;
     }
+setup_done:
+    if (!rc)
+        rc = xc_domain_numa_pinvcpus(xch, dom_layout);
+    if (!rc)
+        rc = xc_domain_disable_migrate(xch, dom_layout->domid);
+    return rc;
+}
 
-    /* Subtract 0x20 from target_pages for the VGA "hole".  Xen will
-     * adjust the PoD cache size so that domain tot_pages will be
-     * target_pages - 0x20 after this call. */
-    if ( pod_mode )
-        rc = xc_domain_set_pod_target(xch, dom, target_pages - 0x20,
-                                      NULL, NULL, NULL);
+static int setup_guest_nonnuma_memory(xc_interface *xch, uint32_t domid,
+                unsigned long nr_pages, unsigned long target_pages, 
+                xen_pfn_t *page_array)
+{
+    return __setup_guest_memory(xch, domid, nr_pages, target_pages, 0, 
+            page_array, 1, INVALID_NODE, 0);
+}
+
+static int setup_guest_memory(xc_interface *xch, uint32_t dom,
+                            xc_domain_numa_layout_t *dom_layout,
+                            unsigned long nr_pages, unsigned long target_pages,
+                            struct elf_binary *elf)
+{
+    xen_pfn_t *page_array = NULL;
+    unsigned long i;
+    int rc;
 
-    if ( rc != 0 )
+    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
     {
-        PERROR("Could not allocate memory for HVM guest.");
-        goto error_out;
+        rc = -1;
+        PERROR("Could not allocate memory.");
+        goto out;
     }
 
-    IPRINTF("PHYSICAL MEMORY ALLOCATION:\n"
-            "  4KB PAGES: 0x%016lx\n"
-            "  2MB PAGES: 0x%016lx\n"
-            "  1GB PAGES: 0x%016lx\n",
-            stat_normal_pages, stat_2mb_pages, stat_1gb_pages);
-    
-    if ( loadelfimage(xch, &elf, dom, page_array) != 0 )
-        goto error_out;
+    for ( i = 0; i < nr_pages; i++ )
+        page_array[i] = i;
+    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
+        page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+    if ( dom_layout )
+        rc = setup_guest_numa_memory(xch, dom_layout, page_array);
+    else
+        rc = setup_guest_nonnuma_memory(xch, dom, 
+                                nr_pages, target_pages, page_array);
+    if ( rc )
+        goto out;
+    rc = loadelfimage(xch, elf, dom, page_array);
+out:
+    if ( page_array )
+        free(page_array);
+    return rc;
+}
 
-    if ( (hvm_info_page = xc_map_foreign_range(
+static int setup_guest_special_pages(xc_interface *xch, uint32_t dom, 
+                    uint64_t memsize, xc_domain_numa_layout_t *dom_layout)
+{
+    void *hvm_info_page;
+    uint32_t *ident_pt;
+    unsigned long i;
+	int rc=0;
+	if ( (hvm_info_page = xc_map_foreign_range(
               xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
               HVM_INFO_PFN)) == NULL )
         goto error_out;
-    build_hvm_info(hvm_info_page, v_end);
+    build_hvm_info(hvm_info_page, memsize, dom_layout);
     munmap(hvm_info_page, PAGE_SIZE);
 
     /* Allocate and clear special pages. */
@@ -370,6 +527,62 @@ static int setup_guest(xc_interface *xch,
     munmap(ident_pt, PAGE_SIZE);
     xc_set_hvm_param(xch, dom, HVM_PARAM_IDENT_PT,
                      special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
+	return 0;
+	error_out:
+    	return -1;
+}
+
+static int setup_guest(xc_interface *xch,
+                       uint32_t dom, int memsize, int target,
+                       xc_domain_numa_layout_t  *dom_layout,
+                       char *image, unsigned long image_size)
+{
+    unsigned long entry_eip;
+    struct elf_binary elf;
+    uint64_t v_start, v_end;
+    int rc;
+    xen_capabilities_info_t caps;
+
+    /* An HVM guest must be initialised with at least 2MB memory. */
+    if ( memsize < 2 || target < 2 )
+        goto error_out;
+
+    memset(&elf, 0, sizeof(elf));
+    if ( elf_init(&elf, image, image_size) != 0 )
+        goto error_out;
+    elf_parse_binary(&elf);
+    v_start = 0;
+    v_end = (unsigned long long)memsize << 20;
+
+    if ( xc_version(xch, XENVER_capabilities, &caps) != 0 )
+    {
+        PERROR("Could not get Xen capabilities");
+        goto error_out;
+    }
+
+    if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 )
+    {
+        PERROR("Guest OS must load to a page boundary.");
+        goto error_out;
+    }
+
+    IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
+            "  Loader:        %016"PRIx64"->%016"PRIx64"\n"
+            "  TOTAL:         %016"PRIx64"->%016"PRIx64"\n"
+            "  ENTRY ADDRESS: %016"PRIx64"\n",
+            elf.pstart, elf.pend,
+            v_start, v_end,
+            elf_uval(&elf, elf.ehdr, e_entry));
+
+    rc = setup_guest_memory(xch, dom, dom_layout, 
+                    (unsigned long)memsize << (20 - PAGE_SHIFT),
+                    (unsigned long)target << (20 - PAGE_SHIFT), &elf);
+    if ( rc < 0 )
+        goto error_out;
+
+    rc = setup_guest_special_pages(xch, dom, v_end, dom_layout);
+    if ( rc < 0 )
+        goto error_out;
 
     /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
     entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
@@ -384,11 +597,9 @@ static int setup_guest(xc_interface *xch,
         munmap(page0, PAGE_SIZE);
     }
 
-    free(page_array);
     return 0;
 
  error_out:
-    free(page_array);
     return -1;
 }
 
@@ -396,16 +607,27 @@ static int xc_hvm_build_internal(xc_interface *xch,
                                  uint32_t domid,
                                  int memsize,
                                  int target,
+                                 xc_domain_numa_config_t *numa_config,
                                  char *image,
                                  unsigned long image_size)
 {
+	int rc;
+	xc_domain_numa_layout_t  *dom_layout = 0;
+
     if ( (image == NULL) || (image_size == 0) )
     {
         ERROR("Image required");
         return -1;
     }
 
-    return setup_guest(xch, domid, memsize, target, image, image_size);
+	 if ( numa_config )
+		 dom_layout = xc_dom_alloc_numa_layout(xch, domid, 
+						 (uint64_t)memsize << (20 - PAGE_SHIFT), numa_config);
+	 rc = setup_guest(xch, domid, memsize, target, dom_layout,
+														 image, image_size);
+	 if ( dom_layout )
+		 xc_dom_free_numa_layout(xch, dom_layout);
+	 return rc;
 }
 
 /* xc_hvm_build:
@@ -424,7 +646,7 @@ int xc_hvm_build(xc_interface *xch,
          ((image = xc_read_image(xch, image_name, &image_size)) == NULL) )
         return -1;
 
-    sts = xc_hvm_build_internal(xch, domid, memsize, memsize, image, image_size);
+    sts = xc_hvm_build_internal(xch, domid, memsize, memsize, NULL, image, image_size);
 
     free(image);
 
@@ -442,6 +664,7 @@ int xc_hvm_build_target_mem(xc_interface *xch,
                            uint32_t domid,
                            int memsize,
                            int target,
+                           xc_domain_numa_config_t *numa_config,
                            const char *image_name)
 {
     char *image;
@@ -452,7 +675,7 @@ int xc_hvm_build_target_mem(xc_interface *xch,
          ((image = xc_read_image(xch, image_name, &image_size)) == NULL) )
         return -1;
 
-    sts = xc_hvm_build_internal(xch, domid, memsize, target, image, image_size);
+    sts = xc_hvm_build_internal(xch, domid, memsize, target, numa_config, image, image_size);
 
     free(image);
 
@@ -487,7 +710,7 @@ int xc_hvm_build_mem(xc_interface *xch,
         return -1;
     }
 
-    sts = xc_hvm_build_internal(xch, domid, memsize, memsize,
+    sts = xc_hvm_build_internal(xch, domid, memsize, memsize, NULL,
                                 img, img_len);
 
     /* xc_inflate_buffer may return the original buffer pointer (for
@@ -499,6 +722,8 @@ int xc_hvm_build_mem(xc_interface *xch,
     return sts;
 }
 
+#undef set_xen_guest_handle
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index 09c8f23..2dbba3c 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -47,6 +47,19 @@
  *  0 - on success
  * -1 - on error
  */
+
+void print_xch(xc_interface *xch)
+{
+	printf("\ncurrently_progress_reporting=%s\n",xch->currently_progress_reporting);
+	printf("last_error=%s\n",xch->last_error.message);
+	printf("hypercall_buffer_cache_nr=%d\n",xch->hypercall_buffer_cache_nr);
+	printf("hypercall_buffer_total_allocations=%d\n",xch->hypercall_buffer_total_allocations);
+	printf("hypercall_buffer_maximum_allocations=%d\n",xch->hypercall_buffer_maximum_allocations);
+	printf("hypercall_buffer_cache_hits=%d\n",xch->hypercall_buffer_cache_hits);
+	printf("hypercall_buffer_cache_misses=%d\n",xch->hypercall_buffer_cache_misses);
+	printf("hypercall_buffer_cache_toobig=%d\n",xch->hypercall_buffer_cache_toobig);
+}
+
 static int xc_osdep_get_info(xc_interface *xch, xc_osdep_info_t *info)
 {
     int rc = -1;
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index 3687561..53902ec 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -102,6 +102,7 @@ struct xc_interface_core {
     xc_osdep_handle  ops_handle; /* opaque data for xc_osdep_ops */
 };
 
+void print_xch(xc_interface *xch);
 void xc_report_error(xc_interface *xch, int code, const char *fmt, ...);
 void xc_reportv(xc_interface *xch, xentoollog_logger *lg, xentoollog_level,
                 int code, const char *fmt, va_list args)
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index dc2561e..2833d59 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -122,7 +122,6 @@
 
 #define DEBUG_MARK() DEBUG_PRINT("mark(line:%d)\n",__LINE__)
 
-
 typedef struct xc_interface_core xc_interface;
 typedef struct xc_interface_core xc_evtchn;
 typedef struct xc_interface_core xc_gnttab;
@@ -414,6 +413,24 @@ typedef union
     start_info_t s;
 } start_info_any_t;
 
+/**
+ * struct xc_dom_numa_info : Carries information required for NUMA memory 
+ * allocation for the guests.
+ */
+#define XC_DOM_NUMA_AUTO     	0  /* Let the allocator choose */
+#define XC_DOM_NUMA_CLUSTER 	1
+#define XC_DOM_NUMA_GUEST_NUMA  2
+#define XC_DOM_NUMA_CROSS   	3
+#define XC_DOM_NUMA_NONE     	4
+
+#define XC_DOM_NUMA_DEF_UNIT_SIZE 32    /* in 4K pages */  
+
+typedef struct xc_domain_numa_config
+{
+    uint32_t strategy;      /* By default, DONTCARE (for now) */
+    uint32_t nr_nodes;      /* For GUEST_NUMA/CROSS */
+    uint32_t unit_size;   /* For CROSS only */
+} xc_domain_numa_config_t;
 
 int xc_domain_create(xc_interface *xch,
                      uint32_t ssidref,
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 9ed0ea4..a87ca6f 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -173,6 +173,7 @@ int xc_hvm_build_target_mem(xc_interface *xch,
                             uint32_t domid,
                             int memsize,
                             int target,
+							xc_domain_numa_config_t *numa_config,
                             const char *image_name);
 
 int xc_hvm_build_mem(xc_interface *xch,
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 0dc6319..a527152 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -136,6 +136,8 @@
 
 #define LIBXL_DTOR_POISON 0xa5
 
+typedef xc_domain_numa_config_t libxl_domain_numa_config;
+
 typedef uint8_t libxl_mac[6];
 
 typedef char **libxl_string_list;
diff --git a/tools/libxl/libxl.idl b/tools/libxl/libxl.idl
index 0c777d7..11f036c 100644
--- a/tools/libxl/libxl.idl
+++ b/tools/libxl/libxl.idl
@@ -6,6 +6,7 @@
 libxl_ctx = Builtin("ctx")
 libxl_uuid = Builtin("uuid")
 libxl_mac = Builtin("mac")
+xc_domain_numa_config = Builtin("domain_numa_config");
 libxl_cpumap = Builtin("cpumap", destructor_fn="libxl_cpumap_destroy", passby=PASS_BY_REFERENCE)
 libxl_nodemap = Builtin("nodemap", destructor_fn="libxl_nodemap_destroy", passby=PASS_BY_REFERENCE)
 libxl_cpuarray = Builtin("cpuarray", destructor_fn="libxl_cpuarray_destroy", passby=PASS_BY_REFERENCE)
@@ -102,6 +103,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
     ("disable_migrate", bool),
     ("kernel",          libxl_file_reference),
     ("cpuid",           libxl_cpuid_policy_list),
+    ("numa_config",		xc_domain_numa_config),
     ("hvm",             integer),
     ("u", KeyedUnion(None, "hvm",
                 [("hvm", "%s", Struct(None,
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 22e6006..face10d 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -103,6 +103,7 @@ void libxl_init_build_info(libxl_domain_build_info *b_info, libxl_domain_create_
     } else {
         b_info->u.pv.slack_memkb = 8 * 1024;
     }
+		b_info->numa_config.strategy = XC_DOM_NUMA_NONE;
 }
 
 void libxl_init_dm_info(libxl_device_model_info *dm_info,
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 0413dfd..8046ef6 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -901,6 +901,7 @@ int libxl__build_hvm(libxl_ctx *ctx, uint32_t domid,
         domid,
         (info->max_memkb - info->video_memkb) / 1024,
         (info->target_memkb - info->video_memkb) / 1024,
+        &info->numa_config,
         libxl__abs_path(&gc, (char *)info->kernel.path,
                        libxl_xenfirmwaredir_path()));
     if (ret) {
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 083efc9..006dc7b 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -339,6 +339,37 @@ static void dolog(const char *file, int line, const char *func, char *fmt, ...)
         libxl_write_exactly(NULL, logfile, s, rc, NULL, NULL);
 }
 
+static char *numa_val_to_str(uint32_t val)
+{
+    switch (val)
+    {
+        case XC_DOM_NUMA_AUTO:
+                return "AUTO";
+        case XC_DOM_NUMA_CLUSTER:
+                return "CLUSTER";
+        case XC_DOM_NUMA_GUEST_NUMA:
+                return "GUEST NUMA";
+        case XC_DOM_NUMA_CROSS:
+                return "CROSS";
+        default:
+                return "NONE";
+    }
+}
+
+static uint32_t numa_str_to_val(const char *str)
+{
+    if (!strcasecmp(str, "AUTO"))
+        return XC_DOM_NUMA_AUTO;
+    if (!strcasecmp(str, "CLUSTER"))
+        return XC_DOM_NUMA_CLUSTER;
+    if (!strcasecmp(str, "GUEST_NUMA")||!strcasecmp(str, "GUEST NUMA")||!strcasecmp(str, "GUESTNUMA"))
+        return XC_DOM_NUMA_GUEST_NUMA;
+    if (!strcasecmp(str, "CROSS"))
+        return XC_DOM_NUMA_CROSS;
+
+    return XC_DOM_NUMA_NONE;
+}
+
 static void printf_info(int domid,
                         libxl_domain_config *d_config,
                         libxl_device_model_info *dm_info)
@@ -383,6 +414,10 @@ static void printf_info(int domid,
     printf("\t(max_memkb %d)\n", b_info->max_memkb);
     printf("\t(target_memkb %d)\n", b_info->target_memkb);
     printf("\t(nomigrate %d)\n", b_info->disable_migrate);
+    printf("\t(numa_strategy %s)\n", 
+                            numa_val_to_str(b_info->numa_config.strategy));
+    printf("\t(numa_nodes %d)\n", b_info->numa_config.nr_nodes);
+    printf("\t(unit_size %d)\n", b_info->numa_config.unit_size);
 
     if (!c_info->hvm && b_info->u.pv.bootloader) {
         printf("\t(bootloader %s)\n", b_info->u.pv.bootloader);
@@ -1276,6 +1311,8 @@ static void parse_config_data(const char *configfile_filename_report,
         fprintf(stderr, "Illegal pool specified\n");
         exit(1);
     }
+	
+    libxl_init_build_info(b_info, c_info);
 
     /* the following is the actual config parsing with overriding values in the structures */
     if (!xlu_cfg_get_long (config, "vcpus", &l)) {
@@ -1465,6 +1502,14 @@ static void parse_config_data(const char *configfile_filename_report,
     if (!xlu_cfg_get_long (config, "videoram", &l))
         b_info->video_memkb = l * 1024;
 
+    if (!xlu_cfg_get_string (config, "strategy", &buf)) {
+        b_info->numa_config.strategy = numa_str_to_val(buf);
+        if (!xlu_cfg_get_long (config, "vnodes", &l))
+            b_info->numa_config.nr_nodes = l;
+        if (!xlu_cfg_get_long (config, "uintsz", &l))
+            b_info->numa_config.unit_size = l;
+    }
+
     xlu_cfg_replace_string (config, "kernel", &b_info->kernel.path);
 
     if (!xlu_cfg_get_long (config, "gfx_passthru", &l))
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 2600b90..4820288 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -981,7 +981,7 @@ static PyObject *pyxc_hvm_build(XcObject *self,
         target = memsize;
 
     if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
-                                 target, image) != 0 )
+                                 target, NULL, image) != 0 )
         return pyxc_error_to_exception(self->xc_handle);
 
 #if !defined(__ia64__)
diff --git a/tools/python/xen/lowlevel/xl/xl.c b/tools/python/xen/lowlevel/xl/xl.c
index 1b55937..746f5dc 100644
--- a/tools/python/xen/lowlevel/xl/xl.c
+++ b/tools/python/xen/lowlevel/xl/xl.c
@@ -398,6 +398,16 @@ PyObject *attrib__struct_in_addr_get(struct in_addr *pptr)
     return NULL;
 }
 
+int attrib__libxl_domain_numa_config_set(PyObject *v, libxl_domain_numa_config *pptr)
+{
+	return 0;
+}
+
+PyObject *attrib__libxl_domain_numa_config_get(libxl_domain_numa_config *pptr)
+{
+	return NULL;
+}
+
 typedef struct {
     PyObject_HEAD;
     libxl_ctx ctx;
diff --git a/xen/include/public/arch-x86/dom_numa.h b/xen/include/public/arch-x86/dom_numa.h
new file mode 100755
index 0000000..c377ac9
--- /dev/null
+++ b/xen/include/public/arch-x86/dom_numa.h
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * dom_numa.h
+ *
+ * Guest NUMA common structures.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Author : Lab309
+ */
+#ifndef __XEN_PUBLIC_DOM_NUMA_X86_H__
+#define __XEN_PUBLIC_DOM_NUMA_X86_H__
+
+#define XEN_MAX_VCPUS 128
+
+/* vnodes are 1GB-aligned */
+#define XEN_MIN_VNODE_SHIFT (30)
+#define XEN_INVALID_NODE (0xFF)
+
+struct xen_vnode_info {
+    uint8_t mnode_id; 	/* physical node vnode is allocated from */
+    uint32_t start; 	/* start of the vnode range (in pages) */
+    uint32_t end; 		/* end of the vnode range (in pages) */
+};
+
+/* version : Interface version */
+#define XEN_DOM_NUMA_INTERFACE_VERSION  0x01
+
+/* type : On NUMA platforms, the VM memory could be distributed across 
+ * nodes in different ways.
+ */
+#define XEN_DOM_NUMA_CLUSTER   		0x01 /* Non-NUMA VM clustered to a node */
+#define XEN_DOM_NUMA_GUSET_NUMA    	0x02 /* NUMA VM guest numa across nodes */
+#define XEN_DOM_NUMA_CROSS		    0x03 /* Non-NUMA VM distributed across nodes */
+#define XEN_DOM_NUMA_DONTCARE   	0x04 /* Ad-hoc allocation */
+
+/* xen_domain_numa_info : 
+ * For PV VMs, this is the NUMA enlightenment structure.
+ * For HVMs, this structure is shared with the domain builder (hvmloader).
+ * Size of data[] depends on nr_vnodes and nr_vcpus.
+ */
+
+/* Macros to access data structures in dynamic data[] field.
+ * nr_vcpus and nr_vnodes must be initialized in the xen_domain_numa_info
+ * structure before calling these macros. */
+#define NUMA_INFO_SIZE(pinfo)                                           \
+            (sizeof(*pinfo)                                             \
+                + pinfo->nr_vnodes*sizeof(struct xen_vnode_info)        \
+                + pinfo->nr_vcpus*sizeof(uint8_t)                       \
+                + pinfo->nr_vnodes*pinfo->nr_vnodes*sizeof(uint8_t))
+
+#define NUMA_INFO_VNODE_INFO(pinfo)                                     \
+            (struct xen_vnode_info *)((uint8_t *)pinfo + sizeof(*pinfo))
+		
+#define NUMA_INFO_VCPU_TO_VNODE(pinfo)                                  \
+            (uint8_t *)((uint8_t *)NUMA_INFO_VNODE_INFO(pinfo)          \
+                + pinfo->nr_vnodes*sizeof(struct xen_vnode_info))
+
+#define NUMA_INFO_VNODE_DISTANCE(pinfo)                                 \
+            (uint8_t *)((uint8_t *)NUMA_INFO_VCPU_TO_VNODE(pinfo)       \
+                + pinfo->nr_vcpus*sizeof(uint8_t))
+
+struct xen_domain_numa_info {
+    uint8_t version;    /* Interface version */
+    uint8_t type;       /* VM memory allocation scheme (see above) */
+
+    uint8_t nr_vcpus;
+    uint8_t nr_vnodes;
+    /* data[] has the following entries :
+     * //Only (nr_vnodes) entries are filled, each sizeof(struct xen_vnode_info)
+     * struct xen_vnode_info vnode_info[nr_vnodes]; 
+     * //Only (nr_vcpus) entries are filled, each sizeof(uint8_t)
+     * uint8_t vcpu_to_vnode[nr_vcpus];
+     * //Only (nr_vnodes*nr_vnodes) entries are filled, each sizeof(uint8_t)
+     * uint8_t vnode_distance[nr_vnodes*nr_vnodes];
+     */
+	uint8_t data[0];
+};
+
+#endif
diff --git a/xen/include/public/dom_numa.h b/xen/include/public/dom_numa.h
new file mode 100755
index 0000000..d9750c0
--- /dev/null
+++ b/xen/include/public/dom_numa.h
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * dom_numa.h
+ *
+ * Guest NUMA common structures.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Author : Lab309
+ */
+
+#ifndef __XEN_PUBLIC_DOM_NUMA_H
+#define __XEN_PUBLIC_DOM_NUMA_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "./arch-x86/dom_numa.h"
+#else
+#error "unsupported architecture"
+#endif
+
+
+#endif
diff --git a/xen/include/public/hvm/hvm_info_table.h b/xen/include/public/hvm/hvm_info_table.h
index bdb5995..6c3d4cc 100644
--- a/xen/include/public/hvm/hvm_info_table.h
+++ b/xen/include/public/hvm/hvm_info_table.h
@@ -25,12 +25,14 @@
 #ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
 #define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
 
+#include "../dom_numa.h"
+
 #define HVM_INFO_PFN         0x09F
 #define HVM_INFO_OFFSET      0x800
 #define HVM_INFO_PADDR       ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
 
 /* Maximum we can support with current vLAPIC ID mapping. */
-#define HVM_MAX_VCPUS        128
+#define HVM_MAX_VCPUS        XEN_MAX_VCPUS
 
 struct hvm_info_table {
     char        signature[8]; /* "HVM INFO" */
@@ -70,6 +72,12 @@ struct hvm_info_table {
 
     /* Bitmap of which CPUs are online at boot time. */
     uint8_t     vcpu_online[(HVM_MAX_VCPUS + 7)/8];
+
+    /* Domain NUMA memory distribution. Size of this structure should be 
+     * obtained using the macro XEN_DOMAIN_NUMA_INFO_SIZE(numa_info).
+     */
+    uint8_t numa_enabled; /* numa_info is populated only if numa_enabled != 0 */
+    struct xen_domain_numa_info numa_info[0];
 };
 
 #endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index e8f0532..eee1e84 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -713,6 +713,7 @@ struct xenctl_bitmap {
     XEN_GUEST_HANDLE_64(uint8) bitmap;
     uint32_t nr_elems;
 };
+
 #endif
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: about the guest(Redhat6.3) shows white screen, but the suse, ubuntu is ok
  2013-07-21 14:41 about the guest(Redhat6.3) shows white screen, but the suse, ubuntu is ok butine
@ 2013-07-22 11:32 ` Dario Faggioli
  2013-07-23  6:26   ` Matt Wilson
  0 siblings, 1 reply; 3+ messages in thread
From: Dario Faggioli @ 2013-07-22 11:32 UTC (permalink / raw)
  To: butine; +Cc: Elena Ufimtseva, Matt Wilson, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 1587 bytes --]

On dom, 2013-07-21 at 22:41 +0800, butine@zju.edu.cn wrote:
> hello,Dario,
> 
Hi,

> I have implemented the HVM guest NUMA.
>
Oh, really? Well, that would be really nice indeed. I'm on vacation till
Wednesday, so I'll take a look at you patch on Thursday (or even
later :-( ).

I'm Cc-ing Matt, which was also looking into it. I'm also Cc-ing Elena
which (if I understood Butine's work correctly), is looking into pretty
much the same thing, although for PV.

One thing I can already tell you is that this is a really big patch, and
it'd be really really uncomfortable to review it like this... Could you
perhaps split it into smaller patches and send a proper series?

Take a look at other patch series submissions on the xen-devel mailing
list...

> After I created the guest(Redhat6.3),it would show white screen when
> guest started.But the suse,ubuntu is ok.why??
> 
Mmm.. No idea. :-(

Actually, I'm not even sure I understood what happens, to properly try
to help in debugging any issue, I think we'd require much more
information and some logs.

For instance, have a look here:
http://blog.xen.org/index.php/2013/06/04/reporting-a-bug-against-the-xen-hypervisor/

Nice to see that you're working on this though, and thanks for sharing
the code! :-)

Thanks and Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)


[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: about the guest(Redhat6.3) shows white screen, but the suse, ubuntu is ok
  2013-07-22 11:32 ` Dario Faggioli
@ 2013-07-23  6:26   ` Matt Wilson
  0 siblings, 0 replies; 3+ messages in thread
From: Matt Wilson @ 2013-07-23  6:26 UTC (permalink / raw)
  To: Dario Faggioli; +Cc: Elena Ufimtseva, butine, xen-devel

On Mon, Jul 22, 2013 at 12:32:10PM +0100, Dario Faggioli wrote:
> On dom, 2013-07-21 at 22:41 +0800, butine@zju.edu.cn wrote:
> > hello,Dario,
> > 
> Hi,
> 
> > I have implemented the HVM guest NUMA.
> >
> Oh, really? Well, that would be really nice indeed. I'm on vacation till
> Wednesday, so I'll take a look at you patch on Thursday (or even
> later :-( ).
> 
> I'm Cc-ing Matt, which was also looking into it. I'm also Cc-ing Elena
> which (if I understood Butine's work correctly), is looking into pretty
> much the same thing, although for PV.

> One thing I can already tell you is that this is a really big patch, and
> it'd be really really uncomfortable to review it like this... Could you
> perhaps split it into smaller patches and send a proper series?
> 
> Take a look at other patch series submissions on the xen-devel mailing
> list...

Hi,

I had a quick look at your patch. There's quite a lot of code there!
My NUMA support for HVM guests patch is a lot smaller. I've been
meaning to find some time to add some support in libxl before posting
it. Perhaps I should post my series (which is based in part on patches
posted a while ago on xen-devel) as a RFC so you can see how it works.

> > After I created the guest(Redhat6.3),it would show white screen when
> > guest started.But the suse,ubuntu is ok.why??
> > 
> Mmm.. No idea. :-(
> 
> Actually, I'm not even sure I understood what happens, to properly try
> to help in debugging any issue, I think we'd require much more
> information and some logs.

In looking at your patches, you don't seem to adjust the initial APIC
ID of the virtual CPUs. The APIC ID needs to be changed so that cpu
topology enumeration via cpuid works correctly. I've seen Linux panic
early when the ACPI tables say that a NUMA topology is expected but
the CPU topology via cpuid doesn't match.

--msw

> For instance, have a look here:
> http://blog.xen.org/index.php/2013/06/04/reporting-a-bug-against-the-xen-hypervisor/
> 
> Nice to see that you're working on this though, and thanks for sharing
> the code! :-)
> 
> Thanks and Regards,
> Dario
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2013-07-23  6:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-21 14:41 about the guest(Redhat6.3) shows white screen, but the suse, ubuntu is ok butine
2013-07-22 11:32 ` Dario Faggioli
2013-07-23  6:26   ` Matt Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.