All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes
@ 2014-02-19  7:53 Hu Tao
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 01/14] NUMA: move numa related code to new file numa.c Hu Tao
                   ` (13 more replies)
  0 siblings, 14 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

The major change from v17 is that the patchset is now based on the memory backend.
Example command line:

./x86_64-softmmu/qemu-system-x86_64 -hda /path/to/image.img -smp 2 \
   -object memory-ram,size=512M,host-nodes=1,policy=membind,id=ram-node0 \
   -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
   -object memory-ram,size=1024M,host-nodes=2-3,policy=interleave,id=ram-node1 \
   -numa node,nodeid=1,cpus=1,memdev=ram-node1 \
   -m 1536M

There are some minor problems remaining, but I'd like to find out whether there
are any issues with the general direction. Your comments are welcome!

v17: https://lists.gnu.org/archive/html/qemu-devel/2013-12/msg00568.html

Hu Tao (4):
  add memdev backend infrastructure
  visitor uint16list
  add memory backend ram field
  add query-memdev

Igor Mammedov (1):
  qapi: add SIZE type parser to string_input_visitor

Paolo Bonzini (3):
  pc: pass QEMUMachineInitArgs to pc_memory_init
  numa: introduce memory_region_allocate_system_memory
  numa: add -numa node,memdev= option

Wanlong Gao (6):
  NUMA: move numa related code to new file numa.c
  NUMA: check if the total numa memory size is equal to ram_size
  NUMA: Add numa_info structure to contain numa nodes info
  NUMA: convert -numa option to use OptsVisitor
  NUMA: expand MAX_NODES from 64 to 128
  qapi: add HostMemPolicy enum type

 Makefile.target                   |   2 +-
 backends/Makefile.objs            |   2 +
 backends/hostmem-ram.c            | 261 ++++++++++++++++++++++++++++++++++++++
 backends/hostmem.c                | 125 ++++++++++++++++++
 cpus.c                            |  14 --
 hw/i386/pc.c                      |  27 ++--
 hw/i386/pc_piix.c                 |   8 +-
 hw/i386/pc_q35.c                  |   4 +-
 include/hw/i386/pc.h              |   7 +-
 include/sysemu/cpus.h             |   1 -
 include/sysemu/hostmem.h          |  63 +++++++++
 include/sysemu/sysemu.h           |  23 +++-
 monitor.c                         |   2 +-
 numa.c                            | 260 +++++++++++++++++++++++++++++++++++++
 qapi-schema.json                  |  85 +++++++++++++
 qapi/string-input-visitor.c       | 178 +++++++++++++++++++++++++-
 qmp-commands.hx                   |  30 +++++
 tests/test-string-input-visitor.c |  22 ++++
 vl.c                              | 155 ++--------------------
 19 files changed, 1076 insertions(+), 193 deletions(-)
 create mode 100644 backends/hostmem-ram.c
 create mode 100644 backends/hostmem.c
 create mode 100644 include/sysemu/hostmem.h
 create mode 100644 numa.c

-- 
1.8.5.2.229.g4448466

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 01/14] NUMA: move numa related code to new file numa.c
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 02/14] NUMA: check if the total numa memory size is equal to ram_size Hu Tao
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Wanlong Gao <gaowanlong@cn.fujitsu.com>

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 Makefile.target         |   2 +-
 cpus.c                  |  14 ----
 include/sysemu/cpus.h   |   1 -
 include/sysemu/sysemu.h |   3 +
 numa.c                  | 183 ++++++++++++++++++++++++++++++++++++++++++++++++
 vl.c                    | 139 +-----------------------------------
 6 files changed, 188 insertions(+), 154 deletions(-)
 create mode 100644 numa.c

diff --git a/Makefile.target b/Makefile.target
index af6ac7e..0197c17 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -109,7 +109,7 @@ endif #CONFIG_BSD_USER
 #########################################################
 # System emulator target
 ifdef CONFIG_SOFTMMU
-obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
+obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
 obj-y += qtest.o
 obj-y += hw/
 obj-$(CONFIG_FDT) += device_tree.o
diff --git a/cpus.c b/cpus.c
index 945d85b..891d062 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1299,20 +1299,6 @@ static void tcg_exec_all(void)
     exit_request = 0;
 }
 
-void set_numa_modes(void)
-{
-    CPUState *cpu;
-    int i;
-
-    CPU_FOREACH(cpu) {
-        for (i = 0; i < nb_numa_nodes; i++) {
-            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
-                cpu->numa_node = i;
-            }
-        }
-    }
-}
-
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
 {
     /* XXX: implement xxx_cpu_list for targets that still miss it */
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 6502488..4f79081 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -23,7 +23,6 @@ extern int smp_threads;
 #define smp_threads 1
 #endif
 
-void set_numa_modes(void);
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg);
 
 #endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 495dae8..2509649 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -136,6 +136,9 @@ extern QEMUClockType rtc_clock;
 extern int nb_numa_nodes;
 extern uint64_t node_mem[MAX_NODES];
 extern unsigned long *node_cpumask[MAX_NODES];
+void numa_add(const char *optarg);
+void set_numa_nodes(void);
+void set_numa_modes(void);
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
diff --git a/numa.c b/numa.c
new file mode 100644
index 0000000..7845036
--- /dev/null
+++ b/numa.c
@@ -0,0 +1,183 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2013 Fujitsu Ltd.
+ * Author: Wanlong Gao <gaowanlong@cn.fujitsu.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "sysemu/sysemu.h"
+
+static void numa_node_parse_cpus(int nodenr, const char *cpus)
+{
+    char *endptr;
+    unsigned long long value, endvalue;
+
+    /* Empty CPU range strings will be considered valid, they will simply
+     * not set any bit in the CPU bitmap.
+     */
+    if (!*cpus) {
+        return;
+    }
+
+    if (parse_uint(cpus, &value, &endptr, 10) < 0) {
+        goto error;
+    }
+    if (*endptr == '-') {
+        if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) {
+            goto error;
+        }
+    } else if (*endptr == '\0') {
+        endvalue = value;
+    } else {
+        goto error;
+    }
+
+    if (endvalue >= MAX_CPUMASK_BITS) {
+        endvalue = MAX_CPUMASK_BITS - 1;
+        fprintf(stderr,
+            "qemu: NUMA: A max of %d VCPUs are supported\n",
+             MAX_CPUMASK_BITS);
+    }
+
+    if (endvalue < value) {
+        goto error;
+    }
+
+    bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
+    return;
+
+error:
+    fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus);
+    exit(1);
+}
+
+void numa_add(const char *optarg)
+{
+    char option[128];
+    char *endptr;
+    unsigned long long nodenr;
+
+    optarg = get_opt_name(option, 128, optarg, ',');
+    if (*optarg == ',') {
+        optarg++;
+    }
+    if (!strcmp(option, "node")) {
+
+        if (nb_numa_nodes >= MAX_NODES) {
+            fprintf(stderr, "qemu: too many NUMA nodes\n");
+            exit(1);
+        }
+
+        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
+            nodenr = nb_numa_nodes;
+        } else {
+            if (parse_uint_full(option, &nodenr, 10) < 0) {
+                fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
+                exit(1);
+            }
+        }
+
+        if (nodenr >= MAX_NODES) {
+            fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
+            exit(1);
+        }
+
+        if (get_param_value(option, 128, "mem", optarg) == 0) {
+            node_mem[nodenr] = 0;
+        } else {
+            int64_t sval;
+            sval = strtosz(option, &endptr);
+            if (sval < 0 || *endptr) {
+                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
+                exit(1);
+            }
+            node_mem[nodenr] = sval;
+        }
+        if (get_param_value(option, 128, "cpus", optarg) != 0) {
+            numa_node_parse_cpus(nodenr, option);
+        }
+        nb_numa_nodes++;
+    } else {
+        fprintf(stderr, "Invalid -numa option: %s\n", option);
+        exit(1);
+    }
+}
+
+void set_numa_nodes(void)
+{
+    if (nb_numa_nodes > 0) {
+        int i;
+
+        if (nb_numa_nodes > MAX_NODES) {
+            nb_numa_nodes = MAX_NODES;
+        }
+
+        /* If no memory size if given for any node, assume the default case
+         * and distribute the available memory equally across all nodes
+         */
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (node_mem[i] != 0) {
+                break;
+            }
+        }
+        if (i == nb_numa_nodes) {
+            uint64_t usedmem = 0;
+
+            /* On Linux, the each node's border has to be 8MB aligned,
+             * the final node gets the rest.
+             */
+            for (i = 0; i < nb_numa_nodes - 1; i++) {
+                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
+                usedmem += node_mem[i];
+            }
+            node_mem[i] = ram_size - usedmem;
+        }
+
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
+                break;
+            }
+        }
+        /* assigning the VCPUs round-robin is easier to implement, guest OSes
+         * must cope with this anyway, because there are BIOSes out there in
+         * real machines which also use this scheme.
+         */
+        if (i == nb_numa_nodes) {
+            for (i = 0; i < max_cpus; i++) {
+                set_bit(i, node_cpumask[i % nb_numa_nodes]);
+            }
+        }
+    }
+}
+
+void set_numa_modes(void)
+{
+    CPUState *cpu;
+    int i;
+
+    CPU_FOREACH(cpu) {
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
+                cpu->numa_node = i;
+            }
+        }
+    }
+}
diff --git a/vl.c b/vl.c
index 316de54..0adac0c 100644
--- a/vl.c
+++ b/vl.c
@@ -1211,102 +1211,6 @@ char *get_boot_devices_list(size_t *size)
     return list;
 }
 
-static void numa_node_parse_cpus(int nodenr, const char *cpus)
-{
-    char *endptr;
-    unsigned long long value, endvalue;
-
-    /* Empty CPU range strings will be considered valid, they will simply
-     * not set any bit in the CPU bitmap.
-     */
-    if (!*cpus) {
-        return;
-    }
-
-    if (parse_uint(cpus, &value, &endptr, 10) < 0) {
-        goto error;
-    }
-    if (*endptr == '-') {
-        if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) {
-            goto error;
-        }
-    } else if (*endptr == '\0') {
-        endvalue = value;
-    } else {
-        goto error;
-    }
-
-    if (endvalue >= MAX_CPUMASK_BITS) {
-        endvalue = MAX_CPUMASK_BITS - 1;
-        fprintf(stderr,
-            "qemu: NUMA: A max of %d VCPUs are supported\n",
-             MAX_CPUMASK_BITS);
-    }
-
-    if (endvalue < value) {
-        goto error;
-    }
-
-    bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
-    return;
-
-error:
-    fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus);
-    exit(1);
-}
-
-static void numa_add(const char *optarg)
-{
-    char option[128];
-    char *endptr;
-    unsigned long long nodenr;
-
-    optarg = get_opt_name(option, 128, optarg, ',');
-    if (*optarg == ',') {
-        optarg++;
-    }
-    if (!strcmp(option, "node")) {
-
-        if (nb_numa_nodes >= MAX_NODES) {
-            fprintf(stderr, "qemu: too many NUMA nodes\n");
-            exit(1);
-        }
-
-        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
-            nodenr = nb_numa_nodes;
-        } else {
-            if (parse_uint_full(option, &nodenr, 10) < 0) {
-                fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
-                exit(1);
-            }
-        }
-
-        if (nodenr >= MAX_NODES) {
-            fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
-            exit(1);
-        }
-
-        if (get_param_value(option, 128, "mem", optarg) == 0) {
-            node_mem[nodenr] = 0;
-        } else {
-            int64_t sval;
-            sval = strtosz(option, &endptr);
-            if (sval < 0 || *endptr) {
-                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
-                exit(1);
-            }
-            node_mem[nodenr] = sval;
-        }
-        if (get_param_value(option, 128, "cpus", optarg) != 0) {
-            numa_node_parse_cpus(nodenr, option);
-        }
-        nb_numa_nodes++;
-    } else {
-        fprintf(stderr, "Invalid -numa option: %s\n", option);
-        exit(1);
-    }
-}
-
 static QemuOptsList qemu_smp_opts = {
     .name = "smp-opts",
     .implied_opt_name = "cpus",
@@ -4139,48 +4043,7 @@ int main(int argc, char **argv, char **envp)
 
     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
 
-    if (nb_numa_nodes > 0) {
-        int i;
-
-        if (nb_numa_nodes > MAX_NODES) {
-            nb_numa_nodes = MAX_NODES;
-        }
-
-        /* If no memory size if given for any node, assume the default case
-         * and distribute the available memory equally across all nodes
-         */
-        for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_mem[i] != 0)
-                break;
-        }
-        if (i == nb_numa_nodes) {
-            uint64_t usedmem = 0;
-
-            /* On Linux, the each node's border has to be 8MB aligned,
-             * the final node gets the rest.
-             */
-            for (i = 0; i < nb_numa_nodes - 1; i++) {
-                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
-                usedmem += node_mem[i];
-            }
-            node_mem[i] = ram_size - usedmem;
-        }
-
-        for (i = 0; i < nb_numa_nodes; i++) {
-            if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
-                break;
-            }
-        }
-        /* assigning the VCPUs round-robin is easier to implement, guest OSes
-         * must cope with this anyway, because there are BIOSes out there in
-         * real machines which also use this scheme.
-         */
-        if (i == nb_numa_nodes) {
-            for (i = 0; i < max_cpus; i++) {
-                set_bit(i, node_cpumask[i % nb_numa_nodes]);
-            }
-        }
-    }
+    set_numa_nodes();
 
     if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
         exit(1);
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 02/14] NUMA: check if the total numa memory size is equal to ram_size
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 01/14] NUMA: move numa related code to new file numa.c Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-25 13:38   ` Eric Blake
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 03/14] NUMA: Add numa_info structure to contain numa nodes info Hu Tao
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Wanlong Gao <gaowanlong@cn.fujitsu.com>

If the total size of the assigned numa nodes' memory is not
equal to the assigned ram size, we will write the wrong data
to the ACPI table; the guest will then ignore the invalid ACPI
table and assign all memory to a single node. It's buggy, so we
should check the sizes to ensure that we write the right data
to the ACPI table.

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 numa.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/numa.c b/numa.c
index 7845036..a06e2d1 100644
--- a/numa.c
+++ b/numa.c
@@ -151,6 +151,16 @@ void set_numa_nodes(void)
             node_mem[i] = ram_size - usedmem;
         }
 
+        uint64_t numa_total = 0;
+        for (i = 0; i < nb_numa_nodes; i++) {
+            numa_total += node_mem[i];
+        }
+        if (numa_total != ram_size) {
+            fprintf(stderr, "qemu: numa nodes total memory size "
+                            "should equal to ram_size\n");
+            exit(1);
+        }
+
         for (i = 0; i < nb_numa_nodes; i++) {
             if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
                 break;
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 03/14] NUMA: Add numa_info structure to contain numa nodes info
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 01/14] NUMA: move numa related code to new file numa.c Hu Tao
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 02/14] NUMA: check if the total numa memory size is equal to ram_size Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-19  9:26   ` Igor Mammedov
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 04/14] NUMA: convert -numa option to use OptsVisitor Hu Tao
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Andre Przywara, Wanlong Gao, imammedo

From: Wanlong Gao <gaowanlong@cn.fujitsu.com>

Add the numa_info structure to contain the numa nodes' memory
and VCPU information, as well as the numa nodes' host memory
policies to be added in the future.

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 hw/i386/pc.c            | 12 ++++++++----
 include/sysemu/sysemu.h |  8 ++++++--
 monitor.c               |  2 +-
 numa.c                  | 23 ++++++++++++-----------
 vl.c                    |  7 +++----
 5 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e715a33..a464e48 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -674,14 +674,14 @@ static FWCfgState *bochs_bios_init(void)
         unsigned int apic_id = x86_cpu_apic_id_from_index(i);
         assert(apic_id < apic_id_limit);
         for (j = 0; j < nb_numa_nodes; j++) {
-            if (test_bit(i, node_cpumask[j])) {
+            if (test_bit(i, numa_info[j].node_cpu)) {
                 numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
                 break;
             }
         }
     }
     for (i = 0; i < nb_numa_nodes; i++) {
-        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
+        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
     }
     fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
                      (1 + apic_id_limit + nb_numa_nodes) *
@@ -1077,8 +1077,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
     guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
     guest_info->apic_xrupt_override = kvm_allows_irq0_override();
     guest_info->numa_nodes = nb_numa_nodes;
-    guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
+    guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
                                     sizeof *guest_info->node_mem);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        guest_info->node_mem[i] = numa_info[i].node_mem;
+    }
+
     guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
                                      sizeof *guest_info->node_cpu);
 
@@ -1086,7 +1090,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
         unsigned int apic_id = x86_cpu_apic_id_from_index(i);
         assert(apic_id < guest_info->apic_id_limit);
         for (j = 0; j < nb_numa_nodes; j++) {
-            if (test_bit(i, node_cpumask[j])) {
+            if (test_bit(i, numa_info[j].node_cpu)) {
                 guest_info->node_cpu[apic_id] = j;
                 break;
             }
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 2509649..d873b42 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -9,6 +9,7 @@
 #include "qapi-types.h"
 #include "qemu/notify.h"
 #include "qemu/main-loop.h"
+#include "qemu/bitmap.h"
 
 /* vl.c */
 
@@ -134,8 +135,11 @@ extern QEMUClockType rtc_clock;
 #define MAX_NODES 64
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
-extern uint64_t node_mem[MAX_NODES];
-extern unsigned long *node_cpumask[MAX_NODES];
+typedef struct node_info {
+    uint64_t node_mem;
+    DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+} NodeInfo;
+extern NodeInfo numa_info[MAX_NODES];
 void numa_add(const char *optarg);
 void set_numa_nodes(void);
 void set_numa_modes(void);
diff --git a/monitor.c b/monitor.c
index 690c152..0284735 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2004,7 +2004,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
         }
         monitor_printf(mon, "\n");
         monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
-            node_mem[i] >> 20);
+            numa_info[i].node_mem >> 20);
     }
 }
 
diff --git a/numa.c b/numa.c
index a06e2d1..1f413a0 100644
--- a/numa.c
+++ b/numa.c
@@ -61,7 +61,7 @@ static void numa_node_parse_cpus(int nodenr, const char *cpus)
         goto error;
     }
 
-    bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
+    bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1);
     return;
 
 error:
@@ -101,7 +101,7 @@ void numa_add(const char *optarg)
         }
 
         if (get_param_value(option, 128, "mem", optarg) == 0) {
-            node_mem[nodenr] = 0;
+            numa_info[nodenr].node_mem = 0;
         } else {
             int64_t sval;
             sval = strtosz(option, &endptr);
@@ -109,7 +109,7 @@ void numa_add(const char *optarg)
                 fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
                 exit(1);
             }
-            node_mem[nodenr] = sval;
+            numa_info[nodenr].node_mem = sval;
         }
         if (get_param_value(option, 128, "cpus", optarg) != 0) {
             numa_node_parse_cpus(nodenr, option);
@@ -134,7 +134,7 @@ void set_numa_nodes(void)
          * and distribute the available memory equally across all nodes
          */
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_mem[i] != 0) {
+            if (numa_info[i].node_mem != 0) {
                 break;
             }
         }
@@ -145,15 +145,16 @@ void set_numa_nodes(void)
              * the final node gets the rest.
              */
             for (i = 0; i < nb_numa_nodes - 1; i++) {
-                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
-                usedmem += node_mem[i];
+                numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
+                                        ~((1 << 23UL) - 1);
+                usedmem += numa_info[i].node_mem;
             }
-            node_mem[i] = ram_size - usedmem;
+            numa_info[i].node_mem = ram_size - usedmem;
         }
 
         uint64_t numa_total = 0;
         for (i = 0; i < nb_numa_nodes; i++) {
-            numa_total += node_mem[i];
+            numa_total += numa_info[i].node_mem;
         }
         if (numa_total != ram_size) {
             fprintf(stderr, "qemu: numa nodes total memory size "
@@ -162,7 +163,7 @@ void set_numa_nodes(void)
         }
 
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
+            if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
                 break;
             }
         }
@@ -172,7 +173,7 @@ void set_numa_nodes(void)
          */
         if (i == nb_numa_nodes) {
             for (i = 0; i < max_cpus; i++) {
-                set_bit(i, node_cpumask[i % nb_numa_nodes]);
+                set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
             }
         }
     }
@@ -185,7 +186,7 @@ void set_numa_modes(void)
 
     CPU_FOREACH(cpu) {
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
+            if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
                 cpu->numa_node = i;
             }
         }
diff --git a/vl.c b/vl.c
index 0adac0c..915f8b7 100644
--- a/vl.c
+++ b/vl.c
@@ -196,8 +196,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
     QTAILQ_HEAD_INITIALIZER(fw_boot_order);
 
 int nb_numa_nodes;
-uint64_t node_mem[MAX_NODES];
-unsigned long *node_cpumask[MAX_NODES];
+NodeInfo numa_info[MAX_NODES];
 
 uint8_t qemu_uuid[16];
 bool qemu_uuid_set;
@@ -2787,8 +2786,8 @@ int main(int argc, char **argv, char **envp)
     translation = BIOS_ATA_TRANSLATION_AUTO;
 
     for (i = 0; i < MAX_NODES; i++) {
-        node_mem[i] = 0;
-        node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
+        numa_info[i].node_mem = 0;
+        bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
     }
 
     nb_numa_nodes = 0;
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 04/14] NUMA: convert -numa option to use OptsVisitor
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (2 preceding siblings ...)
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 03/14] NUMA: Add numa_info structure to contain numa nodes info Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 05/14] NUMA: expand MAX_NODES from 64 to 128 Hu Tao
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Wanlong Gao <gaowanlong@cn.fujitsu.com>

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 include/sysemu/sysemu.h |   3 +-
 numa.c                  | 148 +++++++++++++++++++++++-------------------------
 qapi-schema.json        |  30 ++++++++++
 vl.c                    |  11 +++-
 4 files changed, 114 insertions(+), 78 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index d873b42..20b05a3 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -140,9 +140,10 @@ typedef struct node_info {
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
-void numa_add(const char *optarg);
 void set_numa_nodes(void);
 void set_numa_modes(void);
+extern QemuOptsList qemu_numa_opts;
+int numa_init_func(QemuOpts *opts, void *opaque);
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
diff --git a/numa.c b/numa.c
index 1f413a0..827c76f 100644
--- a/numa.c
+++ b/numa.c
@@ -24,101 +24,97 @@
  */
 
 #include "sysemu/sysemu.h"
-
-static void numa_node_parse_cpus(int nodenr, const char *cpus)
+#include "qapi-visit.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/dealloc-visitor.h"
+QemuOptsList qemu_numa_opts = {
+    .name = "numa",
+    .implied_opt_name = "type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
+    .desc = { { 0 } } /* validated with OptsVisitor */
+};
+
+static int numa_node_parse(NumaNodeOptions *opts)
 {
-    char *endptr;
-    unsigned long long value, endvalue;
-
-    /* Empty CPU range strings will be considered valid, they will simply
-     * not set any bit in the CPU bitmap.
-     */
-    if (!*cpus) {
-        return;
-    }
+    uint16_t nodenr;
+    uint16List *cpus = NULL;
 
-    if (parse_uint(cpus, &value, &endptr, 10) < 0) {
-        goto error;
-    }
-    if (*endptr == '-') {
-        if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) {
-            goto error;
-        }
-    } else if (*endptr == '\0') {
-        endvalue = value;
+    if (opts->has_nodeid) {
+        nodenr = opts->nodeid;
     } else {
-        goto error;
+        nodenr = nb_numa_nodes;
     }
 
-    if (endvalue >= MAX_CPUMASK_BITS) {
-        endvalue = MAX_CPUMASK_BITS - 1;
-        fprintf(stderr,
-            "qemu: NUMA: A max of %d VCPUs are supported\n",
-             MAX_CPUMASK_BITS);
+    if (nodenr >= MAX_NODES) {
+        fprintf(stderr, "qemu: Max number of NUMA nodes reached: %"
+                PRIu16 "\n", nodenr);
+        return -1;
     }
 
-    if (endvalue < value) {
-        goto error;
+    for (cpus = opts->cpus; cpus; cpus = cpus->next) {
+        if (cpus->value > MAX_CPUMASK_BITS) {
+            fprintf(stderr, "qemu: cpu number %" PRIu16 " is bigger than %d",
+                    cpus->value, MAX_CPUMASK_BITS);
+            continue;
+        }
+        bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
     }
 
-    bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1);
-    return;
+    if (opts->has_mem) {
+        int64_t mem_size;
+        char *endptr;
+        mem_size = strtosz(opts->mem, &endptr);
+        if (mem_size < 0 || *endptr) {
+            fprintf(stderr, "qemu: invalid numa mem size: %s\n", opts->mem);
+            return -1;
+        }
+        numa_info[nodenr].node_mem = mem_size;
+    }
 
-error:
-    fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus);
-    exit(1);
+    return 0;
 }
 
-void numa_add(const char *optarg)
+int numa_init_func(QemuOpts *opts, void *opaque)
 {
-    char option[128];
-    char *endptr;
-    unsigned long long nodenr;
-
-    optarg = get_opt_name(option, 128, optarg, ',');
-    if (*optarg == ',') {
-        optarg++;
+    NumaOptions *object = NULL;
+    Error *err = NULL;
+    int ret = 0;
+
+    {
+        OptsVisitor *ov = opts_visitor_new(opts);
+        visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err);
+        opts_visitor_cleanup(ov);
     }
-    if (!strcmp(option, "node")) {
-
-        if (nb_numa_nodes >= MAX_NODES) {
-            fprintf(stderr, "qemu: too many NUMA nodes\n");
-            exit(1);
-        }
 
-        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
-            nodenr = nb_numa_nodes;
-        } else {
-            if (parse_uint_full(option, &nodenr, 10) < 0) {
-                fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
-                exit(1);
-            }
-        }
-
-        if (nodenr >= MAX_NODES) {
-            fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
-            exit(1);
-        }
+    if (error_is_set(&err)) {
+        fprintf(stderr, "qemu: %s\n", error_get_pretty(err));
+        error_free(err);
+        ret = -1;
+        goto error;
+    }
 
-        if (get_param_value(option, 128, "mem", optarg) == 0) {
-            numa_info[nodenr].node_mem = 0;
-        } else {
-            int64_t sval;
-            sval = strtosz(option, &endptr);
-            if (sval < 0 || *endptr) {
-                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
-                exit(1);
-            }
-            numa_info[nodenr].node_mem = sval;
-        }
-        if (get_param_value(option, 128, "cpus", optarg) != 0) {
-            numa_node_parse_cpus(nodenr, option);
+    switch (object->kind) {
+    case NUMA_OPTIONS_KIND_NODE:
+        ret = numa_node_parse(object->node);
+        if (ret) {
+            goto error;
         }
         nb_numa_nodes++;
-    } else {
-        fprintf(stderr, "Invalid -numa option: %s\n", option);
-        exit(1);
+        break;
+    default:
+        fprintf(stderr, "qemu: Invalid NUMA options type.\n");
+        ret = -1;
     }
+
+error:
+    if (object) {
+        QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
+        visit_type_NumaOptions(qapi_dealloc_get_visitor(dv),
+                               &object, NULL, NULL);
+        qapi_dealloc_visitor_cleanup(dv);
+    }
+
+    return ret;
 }
 
 void set_numa_nodes(void)
diff --git a/qapi-schema.json b/qapi-schema.json
index 7cfb5e5..a2839b8 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4420,3 +4420,33 @@
 # Since: 1.7
 ##
 { 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
+
+##
+# @NumaOptions
+#
+# A discriminated record of NUMA options. (for OptsVisitor)
+#
+# Since 2.0
+##
+{ 'union': 'NumaOptions',
+  'data': {
+    'node': 'NumaNodeOptions' }}
+
+##
+# @NumaNodeOptions
+#
+# Create a guest NUMA node. (for OptsVisitor)
+#
+# @nodeid: #optional NUMA node ID
+#
+# @cpus: #optional VCPUs belong to this node
+#
+# @mem: #optional memory size of this node
+#
+# Since: 2.0
+##
+{ 'type': 'NumaNodeOptions',
+  'data': {
+   '*nodeid': 'uint16',
+   '*cpus':   ['uint16'],
+   '*mem':    'str' }}
diff --git a/vl.c b/vl.c
index 915f8b7..e070649 100644
--- a/vl.c
+++ b/vl.c
@@ -2765,6 +2765,7 @@ int main(int argc, char **argv, char **envp)
     qemu_add_opts(&qemu_tpmdev_opts);
     qemu_add_opts(&qemu_realtime_opts);
     qemu_add_opts(&qemu_msg_opts);
+    qemu_add_opts(&qemu_numa_opts);
 
     runstate_init();
 
@@ -2952,7 +2953,10 @@ int main(int argc, char **argv, char **envp)
                 }
                 break;
             case QEMU_OPTION_numa:
-                numa_add(optarg);
+                opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1);
+                if (!opts) {
+                    exit(1);
+                }
                 break;
             case QEMU_OPTION_display:
                 display_type = select_display(optarg);
@@ -4042,6 +4046,11 @@ int main(int argc, char **argv, char **envp)
 
     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
 
+    if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func,
+                          NULL, 1) != 0) {
+        exit(1);
+    }
+
     set_numa_nodes();
 
     if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 05/14] NUMA: expand MAX_NODES from 64 to 128
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (3 preceding siblings ...)
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 04/14] NUMA: convert -numa option to use OptsVisitor Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 06/14] qapi: add SIZE type parser to string_input_visitor Hu Tao
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Wanlong Gao <gaowanlong@cn.fujitsu.com>

libnuma chose 128 for MAX_NODES, so we follow libnuma here.

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 include/sysemu/sysemu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 20b05a3..4c94cf5 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -132,7 +132,7 @@ extern size_t boot_splash_filedata_size;
 extern uint8_t qemu_extra_params_fw[2];
 extern QEMUClockType rtc_clock;
 
-#define MAX_NODES 64
+#define MAX_NODES 128
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
 typedef struct node_info {
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 06/14] qapi: add SIZE type parser to string_input_visitor
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (4 preceding siblings ...)
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 05/14] NUMA: expand MAX_NODES from 64 to 128 Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-19  9:54   ` Igor Mammedov
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 07/14] add memdev backend infrastructure Hu Tao
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Igor Mammedov <imammedo@redhat.com>

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 qapi/string-input-visitor.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
index 8f1bc41..a152f5d 100644
--- a/qapi/string-input-visitor.c
+++ b/qapi/string-input-visitor.c
@@ -97,6 +97,23 @@ static void parse_type_number(Visitor *v, double *obj, const char *name,
     *obj = val;
 }
 
+static void parse_type_size(Visitor *v, uint64_t *obj, const char *name,
+                            Error **errp)
+{
+    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
+    int64_t val;
+    char *endp;
+
+    val = strtosz_suffix(siv->string ? siv->string : "", &endp,
+                         STRTOSZ_DEFSUFFIX_B);
+    if (val < 0 || *endp != '\0') {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, name,
+                  "a size value representible as a non-negative int64");
+        return;
+    }
+    *obj = val;
+}
+
 static void parse_start_optional(Visitor *v, bool *present,
                                  const char *name, Error **errp)
 {
@@ -131,6 +148,7 @@ StringInputVisitor *string_input_visitor_new(const char *str)
     v->visitor.type_bool = parse_type_bool;
     v->visitor.type_str = parse_type_str;
     v->visitor.type_number = parse_type_number;
+    v->visitor.type_size = parse_type_size;
     v->visitor.start_optional = parse_start_optional;
 
     v->string = str;
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 07/14] add memdev backend infrastructure
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (5 preceding siblings ...)
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 06/14] qapi: add SIZE type parser to string_input_visitor Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-19  9:15   ` Igor Mammedov
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 08/14] pc: pass QEMUMachineInitArgs to pc_memory_init Hu Tao
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

Provides framework for splitting host RAM allocation/
policies into a separate backend that could be used
by devices.

Initially only the legacy RAM backend is provided, which
uses the memory_region_init_ram() allocator and is compatible
with every CLI option that affects memory_region_init_ram().

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>

Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 backends/Makefile.objs   |   2 +
 backends/hostmem-ram.c   |  48 ++++++++++++++++++
 backends/hostmem.c       | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 include/sysemu/hostmem.h |  63 ++++++++++++++++++++++++
 4 files changed, 238 insertions(+)
 create mode 100644 backends/hostmem-ram.c
 create mode 100644 backends/hostmem.c
 create mode 100644 include/sysemu/hostmem.h

diff --git a/backends/Makefile.objs b/backends/Makefile.objs
index 42557d5..e6bdc11 100644
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -6,3 +6,5 @@ common-obj-$(CONFIG_BRLAPI) += baum.o
 $(obj)/baum.o: QEMU_CFLAGS += $(SDL_CFLAGS) 
 
 common-obj-$(CONFIG_TPM) += tpm.o
+
+common-obj-y += hostmem.o hostmem-ram.o
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
new file mode 100644
index 0000000..a496dbd
--- /dev/null
+++ b/backends/hostmem-ram.c
@@ -0,0 +1,48 @@
+/*
+ * QEMU Host Memory Backend
+ *
+ * Copyright (C) 2013 Red Hat Inc
+ *
+ * Authors:
+ *   Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "sysemu/hostmem.h"
+
+#define TYPE_MEMORY_BACKEND_RAM "memory-ram"
+
+
+static int
+ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
+{
+    if (!memory_region_size(&backend->mr)) {
+        memory_region_init_ram(&backend->mr, OBJECT(backend),
+                               object_get_canonical_path(OBJECT(backend)),
+                               backend->size);
+    }
+
+    return 0;
+}
+
+static void
+ram_backend_class_init(ObjectClass *oc, void *data)
+{
+    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
+
+    bc->memory_init = ram_backend_memory_init;
+}
+
+static const TypeInfo ram_backend_info = {
+    .name = TYPE_MEMORY_BACKEND_RAM,
+    .parent = TYPE_MEMORY_BACKEND,
+    .class_init = ram_backend_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&ram_backend_info);
+}
+
+type_init(register_types);
diff --git a/backends/hostmem.c b/backends/hostmem.c
new file mode 100644
index 0000000..4b8fd8d
--- /dev/null
+++ b/backends/hostmem.c
@@ -0,0 +1,125 @@
+/*
+ * QEMU Host Memory Backend
+ *
+ * Copyright (C) 2013 Red Hat Inc
+ *
+ * Authors:
+ *   Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "sysemu/hostmem.h"
+#include "sysemu/sysemu.h"
+#include "qapi/visitor.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/config-file.h"
+#include "qom/object_interfaces.h"
+
+static void
+hostmemory_backend_get_size(Object *obj, Visitor *v, void *opaque,
+                            const char *name, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    uint64_t value = backend->size;
+
+    visit_type_size(v, &value, name, errp);
+}
+
+static void
+hostmemory_backend_set_size(Object *obj, Visitor *v, void *opaque,
+                            const char *name, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    uint64_t value;
+
+    if (memory_region_size(&backend->mr)) {
+        error_setg(errp, "cannot change property value\n");
+        return;
+    }
+
+    visit_type_size(v, &value, name, errp);
+    if (error_is_set(errp)) {
+        return;
+    }
+    if (!value) {
+        error_setg(errp, "Property '%s.%s' doesn't take value '%" PRIu64 "'",
+                   object_get_typename(obj), name , value);
+        return;
+    }
+    backend->size = value;
+}
+
+static void hostmemory_backend_initfn(Object *obj)
+{
+    object_property_add(obj, "size", "int",
+                        hostmemory_backend_get_size,
+                        hostmemory_backend_set_size, NULL, NULL, NULL);
+}
+
+static void hostmemory_backend_finalize(Object *obj)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+
+    if (memory_region_size(&backend->mr)) {
+        memory_region_destroy(&backend->mr);
+    }
+}
+
+static int
+hostmemory_backend_memory_init(HostMemoryBackend *backend, Error **errp)
+{
+    error_setg(errp, "memory_init is not implemented for type [%s]",
+               object_get_typename(OBJECT(backend)));
+
+    return -1;
+}
+
+MemoryRegion *
+host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
+{
+    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(backend);
+    Object *obj = OBJECT(backend);
+
+    if (!backend->size) {
+        error_setg(errp, "Invalid property [%s.size] value: %" PRIu64,
+                   object_get_typename(obj), backend->size);
+        return NULL;
+    }
+
+    if (bc->memory_init(backend, errp) < 0) {
+        return NULL;
+    }
+
+    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
+}
+
+static void
+hostmemory_backend_class_init(ObjectClass *oc, void *data)
+{
+    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
+
+    bc->memory_init = hostmemory_backend_memory_init;
+}
+
+static const TypeInfo hostmemory_backend_info = {
+    .name = TYPE_MEMORY_BACKEND,
+    .parent = TYPE_OBJECT,
+    .abstract = true,
+    .class_size = sizeof(HostMemoryBackendClass),
+    .class_init = hostmemory_backend_class_init,
+    .instance_size = sizeof(HostMemoryBackend),
+    .instance_init = hostmemory_backend_initfn,
+    .instance_finalize = hostmemory_backend_finalize,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void register_types(void)
+{
+    type_register_static(&hostmemory_backend_info);
+}
+
+type_init(register_types);
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
new file mode 100644
index 0000000..5847943
--- /dev/null
+++ b/include/sysemu/hostmem.h
@@ -0,0 +1,63 @@
+/*
+ * QEMU Host Memory Backend
+ *
+ * Copyright (C) 2013 Red Hat Inc
+ *
+ * Authors:
+ *   Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_RAM_H
+#define QEMU_RAM_H
+
+#include "qom/object.h"
+#include "qapi/error.h"
+#include "exec/memory.h"
+#include "qemu/option.h"
+
+#define TYPE_MEMORY_BACKEND "memory"
+#define MEMORY_BACKEND(obj) \
+    OBJECT_CHECK(HostMemoryBackend, (obj), TYPE_MEMORY_BACKEND)
+#define MEMORY_BACKEND_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(HostMemoryBackendClass, (obj), TYPE_MEMORY_BACKEND)
+#define MEMORY_BACKEND_CLASS(klass) \
+    OBJECT_CLASS_CHECK(HostMemoryBackendClass, (klass), TYPE_MEMORY_BACKEND)
+
+typedef struct HostMemoryBackend HostMemoryBackend;
+typedef struct HostMemoryBackendClass HostMemoryBackendClass;
+
+/**
+ * HostMemoryBackendClass:
+ * @parent_class: opaque parent class container
+ * @memory_init: hook for derived classes to perform memory allocation
+ */
+struct HostMemoryBackendClass {
+    ObjectClass parent_class;
+
+    int (*memory_init)(HostMemoryBackend *backend, Error **errp);
+};
+
+/**
+ * @HostMemoryBackend
+ *
+ * @parent: opaque parent object container
+ * @size: amount of memory backend provides
+ * @id: unique identification string in memdev namespace
+ * @mr: MemoryRegion representing host memory belonging to backend
+ */
+struct HostMemoryBackend {
+    /* private */
+    Object parent;
+
+    /* protected */
+    uint64_t size;
+
+    MemoryRegion mr;
+};
+
+MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend,
+                                             Error **errp);
+
+#endif
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 08/14] pc: pass QEMUMachineInitArgs to pc_memory_init
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (6 preceding siblings ...)
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 07/14] add memdev backend infrastructure Hu Tao
@ 2014-02-19  7:53 ` Hu Tao
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 09/14] numa: introduce memory_region_allocate_system_memory Hu Tao
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 hw/i386/pc.c         | 11 +++++------
 hw/i386/pc_piix.c    |  8 +++-----
 hw/i386/pc_q35.c     |  4 +---
 include/hw/i386/pc.h |  7 +++----
 4 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index a464e48..17d4820 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1145,10 +1145,8 @@ void pc_acpi_init(const char *default_dsdt)
     }
 }
 
-FWCfgState *pc_memory_init(MemoryRegion *system_memory,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
+FWCfgState *pc_memory_init(QEMUMachineInitArgs *args,
+                           MemoryRegion *system_memory,
                            ram_addr_t below_4g_mem_size,
                            ram_addr_t above_4g_mem_size,
                            MemoryRegion *rom_memory,
@@ -1160,7 +1158,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
     MemoryRegion *ram_below_4g, *ram_above_4g;
     FWCfgState *fw_cfg;
 
-    linux_boot = (kernel_filename != NULL);
+    linux_boot = (args->kernel_filename != NULL);
 
     /* Allocate RAM.  We allocate it as a single memory region and use
      * aliases to address portions of it, mostly for backwards compatibility
@@ -1201,7 +1199,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
     rom_set_fw(fw_cfg);
 
     if (linux_boot) {
-        load_linux(fw_cfg, kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size);
+        load_linux(fw_cfg, args->kernel_filename, args->initrd_filename,
+                   args->kernel_cmdline, below_4g_mem_size);
     }
 
     for (i = 0; i < nb_option_roms; i++) {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 1acd2b2..670d417 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -151,11 +151,9 @@ static void pc_init1(QEMUMachineInitArgs *args,
 
     /* allocate ram and load rom/bios */
     if (!xen_enabled()) {
-        fw_cfg = pc_memory_init(system_memory,
-                       args->kernel_filename, args->kernel_cmdline,
-                       args->initrd_filename,
-                       below_4g_mem_size, above_4g_mem_size,
-                       rom_memory, &ram_memory, guest_info);
+        fw_cfg = pc_memory_init(args, system_memory,
+                                below_4g_mem_size, above_4g_mem_size,
+                                rom_memory, &ram_memory, guest_info);
     }
 
     gsi_state = g_malloc0(sizeof(*gsi_state));
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index a7f6260..95fa01fc 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -138,9 +138,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 
     /* allocate ram and load rom/bios */
     if (!xen_enabled()) {
-        pc_memory_init(get_system_memory(),
-                       args->kernel_filename, args->kernel_cmdline,
-                       args->initrd_filename,
+        pc_memory_init(args, get_system_memory(),
                        below_4g_mem_size, above_4g_mem_size,
                        rom_memory, &ram_memory, guest_info);
     }
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 3e1e81b..699d93d 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -3,6 +3,7 @@
 
 #include "qemu-common.h"
 #include "exec/memory.h"
+#include "hw/boards.h"
 #include "hw/isa/isa.h"
 #include "hw/block/fdc.h"
 #include "net/net.h"
@@ -134,10 +135,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
 void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
                             MemoryRegion *pci_address_space);
 
-FWCfgState *pc_memory_init(MemoryRegion *system_memory,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
+FWCfgState *pc_memory_init(QEMUMachineInitArgs *args,
+                           MemoryRegion *system_memory,
                            ram_addr_t below_4g_mem_size,
                            ram_addr_t above_4g_mem_size,
                            MemoryRegion *rom_memory,
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 09/14] numa: introduce memory_region_allocate_system_memory
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (7 preceding siblings ...)
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 08/14] pc: pass QEMUMachineInitArgs to pc_memory_init Hu Tao
@ 2014-02-19  7:54 ` Hu Tao
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option Hu Tao
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 hw/i386/pc.c            |  4 +---
 include/sysemu/sysemu.h |  5 +++++
 numa.c                  | 10 ++++++++++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 17d4820..ff078fb 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1165,9 +1165,7 @@ FWCfgState *pc_memory_init(QEMUMachineInitArgs *args,
      * with older qemus that used qemu_ram_alloc().
      */
     ram = g_malloc(sizeof(*ram));
-    memory_region_init_ram(ram, NULL, "pc.ram",
-                           below_4g_mem_size + above_4g_mem_size);
-    vmstate_register_ram_global(ram);
+    memory_region_allocate_system_memory(ram, NULL, "pc.ram", args);
     *ram_memory = ram;
     ram_below_4g = g_malloc(sizeof(*ram_below_4g));
     memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 4c94cf5..e9da760 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -10,6 +10,8 @@
 #include "qemu/notify.h"
 #include "qemu/main-loop.h"
 #include "qemu/bitmap.h"
+#include "qom/object.h"
+#include "hw/boards.h"
 
 /* vl.c */
 
@@ -144,6 +146,9 @@ void set_numa_nodes(void);
 void set_numa_modes(void);
 extern QemuOptsList qemu_numa_opts;
 int numa_init_func(QemuOpts *opts, void *opaque);
+void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
+                                          const char *name,
+                                          QEMUMachineInitArgs *args);
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
diff --git a/numa.c b/numa.c
index 827c76f..403b08b 100644
--- a/numa.c
+++ b/numa.c
@@ -188,3 +188,13 @@ void set_numa_modes(void)
         }
     }
 }
+
+void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
+                                          const char *name,
+                                          QEMUMachineInitArgs *args)
+{
+    uint64_t ram_size = args->ram_size;
+
+    memory_region_init_ram(mr, owner, name, ram_size);
+    vmstate_register_ram_global(mr);
+}
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (8 preceding siblings ...)
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 09/14] numa: introduce memory_region_allocate_system_memory Hu Tao
@ 2014-02-19  7:54 ` Hu Tao
  2014-02-19  9:50   ` Igor Mammedov
  2014-03-04  0:10   ` Eric Blake
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 11/14] qapi: make string input visitor parse int list Hu Tao
                   ` (3 subsequent siblings)
  13 siblings, 2 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Paolo Bonzini <pbonzini@redhat.com>

This option provides the infrastructure for binding guest NUMA nodes
to host NUMA nodes.  For example:

 -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \
 -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
 -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
 -numa node,nodeid=1,cpus=1,memdev=ram-node1

The option replaces "-numa mem".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Conflicts:
	include/sysemu/sysemu.h
	numa.c

Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 include/sysemu/sysemu.h |  2 ++
 numa.c                  | 64 +++++++++++++++++++++++++++++++++++++++++++++++--
 qapi-schema.json        |  6 ++++-
 3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index e9da760..acfc0c7 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -12,6 +12,7 @@
 #include "qemu/bitmap.h"
 #include "qom/object.h"
 #include "hw/boards.h"
+#include "sysemu/hostmem.h"
 
 /* vl.c */
 
@@ -140,6 +141,7 @@ extern int nb_numa_nodes;
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+    HostMemoryBackend *node_memdev;
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
 void set_numa_nodes(void);
diff --git a/numa.c b/numa.c
index 403b08b..ca55ad7 100644
--- a/numa.c
+++ b/numa.c
@@ -27,6 +27,8 @@
 #include "qapi-visit.h"
 #include "qapi/opts-visitor.h"
 #include "qapi/dealloc-visitor.h"
+#include "qapi/qmp/qerror.h"
+
 QemuOptsList qemu_numa_opts = {
     .name = "numa",
     .implied_opt_name = "type",
@@ -34,10 +36,13 @@ QemuOptsList qemu_numa_opts = {
     .desc = { { 0 } } /* validated with OptsVisitor */
 };
 
+static int have_memdevs = -1;
+
 static int numa_node_parse(NumaNodeOptions *opts)
 {
     uint16_t nodenr;
     uint16List *cpus = NULL;
+    Error *local_err = NULL;
 
     if (opts->has_nodeid) {
         nodenr = opts->nodeid;
@@ -60,6 +65,19 @@ static int numa_node_parse(NumaNodeOptions *opts)
         bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
     }
 
+    if (opts->has_mem && opts->has_memdev) {
+        fprintf(stderr, "qemu: cannot specify both mem= and memdev=\n");
+        return -1;
+    }
+
+    if (have_memdevs == -1) {
+        have_memdevs = opts->has_memdev;
+    }
+    if (opts->has_memdev != have_memdevs) {
+        fprintf(stderr, "qemu: memdev option must be specified for either "
+                "all or no nodes\n");
+    }
+
     if (opts->has_mem) {
         int64_t mem_size;
         char *endptr;
@@ -70,7 +88,19 @@ static int numa_node_parse(NumaNodeOptions *opts)
         }
         numa_info[nodenr].node_mem = mem_size;
     }
+    if (opts->has_memdev) {
+        Object *o;
+        o = object_resolve_path_type(opts->memdev, TYPE_MEMORY_BACKEND, NULL);
+        if (!o) {
+            error_setg(&local_err, "memdev=%s is ambiguous", opts->memdev);
+            qerror_report_err(local_err);
+            return -1;
+        }
 
+        object_ref(o);
+        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
+        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+    }
     return 0;
 }
 
@@ -189,12 +219,42 @@ void set_numa_modes(void)
     }
 }
 
+static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
+                                           const char *name,
+                                           QEMUMachineInitArgs *args)
+{
+    uint64_t ram_size = args->ram_size;
+
+    memory_region_init_ram(mr, owner, name, ram_size);
+    vmstate_register_ram_global(mr);
+}
+
 void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                           const char *name,
                                           QEMUMachineInitArgs *args)
 {
     uint64_t ram_size = args->ram_size;
+    uint64_t addr = 0;
+    int i;
 
-    memory_region_init_ram(mr, owner, name, ram_size);
-    vmstate_register_ram_global(mr);
+    if (nb_numa_nodes == 0 || !have_memdevs) {
+        allocate_system_memory_nonnuma(mr, owner, name, args);
+        return;
+    }
+
+    memory_region_init(mr, owner, name, ram_size);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        Error *local_err = NULL;
+        uint64_t size = numa_info[i].node_mem;
+        HostMemoryBackend *backend = numa_info[i].node_memdev;
+        MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
+        if (local_err) {
+            qerror_report_err(local_err);
+            exit(1);
+        }
+
+        memory_region_add_subregion(mr, addr, seg);
+        vmstate_register_ram_global(seg);
+        addr += size;
+    }
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index a2839b8..498ea9b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4441,7 +4441,10 @@
 #
 # @cpus: #optional VCPUs belong to this node
 #
-# @mem: #optional memory size of this node
+# @memdev: #optional memory backend object.  If specified for one node,
+#          it must be specified for all nodes.
+#
+# @mem: #optional memory size of this node; mutually exclusive with @memdev.
 #
 # Since: 2.0
 ##
@@ -4449,4 +4452,5 @@
   'data': {
    '*nodeid': 'uint16',
    '*cpus':   ['uint16'],
+   '*memdev': 'str',
    '*mem':    'str' }}
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 11/14] qapi: make string input visitor parse int list
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (9 preceding siblings ...)
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option Hu Tao
@ 2014-02-19  7:54 ` Hu Tao
  2014-02-19  8:17   ` Hu Tao
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type Hu Tao
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

Cc: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 qapi/string-input-visitor.c       | 160 ++++++++++++++++++++++++++++++++++++--
 tests/test-string-input-visitor.c |  22 ++++++
 2 files changed, 176 insertions(+), 6 deletions(-)

diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
index a152f5d..4540ca3 100644
--- a/qapi/string-input-visitor.c
+++ b/qapi/string-input-visitor.c
@@ -15,30 +15,175 @@
 #include "qapi/visitor-impl.h"
 #include "qapi/qmp/qerror.h"
 
+enum ListMode {
+    LM_NONE,             /* not traversing a list of repeated options */
+    LM_STARTED,          /* start_list() succeeded */
+
+    LM_IN_PROGRESS,      /* next_list() has been called.
+                          *
+                          * Generating the next list link will consume the most
+                          * recently parsed QemuOpt instance of the repeated
+                          * option.
+                          *
+                          * Parsing a value into the list link will examine the
+                          * next QemuOpt instance of the repeated option, and
+                          * possibly enter LM_SIGNED_INTERVAL or
+                          * LM_UNSIGNED_INTERVAL.
+                          */
+
+    LM_SIGNED_INTERVAL,  /* next_list() has been called.
+                          *
+                          * Generating the next list link will consume the most
+                          * recently stored element from the signed interval,
+                          * parsed from the most recent QemuOpt instance of the
+                          * repeated option. This may consume QemuOpt itself
+                          * and return to LM_IN_PROGRESS.
+                          *
+                          * Parsing a value into the list link will store the
+                          * next element of the signed interval.
+                          */
+
+    LM_UNSIGNED_INTERVAL,/* Same as above, only for an unsigned interval. */
+
+    LM_END
+};
+
+typedef enum ListMode ListMode;
+
 struct StringInputVisitor
 {
     Visitor visitor;
+
+    ListMode list_mode;
+
+    /* When parsing a list of repeating options as integers, values of the form
+     * "a-b", representing a closed interval, are allowed. Elements in the
+     * range are generated individually.
+     */
+    union {
+        int64_t s;
+        uint64_t u;
+    } range_next, range_limit;
+
     const char *string;
 };
 
+static void
+start_list(Visitor *v, const char *name, Error **errp)
+{
+    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
+
+    /* we can't traverse a list in a list */
+    assert(siv->list_mode == LM_NONE);
+    siv->list_mode = LM_STARTED;
+}
+
+static GenericList *
+next_list(Visitor *v, GenericList **list, Error **errp)
+{
+    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
+    GenericList **link;
+
+    switch (siv->list_mode) {
+    case LM_STARTED:
+        siv->list_mode = LM_IN_PROGRESS;
+        link = list;
+        break;
+
+    case LM_SIGNED_INTERVAL:
+    case LM_UNSIGNED_INTERVAL:
+        link = &(*list)->next;
+
+        if (siv->list_mode == LM_SIGNED_INTERVAL) {
+            if (siv->range_next.s < siv->range_limit.s) {
+                ++siv->range_next.s;
+                break;
+            }
+        } else if (siv->range_next.u < siv->range_limit.u) {
+            ++siv->range_next.u;
+            break;
+        }
+        siv->list_mode = LM_END;
+        /* range has been completed, fall through */
+
+    case LM_END:
+        return NULL;
+
+    case LM_IN_PROGRESS:
+        link = &(*list)->next;
+        break;
+
+    default:
+        abort();
+    }
+
+    *link = g_malloc0(sizeof **link);
+    return *link;
+}
+
+static void
+end_list(Visitor *v, Error **errp)
+{
+    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
+
+    assert(siv->list_mode == LM_STARTED ||
+           siv->list_mode == LM_END ||
+           siv->list_mode == LM_IN_PROGRESS ||
+           siv->list_mode == LM_SIGNED_INTERVAL ||
+           siv->list_mode == LM_UNSIGNED_INTERVAL);
+    siv->list_mode = LM_NONE;
+}
+
 static void parse_type_int(Visitor *v, int64_t *obj, const char *name,
                            Error **errp)
 {
     StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
-    char *endp = (char *) siv->string;
+    char *str = (char *) siv->string;
     long long val;
+    char *endptr;
 
-    errno = 0;
-    if (siv->string) {
-        val = strtoll(siv->string, &endp, 0);
+    if (siv->list_mode == LM_SIGNED_INTERVAL) {
+        *obj = siv->range_next.s;
+        return;
     }
-    if (!siv->string || errno || endp == siv->string || *endp) {
+
+    if (!siv->string) {
         error_set(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
                   "integer");
         return;
     }
 
-    *obj = val;
+    errno = 0;
+    val = strtoll(siv->string, &endptr, 0);
+
+    if (errno == 0 && endptr > str && INT64_MIN <= val && val <= INT64_MAX) {
+        if (*endptr == '\0') {
+            *obj = val;
+            siv->list_mode = LM_END;
+            return;
+        }
+        if (*endptr == '-' && siv->list_mode == LM_IN_PROGRESS) {
+            long long val2;
+
+            str = endptr + 1;
+            val2 = strtoll(str, &endptr, 0);
+            if (errno == 0 && endptr > str && *endptr == '\0' &&
+                INT64_MIN <= val2 && val2 <= INT64_MAX && val <= val2 &&
+                (val > INT64_MAX - 65536 ||
+                 val2 < val + 65536)) {
+                siv->range_next.s = val;
+                siv->range_limit.s = val2;
+                siv->list_mode = LM_SIGNED_INTERVAL;
+
+                /* as if entering on the top */
+                *obj = siv->range_next.s;
+                return;
+            }
+        }
+    }
+    error_set(errp, QERR_INVALID_PARAMETER_VALUE, name,
+              (siv->list_mode == LM_NONE) ? "an int64 value" :
+                                           "an int64 value or range");
 }
 
 static void parse_type_bool(Visitor *v, bool *obj, const char *name,
@@ -149,6 +294,9 @@ StringInputVisitor *string_input_visitor_new(const char *str)
     v->visitor.type_str = parse_type_str;
     v->visitor.type_number = parse_type_number;
     v->visitor.type_size = parse_type_size;
+    v->visitor.start_list = start_list;
+    v->visitor.next_list = next_list;
+    v->visitor.end_list = end_list;
     v->visitor.start_optional = parse_start_optional;
 
     v->string = str;
diff --git a/tests/test-string-input-visitor.c b/tests/test-string-input-visitor.c
index 5989f81..3b47ddf 100644
--- a/tests/test-string-input-visitor.c
+++ b/tests/test-string-input-visitor.c
@@ -64,6 +64,26 @@ static void test_visitor_in_int(TestInputVisitorData *data,
     g_assert_cmpint(res, ==, value);
 }
 
+static void test_visitor_in_intList(TestInputVisitorData *data,
+                                    const void *unused)
+{
+    int64_t value[] = {-2, -1, 0, 1, 2, 3, 4};
+    int16List *res = NULL;
+    Error *errp = NULL;
+    Visitor *v;
+    int i = 0;
+
+    v = visitor_input_test_init(data, "-2-4");
+
+    visit_type_int16List(v, &res, NULL, &errp);
+    g_assert(!error_is_set(&errp));
+    while (res && i < sizeof(value) / sizeof(value[0])) {
+        printf("%d\n", res->value);
+        g_assert_cmpint(res->value, ==, value[i++]);
+        res = res->next;
+    }
+}
+
 static void test_visitor_in_bool(TestInputVisitorData *data,
                                  const void *unused)
 {
@@ -228,6 +248,8 @@ int main(int argc, char **argv)
 
     input_visitor_test_add("/string-visitor/input/int",
                            &in_visitor_data, test_visitor_in_int);
+    input_visitor_test_add("/string-visitor/input/intList",
+                           &in_visitor_data, test_visitor_in_intList);
     input_visitor_test_add("/string-visitor/input/bool",
                            &in_visitor_data, test_visitor_in_bool);
     input_visitor_test_add("/string-visitor/input/number",
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (10 preceding siblings ...)
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 11/14] qapi: make string input visitor parse int list Hu Tao
@ 2014-02-19  7:54 ` Hu Tao
  2014-02-19  9:08   ` Paolo Bonzini
  2014-02-19 11:23   ` Igor Mammedov
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields Hu Tao
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 14/14] qmp: add query-memdev Hu Tao
  13 siblings, 2 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

From: Wanlong Gao <gaowanlong@cn.fujitsu.com>

This new enum type will be used to set host memory policy of
backend host memory.

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 qapi-schema.json | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/qapi-schema.json b/qapi-schema.json
index 498ea9b..9d6370f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4454,3 +4454,23 @@
    '*cpus':   ['uint16'],
    '*memdev': 'str',
    '*mem':    'str' }}
+
+##
+# @HostMemPolicy
+#
+# Host memory policy types
+#
+# @default: restore default policy, remove any nondefault policy
+#
+# @preferred: set the preferred host node for allocation
+#
+# @membind: a strict policy that restricts memory allocation to the
+#           host nodes specified
+#
+# @interleave: the page allocations is interleaved across the set
+#              of host nodes specified
+#
+# Since: 2.0
+##
+{ 'enum': 'HostMemPolicy',
+  'data': [ 'default', 'preferred', 'membind', 'interleave' ] }
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (11 preceding siblings ...)
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type Hu Tao
@ 2014-02-19  7:54 ` Hu Tao
  2014-02-19  9:03   ` Paolo Bonzini
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 14/14] qmp: add query-memdev Hu Tao
  13 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

This lets the user control how memory is allocated for the ram backend.

Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 backends/hostmem-ram.c  | 158 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/sysemu/sysemu.h |   2 +
 2 files changed, 160 insertions(+)

diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
index a496dbd..2da9341 100644
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -10,23 +10,179 @@
  * See the COPYING file in the top-level directory.
  */
 #include "sysemu/hostmem.h"
+#include "sysemu/sysemu.h"
+#include "qemu/bitmap.h"
+#include "qapi-visit.h"
+#include "qemu/config-file.h"
+#include "qapi/opts-visitor.h"
 
 #define TYPE_MEMORY_BACKEND_RAM "memory-ram"
+#define MEMORY_BACKEND_RAM(obj) \
+    OBJECT_CHECK(HostMemoryBackendRam, (obj), TYPE_MEMORY_BACKEND_RAM)
 
+typedef struct HostMemoryBackendRam HostMemoryBackendRam;
+
+/**
+ * @HostMemoryBackendRam
+ *
+ * @parent: opaque parent object container
+ * @host_nodes: host nodes bitmap used for memory policy
+ * @policy: host memory policy
+ * @relative: if the host nodes bitmap is relative
+ */
+struct HostMemoryBackendRam {
+    /* private */
+    HostMemoryBackend parent;
+
+    DECLARE_BITMAP(host_nodes, MAX_NODES);
+    HostMemPolicy policy;
+    bool relative;
+};
+
+static void
+get_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
+               Error **errp)
+{
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
+    uint16List *host_nodes = NULL;
+    uint16List **node = &host_nodes;
+    unsigned long value;
+
+    value = find_first_bit(ram_backend->host_nodes, MAX_NODES);
+    if (value == MAX_NODES) {
+        return;
+    }
+
+    *node = g_malloc0(sizeof(**node));
+    (*node)->value = value;
+    node = &(*node)->next;
+
+    do {
+        value = find_next_bit(ram_backend->host_nodes, MAX_NODES, value + 1);
+        if (value == MAX_NODES) {
+            break;
+        }
+
+        *node = g_malloc0(sizeof(**node));
+        (*node)->value = value;
+        node = &(*node)->next;
+    } while (true);
+
+    visit_type_uint16List(v, &host_nodes, name, errp);
+}
+
+static void
+set_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
+               Error **errp)
+{
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
+    uint16List *l = NULL;
+
+    visit_type_uint16List(v, &l, name, errp);
+
+    while (l) {
+        bitmap_set(ram_backend->host_nodes, l->value, 1);
+        l = l->next;
+    }
+}
+
+static const char *policies[HOST_MEM_POLICY_MAX + 1] = {
+    [HOST_MEM_POLICY_DEFAULT] = "default",
+    [HOST_MEM_POLICY_PREFERRED] = "preferred",
+    [HOST_MEM_POLICY_MEMBIND] = "membind",
+    [HOST_MEM_POLICY_INTERLEAVE] = "interleave",
+    [HOST_MEM_POLICY_MAX] = NULL,
+};
+
+static void
+get_policy(Object *obj, Visitor *v, void *opaque, const char *name,
+           Error **errp)
+{
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
+    int policy = ram_backend->policy;
+
+    visit_type_enum(v, &policy, policies, NULL, name, errp);
+}
+
+static void
+set_policy(Object *obj, Visitor *v, void *opaque, const char *name,
+           Error **errp)
+{
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
+    int policy;
+
+    visit_type_enum(v, &policy, policies, NULL, name, errp);
+    ram_backend->policy = policy;
+}
+
+
+static bool get_relative(Object *obj, Error **errp)
+{
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
+
+    return ram_backend->relative;
+}
+
+static void set_relative(Object *obj, bool value, Error **errp)
+{
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
+
+    ram_backend->relative = value;
+}
+
+#include <sys/syscall.h>
+#ifndef MPOL_F_RELATIVE_NODES
+#define MPOL_F_RELATIVE_NODES (1 << 14)
+#define MPOL_F_STATIC_NODES   (1 << 15)
+#endif
 
 static int
 ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
 {
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
+    int mode = ram_backend->policy;
+    void *p;
+    unsigned long maxnode;
+
     if (!memory_region_size(&backend->mr)) {
         memory_region_init_ram(&backend->mr, OBJECT(backend),
                                object_get_canonical_path(OBJECT(backend)),
                                backend->size);
+
+        p = memory_region_get_ram_ptr(&backend->mr);
+        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
+
+        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
+            MPOL_F_STATIC_NODES;
+
+        /* This is a workaround for a long standing bug in Linux'
+         * mbind implementation, which cuts off the last specified
+         * node. To stay compatible should this bug be fixed, we
+         * specify one more node and zero this one out.
+         */
+        if (syscall(SYS_mbind, p, backend->size, mode,
+                    ram_backend->host_nodes, maxnode + 2, 0)) {
+            return -1;
+        }
     }
 
     return 0;
 }
 
 static void
+ram_backend_initfn(Object *obj)
+{
+    object_property_add(obj, "host-nodes", "int",
+                        get_host_nodes,
+                        set_host_nodes, NULL, NULL, NULL);
+    object_property_add(obj, "policy", "string",
+                        get_policy,
+                        set_policy, NULL, NULL, NULL);
+    object_property_add_bool(obj, "relative",
+                             get_relative, set_relative, NULL);
+}
+
+static void
 ram_backend_class_init(ObjectClass *oc, void *data)
 {
     HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
@@ -38,6 +194,8 @@ static const TypeInfo ram_backend_info = {
     .name = TYPE_MEMORY_BACKEND_RAM,
     .parent = TYPE_MEMORY_BACKEND,
     .class_init = ram_backend_class_init,
+    .instance_size = sizeof(HostMemoryBackendRam),
+    .instance_init = ram_backend_initfn,
 };
 
 static void register_types(void)
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index acfc0c7..a3d8c02 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -152,6 +152,8 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                           const char *name,
                                           QEMUMachineInitArgs *args);
 
+extern QemuOptsList qemu_memdev_opts;
+
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
     const char *name;
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v18 14/14] qmp: add query-memdev
  2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
                   ` (12 preceding siblings ...)
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields Hu Tao
@ 2014-02-19  7:54 ` Hu Tao
  2014-02-19  8:14   ` Hu Tao
  2014-02-19  9:07   ` Paolo Bonzini
  13 siblings, 2 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  7:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 backends/hostmem-ram.c | 71 ++++++++++++++++++++++++++++++++++++++++++++------
 qapi-schema.json       | 31 ++++++++++++++++++++++
 qmp-commands.hx        | 30 +++++++++++++++++++++
 3 files changed, 124 insertions(+), 8 deletions(-)

diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
index 2da9341..9f19ab8 100644
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -15,6 +15,7 @@
 #include "qapi-visit.h"
 #include "qemu/config-file.h"
 #include "qapi/opts-visitor.h"
+#include "qmp-commands.h"
 
 #define TYPE_MEMORY_BACKEND_RAM "memory-ram"
 #define MEMORY_BACKEND_RAM(obj) \
@@ -37,8 +38,66 @@ struct HostMemoryBackendRam {
     DECLARE_BITMAP(host_nodes, MAX_NODES);
     HostMemPolicy policy;
     bool relative;
+
+    QTAILQ_ENTRY(HostMemoryBackendRam) next;
+};
+
+static const char *policies[HOST_MEM_POLICY_MAX + 1] = {
+    [HOST_MEM_POLICY_DEFAULT] = "default",
+    [HOST_MEM_POLICY_PREFERRED] = "preferred",
+    [HOST_MEM_POLICY_MEMBIND] = "membind",
+    [HOST_MEM_POLICY_INTERLEAVE] = "interleave",
+    [HOST_MEM_POLICY_MAX] = NULL,
 };
 
+static GSList *memdevs;
+
+static void func(gpointer data, gpointer user_data)
+{
+    HostMemoryBackendRam *backend = data;
+    MemdevList **list = user_data;
+    MemdevList *m;
+    uint16List **node;
+    unsigned long value;
+
+    m = g_malloc0(sizeof(*m));
+    m->value = g_malloc0(sizeof(*m->value));
+    m->value->policy = g_strdup(policies[backend->policy]);
+    m->value->relative = backend->relative;
+
+    node = &m->value->host_nodes;
+
+    value = find_first_bit(backend->host_nodes, MAX_NODES);
+    if (value < MAX_NODES) {
+        *node = g_malloc0(sizeof(**node));
+        (*node)->value = value;
+        node = &(*node)->next;
+
+        do {
+            value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
+            if (value == MAX_NODES) {
+                break;
+            }
+
+            *node = g_malloc0(sizeof(**node));
+            (*node)->value = value;
+            node = &(*node)->next;
+        } while (true);
+    }
+
+    m->next = *list;
+    *list = m;
+}
+
+MemdevList *qmp_query_memdev(Error **errp)
+{
+    MemdevList *list = NULL;
+
+    g_slist_foreach(memdevs, func, &list);
+
+    return list;
+}
+
 static void
 get_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
                Error **errp)
@@ -86,14 +145,6 @@ set_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
     }
 }
 
-static const char *policies[HOST_MEM_POLICY_MAX + 1] = {
-    [HOST_MEM_POLICY_DEFAULT] = "default",
-    [HOST_MEM_POLICY_PREFERRED] = "preferred",
-    [HOST_MEM_POLICY_MEMBIND] = "membind",
-    [HOST_MEM_POLICY_INTERLEAVE] = "interleave",
-    [HOST_MEM_POLICY_MAX] = NULL,
-};
-
 static void
 get_policy(Object *obj, Visitor *v, void *opaque, const char *name,
            Error **errp)
@@ -172,6 +223,8 @@ ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
 static void
 ram_backend_initfn(Object *obj)
 {
+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
+
     object_property_add(obj, "host-nodes", "int",
                         get_host_nodes,
                         set_host_nodes, NULL, NULL, NULL);
@@ -180,6 +233,8 @@ ram_backend_initfn(Object *obj)
                         set_policy, NULL, NULL, NULL);
     object_property_add_bool(obj, "relative",
                              get_relative, set_relative, NULL);
+
+    memdevs = g_slist_append(memdevs, ram_backend);
 }
 
 static void
diff --git a/qapi-schema.json b/qapi-schema.json
index 9d6370f..7b5027d 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4474,3 +4474,34 @@
 ##
 { 'enum': 'HostMemPolicy',
   'data': [ 'default', 'preferred', 'membind', 'interleave' ] }
+
+##
+# @Memdev:
+#
+# Information of memory device
+#
+# @id: memory device id
+#
+# @host-nodes: host nodes for its memory policy
+#
+# @policy: memory policy of memory device
+#
+# Since: 2.0
+##
+
+{ 'type': 'Memdev',
+  'data': {
+    'host-nodes': ['uint16'],
+    'policy': 'str',
+    'relative': 'bool' }}
+
+##
+# @query-memdev:
+#
+# Returns information for all memory devices.
+#
+# Returns: a list of @Memdev.
+#
+# Since: 2.0
+##
+{ 'command': 'query-memdev', 'returns': ['Memdev'] }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index cce6b81..20368f7 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3457,3 +3457,33 @@ Example:
                    } } ] }
 
 EQMP
+
+    {
+        .name       = "query-memdev",
+        .args_type  = "",
+        .mhandler.cmd_new = qmp_marshal_input_query_memdev,
+    },
+
+SQMP
+query-memdev
+------------
+
+Show memory devices information.
+
+
+Example (1):
+
+-> { "execute": "query-memdev" }
+<- { "return": [
+       {
+         "host-nodes": [0, 1],
+         "policy": "bind"
+       },
+       {
+         "host-nodes": [2, 3],
+         "policy": "preferred"
+       }
+     ]
+   }
+
+EQMP
-- 
1.8.5.2.229.g4448466


^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 14/14] qmp: add query-memdev
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 14/14] qmp: add query-memdev Hu Tao
@ 2014-02-19  8:14   ` Hu Tao
  2014-02-19  9:07   ` Paolo Bonzini
  1 sibling, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-19  8:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

<...>

> +static void func(gpointer data, gpointer user_data)

maybe a better name.

> +{
> +    HostMemoryBackendRam *backend = data;
> +    MemdevList **list = user_data;
> +    MemdevList *m;
> +    uint16List **node;
> +    unsigned long value;
> +
> +    m = g_malloc0(sizeof(*m));
> +    m->value = g_malloc0(sizeof(*m->value));
> +    m->value->policy = g_strdup(policies[backend->policy]);
> +    m->value->relative = backend->relative;
> +
> +    node = &m->value->host_nodes;
> +
> +    value = find_first_bit(backend->host_nodes, MAX_NODES);
> +    if (value < MAX_NODES) {
> +        *node = g_malloc0(sizeof(**node));
> +        (*node)->value = value;
> +        node = &(*node)->next;
> +
> +        do {
> +            value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
> +            if (value == MAX_NODES) {
> +                break;
> +            }
> +
> +            *node = g_malloc0(sizeof(**node));
> +            (*node)->value = value;
> +            node = &(*node)->next;
> +        } while (true);
> +    }

It is useful to query also the size property. But it's a member of
parent class(HostMemoryBackend). I'm not sure what is the common
solution, but maybe we can add size to Memdev(see below) and fill it
with HostMemoryBackend::size?

> +
> +    m->next = *list;
> +    *list = m;
> +}
> +

<...>

> +
> +##
> +# @Memdev:
> +#
> +# Information of memory device
> +#
> +# @id: memory device id
> +#
> +# @host-nodes: host nodes for its memory policy
> +#
> +# @policy: memory policy of memory device
> +#
> +# Since: 2.0
> +##
> +
> +{ 'type': 'Memdev',
> +  'data': {
> +    'host-nodes': ['uint16'],
> +    'policy': 'str',
> +    'relative': 'bool' }}

add size to query HostMemoryBackend::size?

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 11/14] qapi: make string input visitor parse int list
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 11/14] qapi: make string input visitor parse int list Hu Tao
@ 2014-02-19  8:17   ` Hu Tao
  2014-02-19  8:42     ` Paolo Bonzini
  0 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-19  8:17 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

On Wed, Feb 19, 2014 at 03:54:02PM +0800, Hu Tao wrote:
> Cc: Laszlo Ersek <lersek@redhat.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  qapi/string-input-visitor.c       | 160 ++++++++++++++++++++++++++++++++++++--
>  tests/test-string-input-visitor.c |  22 ++++++
>  2 files changed, 176 insertions(+), 6 deletions(-)
> 
> diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
> index a152f5d..4540ca3 100644
> --- a/qapi/string-input-visitor.c
> +++ b/qapi/string-input-visitor.c
> @@ -15,30 +15,175 @@
>  #include "qapi/visitor-impl.h"
>  #include "qapi/qmp/qerror.h"
>  
> +enum ListMode {
> +    LM_NONE,             /* not traversing a list of repeated options */
> +    LM_STARTED,          /* start_list() succeeded */
> +
> +    LM_IN_PROGRESS,      /* next_list() has been called.
> +                          *
> +                          * Generating the next list link will consume the most
> +                          * recently parsed QemuOpt instance of the repeated
> +                          * option.
> +                          *
> +                          * Parsing a value into the list link will examine the
> +                          * next QemuOpt instance of the repeated option, and
> +                          * possibly enter LM_SIGNED_INTERVAL or
> +                          * LM_UNSIGNED_INTERVAL.
> +                          */
> +
> +    LM_SIGNED_INTERVAL,  /* next_list() has been called.
> +                          *
> +                          * Generating the next list link will consume the most
> +                          * recently stored element from the signed interval,
> +                          * parsed from the most recent QemuOpt instance of the
> +                          * repeated option. This may consume QemuOpt itself
> +                          * and return to LM_IN_PROGRESS.
> +                          *
> +                          * Parsing a value into the list link will store the
> +                          * next element of the signed interval.
> +                          */
> +
> +    LM_UNSIGNED_INTERVAL,/* Same as above, only for an unsigned interval. */
> +
> +    LM_END
> +};
> +
> +typedef enum ListMode ListMode;
> +
>  struct StringInputVisitor
>  {
>      Visitor visitor;
> +
> +    ListMode list_mode;
> +
> +    /* When parsing a list of repeating options as integers, values of the form
> +     * "a-b", representing a closed interval, are allowed. Elements in the
> +     * range are generated individually.
> +     */
> +    union {
> +        int64_t s;
> +        uint64_t u;
> +    } range_next, range_limit;
> +
>      const char *string;
>  };
>  
> +static void
> +start_list(Visitor *v, const char *name, Error **errp)
> +{
> +    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
> +
> +    /* we can't traverse a list in a list */
> +    assert(siv->list_mode == LM_NONE);
> +    siv->list_mode = LM_STARTED;
> +}
> +
> +static GenericList *
> +next_list(Visitor *v, GenericList **list, Error **errp)
> +{
> +    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
> +    GenericList **link;
> +
> +    switch (siv->list_mode) {
> +    case LM_STARTED:
> +        siv->list_mode = LM_IN_PROGRESS;
> +        link = list;
> +        break;
> +
> +    case LM_SIGNED_INTERVAL:
> +    case LM_UNSIGNED_INTERVAL:
> +        link = &(*list)->next;
> +
> +        if (siv->list_mode == LM_SIGNED_INTERVAL) {
> +            if (siv->range_next.s < siv->range_limit.s) {
> +                ++siv->range_next.s;
> +                break;
> +            }
> +        } else if (siv->range_next.u < siv->range_limit.u) {
> +            ++siv->range_next.u;
> +            break;
> +        }
> +        siv->list_mode = LM_END;
> +        /* range has been completed, fall through */
> +
> +    case LM_END:
> +        return NULL;
> +
> +    case LM_IN_PROGRESS:
> +        link = &(*list)->next;
> +        break;
> +
> +    default:
> +        abort();
> +    }
> +
> +    *link = g_malloc0(sizeof **link);
> +    return *link;
> +}
> +
> +static void
> +end_list(Visitor *v, Error **errp)
> +{
> +    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
> +
> +    assert(siv->list_mode == LM_STARTED ||
> +           siv->list_mode == LM_END ||
> +           siv->list_mode == LM_IN_PROGRESS ||
> +           siv->list_mode == LM_SIGNED_INTERVAL ||
> +           siv->list_mode == LM_UNSIGNED_INTERVAL);
> +    siv->list_mode = LM_NONE;
> +}
> +
>  static void parse_type_int(Visitor *v, int64_t *obj, const char *name,
>                             Error **errp)
>  {
>      StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
> -    char *endp = (char *) siv->string;
> +    char *str = (char *) siv->string;
>      long long val;
> +    char *endptr;
>  
> -    errno = 0;
> -    if (siv->string) {
> -        val = strtoll(siv->string, &endp, 0);
> +    if (siv->list_mode == LM_SIGNED_INTERVAL) {
> +        *obj = siv->range_next.s;
> +        return;
>      }
> -    if (!siv->string || errno || endp == siv->string || *endp) {
> +
> +    if (!siv->string) {
>          error_set(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
>                    "integer");
>          return;
>      }
>  
> -    *obj = val;
> +    errno = 0;
> +    val = strtoll(siv->string, &endptr, 0);
> +
> +    if (errno == 0 && endptr > str && INT64_MIN <= val && val <= INT64_MAX) {
> +        if (*endptr == '\0') {
> +            *obj = val;
> +            siv->list_mode = LM_END;
> +            return;
> +        }
> +        if (*endptr == '-' && siv->list_mode == LM_IN_PROGRESS) {
> +            long long val2;
> +
> +            str = endptr + 1;
> +            val2 = strtoll(str, &endptr, 0);
> +            if (errno == 0 && endptr > str && *endptr == '\0' &&
> +                INT64_MIN <= val2 && val2 <= INT64_MAX && val <= val2 &&
> +                (val > INT64_MAX - 65536 ||
> +                 val2 < val + 65536)) {
> +                siv->range_next.s = val;
> +                siv->range_limit.s = val2;
> +                siv->list_mode = LM_SIGNED_INTERVAL;
> +
> +                /* as if entering on the top */
> +                *obj = siv->range_next.s;
> +                return;
> +            }
> +        }
> +    }
> +    error_set(errp, QERR_INVALID_PARAMETER_VALUE, name,
> +              (siv->list_mode == LM_NONE) ? "an int64 value" :
> +                                           "an int64 value or range");
>  }

Two problems:

1. the code is mostly copied from OptsVisitor. maybe we can share the
   code?

2. int list is not implemented in the string output visitor, but there is
   currently no user of it. Should we implement it or not?

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 11/14] qapi: make string input visitor parse int list
  2014-02-19  8:17   ` Hu Tao
@ 2014-02-19  8:42     ` Paolo Bonzini
  0 siblings, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-19  8:42 UTC (permalink / raw)
  To: Hu Tao, qemu-devel; +Cc: imammedo, lersek, Wanlong Gao

Il 19/02/2014 09:17, Hu Tao ha scritto:
> Two problems:
>
> 1. the code is mostly copied from OptsVisitor. maybe we can share the
>    code?

I think it's not a huge problem.  Maybe OptsVisitor could be made to use 
a StringInputVisitor internally.

> 2. int list is not implemented in string output visitor. but there is
>    currently no user of it. Should we implement it or not?

Yes, please.  We probably will add sooner or later a qom-get/qom-set 
pair of HMP commands and these will use the string output visitor.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields Hu Tao
@ 2014-02-19  9:03   ` Paolo Bonzini
  2014-02-19  9:36     ` Igor Mammedov
  2014-02-25 10:09     ` Hu Tao
  0 siblings, 2 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-19  9:03 UTC (permalink / raw)
  To: Hu Tao, qemu-devel; +Cc: imammedo, lersek, Wanlong Gao

Il 19/02/2014 08:54, Hu Tao ha scritto:
> Thus makes user control how to allocate memory for ram backend.
>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  backends/hostmem-ram.c  | 158 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/sysemu/sysemu.h |   2 +
>  2 files changed, 160 insertions(+)
>
> diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
> index a496dbd..2da9341 100644
> --- a/backends/hostmem-ram.c
> +++ b/backends/hostmem-ram.c
> @@ -10,23 +10,179 @@
>   * See the COPYING file in the top-level directory.
>   */
>  #include "sysemu/hostmem.h"
> +#include "sysemu/sysemu.h"
> +#include "qemu/bitmap.h"
> +#include "qapi-visit.h"
> +#include "qemu/config-file.h"
> +#include "qapi/opts-visitor.h"
>
>  #define TYPE_MEMORY_BACKEND_RAM "memory-ram"
> +#define MEMORY_BACKEND_RAM(obj) \
> +    OBJECT_CHECK(HostMemoryBackendRam, (obj), TYPE_MEMORY_BACKEND_RAM)
>
> +typedef struct HostMemoryBackendRam HostMemoryBackendRam;
> +
> +/**
> + * @HostMemoryBackendRam
> + *
> + * @parent: opaque parent object container
> + * @host_nodes: host nodes bitmap used for memory policy
> + * @policy: host memory policy
> + * @relative: if the host nodes bitmap is relative
> + */
> +struct HostMemoryBackendRam {
> +    /* private */
> +    HostMemoryBackend parent;
> +
> +    DECLARE_BITMAP(host_nodes, MAX_NODES);
> +    HostMemPolicy policy;
> +    bool relative;
> +};
> +
> +static void
> +get_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
> +               Error **errp)
> +{
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
> +    uint16List *host_nodes = NULL;
> +    uint16List **node = &host_nodes;
> +    unsigned long value;
> +
> +    value = find_first_bit(ram_backend->host_nodes, MAX_NODES);
> +    if (value == MAX_NODES) {
> +        return;
> +    }
> +
> +    *node = g_malloc0(sizeof(**node));
> +    (*node)->value = value;
> +    node = &(*node)->next;
> +
> +    do {
> +        value = find_next_bit(ram_backend->host_nodes, MAX_NODES, value + 1);
> +        if (value == MAX_NODES) {
> +            break;
> +        }
> +
> +        *node = g_malloc0(sizeof(**node));
> +        (*node)->value = value;
> +        node = &(*node)->next;
> +    } while (true);
> +
> +    visit_type_uint16List(v, &host_nodes, name, errp);
> +}
> +
> +static void
> +set_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
> +               Error **errp)
> +{
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
> +    uint16List *l = NULL;
> +
> +    visit_type_uint16List(v, &l, name, errp);
> +
> +    while (l) {
> +        bitmap_set(ram_backend->host_nodes, l->value, 1);
> +        l = l->next;
> +    }
> +}
> +
> +static const char *policies[HOST_MEM_POLICY_MAX + 1] = {
> +    [HOST_MEM_POLICY_DEFAULT] = "default",
> +    [HOST_MEM_POLICY_PREFERRED] = "preferred",
> +    [HOST_MEM_POLICY_MEMBIND] = "membind",
> +    [HOST_MEM_POLICY_INTERLEAVE] = "interleave",
> +    [HOST_MEM_POLICY_MAX] = NULL,
> +};

This is already available in qapi-types.c as HostMemPolicy_lookup.

> +static void
> +get_policy(Object *obj, Visitor *v, void *opaque, const char *name,
> +           Error **errp)
> +{
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
> +    int policy = ram_backend->policy;
> +
> +    visit_type_enum(v, &policy, policies, NULL, name, errp);
> +}
> +
> +static void
> +set_policy(Object *obj, Visitor *v, void *opaque, const char *name,
> +           Error **errp)
> +{
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
> +    int policy;
> +
> +    visit_type_enum(v, &policy, policies, NULL, name, errp);
> +    ram_backend->policy = policy;

I think you need to set an error if backend->mr != NULL.

> +}
> +
> +
> +static bool get_relative(Object *obj, Error **errp)
> +{
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
> +
> +    return ram_backend->relative;
> +}
> +
> +static void set_relative(Object *obj, bool value, Error **errp)
> +{
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
> +
> +    ram_backend->relative = value;
> +}

Do we need relative vs. static?  Also, the default right now is static, 
while in Linux kernel this is a tri-state: relative, static, default.

I think that for now we should just omit this and only allow the default 
setting.  We can introduce an enum later without making the API 
backwards-incompatible.

> +#include <sys/syscall.h>
> +#ifndef MPOL_F_RELATIVE_NODES
> +#define MPOL_F_RELATIVE_NODES (1 << 14)
> +#define MPOL_F_STATIC_NODES   (1 << 15)
> +#endif
>
>  static int
>  ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
>  {
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
> +    int mode = ram_backend->policy;
> +    void *p;
> +    unsigned long maxnode;
> +
>      if (!memory_region_size(&backend->mr)) {
>          memory_region_init_ram(&backend->mr, OBJECT(backend),
>                                 object_get_canonical_path(OBJECT(backend)),
>                                 backend->size);
> +
> +        p = memory_region_get_ram_ptr(&backend->mr);
> +        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
> +
> +        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
> +            MPOL_F_STATIC_NODES;
> +        /* This is a workaround for a long standing bug in Linux'
> +         * mbind implementation, which cuts off the last specified
> +         * node. To stay compatible should this bug be fixed, we
> +         * specify one more node and zero this one out.
> +         */
> +        if (syscall(SYS_mbind, p, backend->size, mode,
> +                    ram_backend->host_nodes, maxnode + 2, 0)) {

This does not compile on non-Linux; also, does libnuma include the 
workaround?  If so, this is a hint that we should be using libnuma 
instead...

Finally, all this code should be in hostmem.c, not hostmem-ram.c, 
because the same policies can be applied to hugepage-backed memory.

Currently host_memory_backend_get_memory is calling bc->memory_init. 
Probably the call should be replaced by something like

static void
host_memory_backend_alloc(HostMemoryBackend *backend, Error **errp)
{
     Error *local_err = NULL;
     bc->memory_init(backend, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
         return;
     }

     ... set policy ...
}

...

     Error *local_err = NULL;
     host_memory_backend_alloc(backend, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
         return NULL;
     }

     assert(memory_region_size(&backend->mr) != 0);
     return &backend->mr;
}

> +            return -1;
> +        }
>      }
>
>      return 0;
>  }
>
>  static void
> +ram_backend_initfn(Object *obj)
> +{
> +    object_property_add(obj, "host-nodes", "int",
> +                        get_host_nodes,
> +                        set_host_nodes, NULL, NULL, NULL);
> +    object_property_add(obj, "policy", "string",

The convention is "str".

> +                        get_policy,
> +                        set_policy, NULL, NULL, NULL);
> +    object_property_add_bool(obj, "relative",
> +                             get_relative, set_relative, NULL);
> +}
> +
> +static void
>  ram_backend_class_init(ObjectClass *oc, void *data)
>  {
>      HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
> @@ -38,6 +194,8 @@ static const TypeInfo ram_backend_info = {
>      .name = TYPE_MEMORY_BACKEND_RAM,
>      .parent = TYPE_MEMORY_BACKEND,
>      .class_init = ram_backend_class_init,
> +    .instance_size = sizeof(HostMemoryBackendRam),
> +    .instance_init = ram_backend_initfn,
>  };
>
>  static void register_types(void)
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index acfc0c7..a3d8c02 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -152,6 +152,8 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
>                                            const char *name,
>                                            QEMUMachineInitArgs *args);
>
> +extern QemuOptsList qemu_memdev_opts;
> +

Not needed anymore, I think.

Paolo

>  #define MAX_OPTION_ROMS 16
>  typedef struct QEMUOptionRom {
>      const char *name;
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 14/14] amp: add query-memdev
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 14/14] amp: add query-memdev Hu Tao
  2014-02-19  8:14   ` Hu Tao
@ 2014-02-19  9:07   ` Paolo Bonzini
  1 sibling, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-19  9:07 UTC (permalink / raw)
  To: Hu Tao, qemu-devel; +Cc: imammedo, lersek, Wanlong Gao

Il 19/02/2014 08:54, Hu Tao ha scritto:
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  backends/hostmem-ram.c | 71 ++++++++++++++++++++++++++++++++++++++++++++------
>  qapi-schema.json       | 31 ++++++++++++++++++++++
>  qmp-commands.hx        | 30 +++++++++++++++++++++
>  3 files changed, 124 insertions(+), 8 deletions(-)

This is in principle not necessary, because we can query everything via 
qom-get/qom-set; but I can see that it is useful.  If you want this:

(1) please put it in numa.c and code it in a way that does not use 
internal information of HostMemoryBackend; for example, you can walk 
numa_info[...].node_memdev and use object_property_get/set on the object 
to fill in the result.

This will also eliminate all duplicate code between func and the 
property visitors.

(2) please add an HMP variant "info memdev".

Paolo

> diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
> index 2da9341..9f19ab8 100644
> --- a/backends/hostmem-ram.c
> +++ b/backends/hostmem-ram.c
> @@ -15,6 +15,7 @@
>  #include "qapi-visit.h"
>  #include "qemu/config-file.h"
>  #include "qapi/opts-visitor.h"
> +#include "qmp-commands.h"
>
>  #define TYPE_MEMORY_BACKEND_RAM "memory-ram"
>  #define MEMORY_BACKEND_RAM(obj) \
> @@ -37,8 +38,66 @@ struct HostMemoryBackendRam {
>      DECLARE_BITMAP(host_nodes, MAX_NODES);
>      HostMemPolicy policy;
>      bool relative;
> +
> +    QTAILQ_ENTRY(HostMemoryBackendRam) next;
> +};
> +
> +static const char *policies[HOST_MEM_POLICY_MAX + 1] = {
> +    [HOST_MEM_POLICY_DEFAULT] = "default",
> +    [HOST_MEM_POLICY_PREFERRED] = "preferred",
> +    [HOST_MEM_POLICY_MEMBIND] = "membind",
> +    [HOST_MEM_POLICY_INTERLEAVE] = "interleave",
> +    [HOST_MEM_POLICY_MAX] = NULL,
>  };
>
> +static GSList *memdevs;
> +
> +static void func(gpointer data, gpointer user_data)
> +{
> +    HostMemoryBackendRam *backend = data;
> +    MemdevList **list = user_data;
> +    MemdevList *m;
> +    uint16List **node;
> +    unsigned long value;
> +
> +    m = g_malloc0(sizeof(*m));
> +    m->value = g_malloc0(sizeof(*m->value));
> +    m->value->policy = g_strdup(policies[backend->policy]);
> +    m->value->relative = backend->relative;
> +
> +    node = &m->value->host_nodes;
> +
> +    value = find_first_bit(backend->host_nodes, MAX_NODES);
> +    if (value < MAX_NODES) {
> +        *node = g_malloc0(sizeof(**node));
> +        (*node)->value = value;
> +        node = &(*node)->next;
> +
> +        do {
> +            value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
> +            if (value == MAX_NODES) {
> +                break;
> +            }
> +
> +            *node = g_malloc0(sizeof(**node));
> +            (*node)->value = value;
> +            node = &(*node)->next;
> +        } while (true);
> +    }
> +
> +    m->next = *list;
> +    *list = m;
> +}
> +
> +MemdevList *qmp_query_memdev(Error **errp)
> +{
> +    MemdevList *list = NULL;
> +
> +    g_slist_foreach(memdevs, func, &list);
> +
> +    return list;
> +}
> +
>  static void
>  get_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
>                 Error **errp)
> @@ -86,14 +145,6 @@ set_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
>      }
>  }
>
> -static const char *policies[HOST_MEM_POLICY_MAX + 1] = {
> -    [HOST_MEM_POLICY_DEFAULT] = "default",
> -    [HOST_MEM_POLICY_PREFERRED] = "preferred",
> -    [HOST_MEM_POLICY_MEMBIND] = "membind",
> -    [HOST_MEM_POLICY_INTERLEAVE] = "interleave",
> -    [HOST_MEM_POLICY_MAX] = NULL,
> -};
> -
>  static void
>  get_policy(Object *obj, Visitor *v, void *opaque, const char *name,
>             Error **errp)
> @@ -172,6 +223,8 @@ ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
>  static void
>  ram_backend_initfn(Object *obj)
>  {
> +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(obj);
> +
>      object_property_add(obj, "host-nodes", "int",
>                          get_host_nodes,
>                          set_host_nodes, NULL, NULL, NULL);
> @@ -180,6 +233,8 @@ ram_backend_initfn(Object *obj)
>                          set_policy, NULL, NULL, NULL);
>      object_property_add_bool(obj, "relative",
>                               get_relative, set_relative, NULL);
> +
> +    memdevs = g_slist_append(memdevs, ram_backend);
>  }
>
>  static void
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 9d6370f..7b5027d 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4474,3 +4474,34 @@
>  ##
>  { 'enum': 'HostMemPolicy',
>    'data': [ 'default', 'preferred', 'membind', 'interleave' ] }
> +
> +##
> +# @Memdev:
> +#
> +# Information of memory device
> +#
> +# @id: memory device id
> +#
> +# @host-nodes: host nodes for its memory policy
> +#
> +# @policy: memory policy of memory device
> +#
> +# Since: 2.0
> +##
> +
> +{ 'type': 'Memdev',
> +  'data': {
> +    'host-nodes': ['uint16'],
> +    'policy': 'str',
> +    'relative': 'bool' }}
> +
> +##
> +# @query-memdev:
> +#
> +# Returns information for all memory devices.
> +#
> +# Returns: a list of @Memdev.
> +#
> +# Since: 2.0
> +##
> +{ 'command': 'query-memdev', 'returns': ['Memdev'] }
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index cce6b81..20368f7 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -3457,3 +3457,33 @@ Example:
>                     } } ] }
>
>  EQMP
> +
> +    {
> +        .name       = "query-memdev",
> +        .args_type  = "",
> +        .mhandler.cmd_new = qmp_marshal_input_query_memdev,
> +    },
> +
> +SQMP
> +query-memdev
> +------------
> +
> +Show memory devices information.
> +
> +
> +Example (1):
> +
> +-> { "execute": "query-memdev" }
> +<- { "return": [
> +       {
> +         "host-nodes": [0, 1],
> +         "policy": "bind"
> +       },
> +       {
> +         "host-nodes": [2, 3],
> +         "policy": "preferred"
> +       }
> +     ]
> +   }
> +
> +EQMP
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type Hu Tao
@ 2014-02-19  9:08   ` Paolo Bonzini
  2014-02-19 11:23   ` Igor Mammedov
  1 sibling, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-19  9:08 UTC (permalink / raw)
  To: Hu Tao, qemu-devel; +Cc: imammedo, lersek, Wanlong Gao

Il 19/02/2014 08:54, Hu Tao ha scritto:
> From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
>
> This new enum type will be used to set host memory policy of
> backend host memory.
>
> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  qapi-schema.json | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
>
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 498ea9b..9d6370f 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4454,3 +4454,23 @@
>     '*cpus':   ['uint16'],
>     '*memdev': 'str',
>     '*mem':    'str' }}
> +
> +##
> +# @HostMemPolicy
> +#
> +# Host memory policy types
> +#
> +# @default: restore default policy, remove any nondefault policy
> +#
> +# @preferred: set the preferred host node for allocation

host nodes

> +#
> +# @membind: a strict policy that restricts memory allocation to the
> +#           host nodes specified
> +#
> +# @interleave: the page allocations is interleaved across the set

@interleave: memory allocations are interleaved across ...

> +#              of host nodes specified
> +#
> +# Since 2.0
> +##
> +{ 'enum': 'HostMemPolicy',
> +  'data': [ 'default', 'preferred', 'membind', 'interleave' ] }
>

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 07/14] add memdev backend infrastructure
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 07/14] add memdev backend infrastructure Hu Tao
@ 2014-02-19  9:15   ` Igor Mammedov
  0 siblings, 0 replies; 53+ messages in thread
From: Igor Mammedov @ 2014-02-19  9:15 UTC (permalink / raw)
  To: Hu Tao; +Cc: pbonzini, lersek, qemu-devel, Wanlong Gao

On Wed, 19 Feb 2014 15:53:58 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> Provides framework for splitting host RAM allocation/
> policies into a separate backend that could be used
> by devices.
> 
> Initially only legacy RAM backend is provided, which
> uses memory_region_init_ram() allocator and compatible
> with every CLI option that affects memory_region_init_ram().
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> 
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
this doesn't use the complete() method of user_creatable

pls, pickup more correct patch:

https://github.com/imammedo/qemu/commit/19a4c596f4fab35706335e6dd01e94413195aa30

>  backends/Makefile.objs   |   2 +
>  backends/hostmem-ram.c   |  48 ++++++++++++++++++
>  backends/hostmem.c       | 125 +++++++++++++++++++++++++++++++++++++++++++++++
>  include/sysemu/hostmem.h |  63 ++++++++++++++++++++++++
>  4 files changed, 238 insertions(+)
>  create mode 100644 backends/hostmem-ram.c
>  create mode 100644 backends/hostmem.c
>  create mode 100644 include/sysemu/hostmem.h
> 
> diff --git a/backends/Makefile.objs b/backends/Makefile.objs
> index 42557d5..e6bdc11 100644
> --- a/backends/Makefile.objs
> +++ b/backends/Makefile.objs
> @@ -6,3 +6,5 @@ common-obj-$(CONFIG_BRLAPI) += baum.o
>  $(obj)/baum.o: QEMU_CFLAGS += $(SDL_CFLAGS) 
>  
>  common-obj-$(CONFIG_TPM) += tpm.o
> +
> +common-obj-y += hostmem.o hostmem-ram.o
> diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
> new file mode 100644
> index 0000000..a496dbd
> --- /dev/null
> +++ b/backends/hostmem-ram.c
> @@ -0,0 +1,48 @@
> +/*
> + * QEMU Host Memory Backend
> + *
> + * Copyright (C) 2013 Red Hat Inc
> + *
> + * Authors:
> + *   Igor Mammedov <imammedo@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +#include "sysemu/hostmem.h"
> +
> +#define TYPE_MEMORY_BACKEND_RAM "memory-ram"
> +
> +
> +static int
> +ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> +{
> +    if (!memory_region_size(&backend->mr)) {
> +        memory_region_init_ram(&backend->mr, OBJECT(backend),
> +                               object_get_canonical_path(OBJECT(backend)),
> +                               backend->size);
> +    }
> +
> +    return 0;
> +}
> +
> +static void
> +ram_backend_class_init(ObjectClass *oc, void *data)
> +{
> +    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
> +
> +    bc->memory_init = ram_backend_memory_init;
> +}
> +
> +static const TypeInfo ram_backend_info = {
> +    .name = TYPE_MEMORY_BACKEND_RAM,
> +    .parent = TYPE_MEMORY_BACKEND,
> +    .class_init = ram_backend_class_init,
> +};
> +
> +static void register_types(void)
> +{
> +    type_register_static(&ram_backend_info);
> +}
> +
> +type_init(register_types);
> diff --git a/backends/hostmem.c b/backends/hostmem.c
> new file mode 100644
> index 0000000..4b8fd8d
> --- /dev/null
> +++ b/backends/hostmem.c
> @@ -0,0 +1,125 @@
> +/*
> + * QEMU Host Memory Backend
> + *
> + * Copyright (C) 2013 Red Hat Inc
> + *
> + * Authors:
> + *   Igor Mammedov <imammedo@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +#include "sysemu/hostmem.h"
> +#include "sysemu/sysemu.h"
> +#include "qapi/visitor.h"
> +#include "qapi/qmp/qerror.h"
> +#include "qemu/config-file.h"
> +#include "qom/object_interfaces.h"
> +
> +static void
> +hostmemory_backend_get_size(Object *obj, Visitor *v, void *opaque,
> +                            const char *name, Error **errp)
> +{
> +    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
> +    uint64_t value = backend->size;
> +
> +    visit_type_size(v, &value, name, errp);
> +}
> +
> +static void
> +hostmemory_backend_set_size(Object *obj, Visitor *v, void *opaque,
> +                            const char *name, Error **errp)
> +{
> +    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
> +    uint64_t value;
> +
> +    if (memory_region_size(&backend->mr)) {
> +        error_setg(errp, "cannot change property value\n");
> +        return;
> +    }
> +
> +    visit_type_size(v, &value, name, errp);
> +    if (error_is_set(errp)) {
> +        return;
> +    }
> +    if (!value) {
> +        error_setg(errp, "Property '%s.%s' doesn't take value '%" PRIu64 "'",
> +                   object_get_typename(obj), name , value);
> +        return;
> +    }
> +    backend->size = value;
> +}
> +
> +static void hostmemory_backend_initfn(Object *obj)
> +{
> +    object_property_add(obj, "size", "int",
> +                        hostmemory_backend_get_size,
> +                        hostmemory_backend_set_size, NULL, NULL, NULL);
> +}
> +
> +static void hostmemory_backend_finalize(Object *obj)
> +{
> +    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
> +
> +    if (memory_region_size(&backend->mr)) {
> +        memory_region_destroy(&backend->mr);
> +    }
> +}
> +
> +static int
> +hostmemory_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> +{
> +    error_setg(errp, "memory_init is not implemented for type [%s]",
> +               object_get_typename(OBJECT(backend)));
> +
> +    return -1;
> +}
> +
> +MemoryRegion *
> +host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
> +{
> +    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(backend);
> +    Object *obj = OBJECT(backend);
> +
> +    if (!backend->size) {
> +        error_setg(errp, "Invalid property [%s.size] value: %" PRIu64,
> +                   object_get_typename(obj), backend->size);
> +        return NULL;
> +    }
> +
> +    if (bc->memory_init(backend, errp) < 0) {
> +        return NULL;
> +    }
> +
> +    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
> +}
> +
> +static void
> +hostmemory_backend_class_init(ObjectClass *oc, void *data)
> +{
> +    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
> +
> +    bc->memory_init = hostmemory_backend_memory_init;
> +}
> +
> +static const TypeInfo hostmemory_backend_info = {
> +    .name = TYPE_MEMORY_BACKEND,
> +    .parent = TYPE_OBJECT,
> +    .abstract = true,
> +    .class_size = sizeof(HostMemoryBackendClass),
> +    .class_init = hostmemory_backend_class_init,
> +    .instance_size = sizeof(HostMemoryBackend),
> +    .instance_init = hostmemory_backend_initfn,
> +    .instance_finalize = hostmemory_backend_finalize,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_USER_CREATABLE },
> +        { }
> +    }
> +};
> +
> +static void register_types(void)
> +{
> +    type_register_static(&hostmemory_backend_info);
> +}
> +
> +type_init(register_types);
> diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
> new file mode 100644
> index 0000000..5847943
> --- /dev/null
> +++ b/include/sysemu/hostmem.h
> @@ -0,0 +1,63 @@
> +/*
> + * QEMU Host Memory Backend
> + *
> + * Copyright (C) 2013 Red Hat Inc
> + *
> + * Authors:
> + *   Igor Mammedov <imammedo@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +#ifndef QEMU_RAM_H
> +#define QEMU_RAM_H
> +
> +#include "qom/object.h"
> +#include "qapi/error.h"
> +#include "exec/memory.h"
> +#include "qemu/option.h"
> +
> +#define TYPE_MEMORY_BACKEND "memory"
> +#define MEMORY_BACKEND(obj) \
> +    OBJECT_CHECK(HostMemoryBackend, (obj), TYPE_MEMORY_BACKEND)
> +#define MEMORY_BACKEND_GET_CLASS(obj) \
> +    OBJECT_GET_CLASS(HostMemoryBackendClass, (obj), TYPE_MEMORY_BACKEND)
> +#define MEMORY_BACKEND_CLASS(klass) \
> +    OBJECT_CLASS_CHECK(HostMemoryBackendClass, (klass), TYPE_MEMORY_BACKEND)
> +
> +typedef struct HostMemoryBackend HostMemoryBackend;
> +typedef struct HostMemoryBackendClass HostMemoryBackendClass;
> +
> +/**
> + * HostMemoryBackendClass:
> + * @parent_class: opaque parent class container
> + * @memory_init: hook for derived classes to perform memory allocation
> + */
> +struct HostMemoryBackendClass {
> +    ObjectClass parent_class;
> +
> +    int (*memory_init)(HostMemoryBackend *backend, Error **errp);
> +};
> +
> +/**
> + * @HostMemoryBackend
> + *
> + * @parent: opaque parent object container
> + * @size: amount of memory backend provides
> + * @id: unique identification string in memdev namespace
> + * @mr: MemoryRegion representing host memory belonging to backend
> + */
> +struct HostMemoryBackend {
> +    /* private */
> +    Object parent;
> +
> +    /* protected */
> +    uint64_t size;
> +
> +    MemoryRegion mr;
> +};
> +
> +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend,
> +                                             Error **errp);
> +
> +#endif

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 03/14] NUMA: Add numa_info structure to contain numa nodes info
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 03/14] NUMA: Add numa_info structure to contain numa nodes info Hu Tao
@ 2014-02-19  9:26   ` Igor Mammedov
  2014-02-21  2:54     ` hu tao
  0 siblings, 1 reply; 53+ messages in thread
From: Igor Mammedov @ 2014-02-19  9:26 UTC (permalink / raw)
  To: Hu Tao; +Cc: pbonzini, lersek, qemu-devel, Wanlong Gao, Andre Przywara

On Wed, 19 Feb 2014 15:53:54 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> 
> Add the numa_info structure to contain the numa nodes memory,
> VCPUs information and the future added numa nodes host memory
> policies.
this is an old version that breaks the sparc build, which Wanlong already fixed.

You can replace patches 1-5 with more recent ones posted recently:
http://www.mail-archive.com/qemu-devel@nongnu.org/msg216404.html

> 
> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
> Signed-off-by: Andre Przywara <andre.przywara@amd.com>
> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  hw/i386/pc.c            | 12 ++++++++----
>  include/sysemu/sysemu.h |  8 ++++++--
>  monitor.c               |  2 +-
>  numa.c                  | 23 ++++++++++++-----------
>  vl.c                    |  7 +++----
>  5 files changed, 30 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index e715a33..a464e48 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -674,14 +674,14 @@ static FWCfgState *bochs_bios_init(void)
>          unsigned int apic_id = x86_cpu_apic_id_from_index(i);
>          assert(apic_id < apic_id_limit);
>          for (j = 0; j < nb_numa_nodes; j++) {
> -            if (test_bit(i, node_cpumask[j])) {
> +            if (test_bit(i, numa_info[j].node_cpu)) {
>                  numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
>                  break;
>              }
>          }
>      }
>      for (i = 0; i < nb_numa_nodes; i++) {
> -        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
> +        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
>      }
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
>                       (1 + apic_id_limit + nb_numa_nodes) *
> @@ -1077,8 +1077,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
>      guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
>      guest_info->apic_xrupt_override = kvm_allows_irq0_override();
>      guest_info->numa_nodes = nb_numa_nodes;
> -    guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
> +    guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
>                                      sizeof *guest_info->node_mem);
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        guest_info->node_mem[i] = numa_info[i].node_mem;
> +    }
> +
>      guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
>                                       sizeof *guest_info->node_cpu);
>  
> @@ -1086,7 +1090,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
>          unsigned int apic_id = x86_cpu_apic_id_from_index(i);
>          assert(apic_id < guest_info->apic_id_limit);
>          for (j = 0; j < nb_numa_nodes; j++) {
> -            if (test_bit(i, node_cpumask[j])) {
> +            if (test_bit(i, numa_info[j].node_cpu)) {
>                  guest_info->node_cpu[apic_id] = j;
>                  break;
>              }
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 2509649..d873b42 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -9,6 +9,7 @@
>  #include "qapi-types.h"
>  #include "qemu/notify.h"
>  #include "qemu/main-loop.h"
> +#include "qemu/bitmap.h"
>  
>  /* vl.c */
>  
> @@ -134,8 +135,11 @@ extern QEMUClockType rtc_clock;
>  #define MAX_NODES 64
>  #define MAX_CPUMASK_BITS 255
>  extern int nb_numa_nodes;
> -extern uint64_t node_mem[MAX_NODES];
> -extern unsigned long *node_cpumask[MAX_NODES];
> +typedef struct node_info {
> +    uint64_t node_mem;
> +    DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
> +} NodeInfo;
> +extern NodeInfo numa_info[MAX_NODES];
>  void numa_add(const char *optarg);
>  void set_numa_nodes(void);
>  void set_numa_modes(void);
> diff --git a/monitor.c b/monitor.c
> index 690c152..0284735 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -2004,7 +2004,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
>          }
>          monitor_printf(mon, "\n");
>          monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
> -            node_mem[i] >> 20);
> +            numa_info[i].node_mem >> 20);
>      }
>  }
>  
> diff --git a/numa.c b/numa.c
> index a06e2d1..1f413a0 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -61,7 +61,7 @@ static void numa_node_parse_cpus(int nodenr, const char *cpus)
>          goto error;
>      }
>  
> -    bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
> +    bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1);
>      return;
>  
>  error:
> @@ -101,7 +101,7 @@ void numa_add(const char *optarg)
>          }
>  
>          if (get_param_value(option, 128, "mem", optarg) == 0) {
> -            node_mem[nodenr] = 0;
> +            numa_info[nodenr].node_mem = 0;
>          } else {
>              int64_t sval;
>              sval = strtosz(option, &endptr);
> @@ -109,7 +109,7 @@ void numa_add(const char *optarg)
>                  fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
>                  exit(1);
>              }
> -            node_mem[nodenr] = sval;
> +            numa_info[nodenr].node_mem = sval;
>          }
>          if (get_param_value(option, 128, "cpus", optarg) != 0) {
>              numa_node_parse_cpus(nodenr, option);
> @@ -134,7 +134,7 @@ void set_numa_nodes(void)
>           * and distribute the available memory equally across all nodes
>           */
>          for (i = 0; i < nb_numa_nodes; i++) {
> -            if (node_mem[i] != 0) {
> +            if (numa_info[i].node_mem != 0) {
>                  break;
>              }
>          }
> @@ -145,15 +145,16 @@ void set_numa_nodes(void)
>               * the final node gets the rest.
>               */
>              for (i = 0; i < nb_numa_nodes - 1; i++) {
> -                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
> -                usedmem += node_mem[i];
> +                numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
> +                                        ~((1 << 23UL) - 1);
> +                usedmem += numa_info[i].node_mem;
>              }
> -            node_mem[i] = ram_size - usedmem;
> +            numa_info[i].node_mem = ram_size - usedmem;
>          }
>  
>          uint64_t numa_total = 0;
>          for (i = 0; i < nb_numa_nodes; i++) {
> -            numa_total += node_mem[i];
> +            numa_total += numa_info[i].node_mem;
>          }
>          if (numa_total != ram_size) {
>              fprintf(stderr, "qemu: numa nodes total memory size "
> @@ -162,7 +163,7 @@ void set_numa_nodes(void)
>          }
>  
>          for (i = 0; i < nb_numa_nodes; i++) {
> -            if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
> +            if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
>                  break;
>              }
>          }
> @@ -172,7 +173,7 @@ void set_numa_nodes(void)
>           */
>          if (i == nb_numa_nodes) {
>              for (i = 0; i < max_cpus; i++) {
> -                set_bit(i, node_cpumask[i % nb_numa_nodes]);
> +                set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
>              }
>          }
>      }
> @@ -185,7 +186,7 @@ void set_numa_modes(void)
>  
>      CPU_FOREACH(cpu) {
>          for (i = 0; i < nb_numa_nodes; i++) {
> -            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
> +            if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
>                  cpu->numa_node = i;
>              }
>          }
> diff --git a/vl.c b/vl.c
> index 0adac0c..915f8b7 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -196,8 +196,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
>      QTAILQ_HEAD_INITIALIZER(fw_boot_order);
>  
>  int nb_numa_nodes;
> -uint64_t node_mem[MAX_NODES];
> -unsigned long *node_cpumask[MAX_NODES];
> +NodeInfo numa_info[MAX_NODES];
>  
>  uint8_t qemu_uuid[16];
>  bool qemu_uuid_set;
> @@ -2787,8 +2786,8 @@ int main(int argc, char **argv, char **envp)
>      translation = BIOS_ATA_TRANSLATION_AUTO;
>  
>      for (i = 0; i < MAX_NODES; i++) {
> -        node_mem[i] = 0;
> -        node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
> +        numa_info[i].node_mem = 0;
> +        bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
>      }
>  
>      nb_numa_nodes = 0;

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-19  9:03   ` Paolo Bonzini
@ 2014-02-19  9:36     ` Igor Mammedov
  2014-02-25 10:20       ` Hu Tao
  2014-02-26  5:57       ` Hu Tao
  2014-02-25 10:09     ` Hu Tao
  1 sibling, 2 replies; 53+ messages in thread
From: Igor Mammedov @ 2014-02-19  9:36 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Hu Tao, lersek, qemu-devel, Wanlong Gao

On Wed, 19 Feb 2014 10:03:13 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

>   19/02/2014 08:54, Hu Tao ha scritto:
> > Thus makes user control how to allocate memory for ram backend.
> >
> > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > ---
> >  backends/hostmem-ram.c  | 158 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  include/sysemu/sysemu.h |   2 +
> >  2 files changed, 160 insertions(+)
> >
> > diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
[...]

> >  static int
> >  ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> >  {
> > +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
> > +    int mode = ram_backend->policy;
> > +    void *p;
> > +    unsigned long maxnode;
> > +
> >      if (!memory_region_size(&backend->mr)) {
> >          memory_region_init_ram(&backend->mr, OBJECT(backend),
> >                                 object_get_canonical_path(OBJECT(backend)),
> >                                 backend->size);
> > +
> > +        p = memory_region_get_ram_ptr(&backend->mr);
> > +        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
> > +
> > +        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
> > +            MPOL_F_STATIC_NODES;
> > +        /* This is a workaround for a long standing bug in Linux'
> > +         * mbind implementation, which cuts off the last specified
> > +         * node. To stay compatible should this bug be fixed, we
> > +         * specify one more node and zero this one out.
> > +         */
> > +        if (syscall(SYS_mbind, p, backend->size, mode,
> > +                    ram_backend->host_nodes, maxnode + 2, 0)) {
> 
> This does not compile on non-Linux; also, does libnuma include the 
> workaround?  If so, this is a hint that we should be using libnuma 
> instead...
> 
> Finally, all this code should be in hostmem.c, not hostmem-ram.c, 
> because the same policies can be applied to hugepage-backed memory.
> 
> Currently host_memory_backend_get_memory is calling bc->memory_init. 
> Probably the call should be replaced by something like
I've pushed an updated version of memdev to github, where
host_memory_backend_get_memory() is just a convenience wrapper to get
access to memdev's internal MemoryRegion.

All initialization now is done in user_creatable->complete() method
which calls ram_backend_memory_init() so leaving it as is should be fine.

> 
> static void
> host_memory_backend_alloc(HostMemoryBackend *backend, Error **errp)
> {
>      Error *local_err = NULL;
>      bc->memory_init(backend, &local_err);
>      if (local_err != NULL) {
>          error_propagate(errp, local_err);
>          return;
>      }
> 
>      ... set policy ...
> }
> 
> ...
> 
>      Error *local_err = NULL;
>      host_memory_backend_alloc(backend, &local_err);
>      if (local_err != NULL) {
>          error_propagate(errp, local_err);
>          return NULL;
>      }
> 
>      assert(memory_region_size(&backend->mr) != 0);
>      return &backend->mr;
> }
> 
[...]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option Hu Tao
@ 2014-02-19  9:50   ` Igor Mammedov
  2014-02-19 11:53     ` Paolo Bonzini
  2014-03-04  0:10   ` Eric Blake
  1 sibling, 1 reply; 53+ messages in thread
From: Igor Mammedov @ 2014-02-19  9:50 UTC (permalink / raw)
  To: Hu Tao; +Cc: pbonzini, lersek, qemu-devel, Wanlong Gao

On Wed, 19 Feb 2014 15:54:01 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> This option provides the infrastructure for binding guest NUMA nodes
> to host NUMA nodes.  For example:
> 
>  -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \
>  -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
>  -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
>  -numa node,nodeid=1,cpus=1,memdev=ram-node1
> 
> The option replaces "-numa mem".
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Conflicts:
> 	include/sysemu/sysemu.h
> 	numa.c
> 
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  include/sysemu/sysemu.h |  2 ++
>  numa.c                  | 64 +++++++++++++++++++++++++++++++++++++++++++++++--
>  qapi-schema.json        |  6 ++++-
>  3 files changed, 69 insertions(+), 3 deletions(-)
> 
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index e9da760..acfc0c7 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -12,6 +12,7 @@
>  #include "qemu/bitmap.h"
>  #include "qom/object.h"
>  #include "hw/boards.h"
> +#include "sysemu/hostmem.h"
>  
>  /* vl.c */
>  
> @@ -140,6 +141,7 @@ extern int nb_numa_nodes;
>  typedef struct node_info {
>      uint64_t node_mem;
>      DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
> +    HostMemoryBackend *node_memdev;
>  } NodeInfo;
>  extern NodeInfo numa_info[MAX_NODES];
>  void set_numa_nodes(void);
> diff --git a/numa.c b/numa.c
> index 403b08b..ca55ad7 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -27,6 +27,8 @@
>  #include "qapi-visit.h"
>  #include "qapi/opts-visitor.h"
>  #include "qapi/dealloc-visitor.h"
> +#include "qapi/qmp/qerror.h"
> +
>  QemuOptsList qemu_numa_opts = {
>      .name = "numa",
>      .implied_opt_name = "type",
> @@ -34,10 +36,13 @@ QemuOptsList qemu_numa_opts = {
>      .desc = { { 0 } } /* validated with OptsVisitor */
>  };
>  
> +static int have_memdevs = -1;
> +
>  static int numa_node_parse(NumaNodeOptions *opts)
>  {
>      uint16_t nodenr;
>      uint16List *cpus = NULL;
> +    Error *local_err = NULL;
>  
>      if (opts->has_nodeid) {
>          nodenr = opts->nodeid;
> @@ -60,6 +65,19 @@ static int numa_node_parse(NumaNodeOptions *opts)
>          bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
>      }
>  
> +    if (opts->has_mem && opts->has_memdev) {
> +        fprintf(stderr, "qemu: cannot specify both mem= and memdev=\n");
> +        return -1;
> +    }
> +
> +    if (have_memdevs == -1) {
> +        have_memdevs = opts->has_memdev;
> +    }
> +    if (opts->has_memdev != have_memdevs) {
> +        fprintf(stderr, "qemu: memdev option must be specified for either "
> +                "all or no nodes\n");
> +    }
> +
>      if (opts->has_mem) {
>          int64_t mem_size;
>          char *endptr;
> @@ -70,7 +88,19 @@ static int numa_node_parse(NumaNodeOptions *opts)
>          }
>          numa_info[nodenr].node_mem = mem_size;
>      }
> +    if (opts->has_memdev) {
> +        Object *o;
> +        o = object_resolve_path_type(opts->memdev, TYPE_MEMORY_BACKEND, NULL);
> +        if (!o) {
> +            error_setg(&local_err, "memdev=%s is ambiguous", opts->memdev);
> +            qerror_report_err(local_err);
> +            return -1;
> +        }
>  
> +        object_ref(o);
> +        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
> +        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
if you make numa_info a QOM object with node_memdev as a link<> property,
then the above hunk could be replaced with just setting the link.
And node_mem could be replaced with a readonly property that reads the size
directly from memdev, avoiding data duplication.

As a side-effect, numa_info will also become accessible for introspection
using the QOM interface. Something like:
 qom-list /machine/memory-node[X]
 qom-get /machine/memory-node[X]/memory_size


> +    }
>      return 0;
>  }
>  
> @@ -189,12 +219,42 @@ void set_numa_modes(void)
>      }
>  }
>  
> +static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
> +                                           const char *name,
> +                                           QEMUMachineInitArgs *args)
> +{
> +    uint64_t ram_size = args->ram_size;
> +
> +    memory_region_init_ram(mr, owner, name, ram_size);
> +    vmstate_register_ram_global(mr);
> +}
> +
>  void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
>                                            const char *name,
>                                            QEMUMachineInitArgs *args)
>  {
>      uint64_t ram_size = args->ram_size;
> +    uint64_t addr = 0;
> +    int i;
>  
> -    memory_region_init_ram(mr, owner, name, ram_size);
> -    vmstate_register_ram_global(mr);
> +    if (nb_numa_nodes == 0 || !have_memdevs) {
> +        allocate_system_memory_nonnuma(mr, owner, name, args);
> +        return;
> +    }
> +
> +    memory_region_init(mr, owner, name, ram_size);
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        Error *local_err = NULL;
> +        uint64_t size = numa_info[i].node_mem;
> +        HostMemoryBackend *backend = numa_info[i].node_memdev;
> +        MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
> +        if (local_err) {
> +            qerror_report_err(local_err);
> +            exit(1);
> +        }
> +
> +        memory_region_add_subregion(mr, addr, seg);
> +        vmstate_register_ram_global(seg);
> +        addr += size;
> +    }
>  }
> diff --git a/qapi-schema.json b/qapi-schema.json
> index a2839b8..498ea9b 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4441,7 +4441,10 @@
>  #
>  # @cpus: #optional VCPUs belong to this node
>  #
> -# @mem: #optional memory size of this node
> +# @memdev: #optional memory backend object.  If specified for one node,
> +#          it must be specified for all nodes.
> +#
> +# @mem: #optional memory size of this node; mutually exclusive with @memdev.
>  #
>  # Since: 2.0
>  ##
> @@ -4449,4 +4452,5 @@
>    'data': {
>     '*nodeid': 'uint16',
>     '*cpus':   ['uint16'],
> +   '*memdev': 'str',
>     '*mem':    'str' }}

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 06/14] qapi: add SIZE type parser to string_input_visitor
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 06/14] qapi: add SIZE type parser to string_input_visitor Hu Tao
@ 2014-02-19  9:54   ` Igor Mammedov
  0 siblings, 0 replies; 53+ messages in thread
From: Igor Mammedov @ 2014-02-19  9:54 UTC (permalink / raw)
  To: Hu Tao; +Cc: pbonzini, lersek, qemu-devel, Wanlong Gao

On Wed, 19 Feb 2014 15:53:57 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> From: Igor Mammedov <imammedo@redhat.com>
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
replace it with a better patch from Paolo that soon
will get merged into master:
http://www.mail-archive.com/qemu-devel@nongnu.org/msg216512.html


> ---
>  qapi/string-input-visitor.c | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
> index 8f1bc41..a152f5d 100644
> --- a/qapi/string-input-visitor.c
> +++ b/qapi/string-input-visitor.c
> @@ -97,6 +97,23 @@ static void parse_type_number(Visitor *v, double *obj, const char *name,
>      *obj = val;
>  }
>  
> +static void parse_type_size(Visitor *v, uint64_t *obj, const char *name,
> +                            Error **errp)
> +{
> +    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
> +    int64_t val;
> +    char *endp;
> +
> +    val = strtosz_suffix(siv->string ? siv->string : "", &endp,
> +                         STRTOSZ_DEFSUFFIX_B);
> +    if (val < 0 || *endp != '\0') {
> +        error_set(errp, QERR_INVALID_PARAMETER_VALUE, name,
> +                  "a size value representible as a non-negative int64");
> +        return;
> +    }
> +    *obj = val;
> +}
> +
>  static void parse_start_optional(Visitor *v, bool *present,
>                                   const char *name, Error **errp)
>  {
> @@ -131,6 +148,7 @@ StringInputVisitor *string_input_visitor_new(const char *str)
>      v->visitor.type_bool = parse_type_bool;
>      v->visitor.type_str = parse_type_str;
>      v->visitor.type_number = parse_type_number;
> +    v->visitor.type_size = parse_type_size;
>      v->visitor.start_optional = parse_start_optional;
>  
>      v->string = str;

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type Hu Tao
  2014-02-19  9:08   ` Paolo Bonzini
@ 2014-02-19 11:23   ` Igor Mammedov
  1 sibling, 0 replies; 53+ messages in thread
From: Igor Mammedov @ 2014-02-19 11:23 UTC (permalink / raw)
  To: Hu Tao; +Cc: pbonzini, lersek, qemu-devel, Wanlong Gao

On Wed, 19 Feb 2014 15:54:03 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> 
> This new enum type will be used to set host memory policy of
> backend host memory.
perhaps squashing it into the next patch would be better.

> 
> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  qapi-schema.json | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
> 
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 498ea9b..9d6370f 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4454,3 +4454,23 @@
>     '*cpus':   ['uint16'],
>     '*memdev': 'str',
>     '*mem':    'str' }}
> +
> +##
> +# @HostMemPolicy
> +#
> +# Host memory policy types
> +#
> +# @default: restore default policy, remove any nondefault policy
> +#
> +# @preferred: set the preferred host node for allocation
> +#
> +# @membind: a strict policy that restricts memory allocation to the
> +#           host nodes specified
> +#
> +# @interleave: the page allocations is interleaved across the set
> +#              of host nodes specified
> +#
> +# Since 2.0
> +##
> +{ 'enum': 'HostMemPolicy',
> +  'data': [ 'default', 'preferred', 'membind', 'interleave' ] }

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option
  2014-02-19  9:50   ` Igor Mammedov
@ 2014-02-19 11:53     ` Paolo Bonzini
  0 siblings, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-19 11:53 UTC (permalink / raw)
  To: Igor Mammedov, Hu Tao; +Cc: lersek, qemu-devel, Wanlong Gao

Il 19/02/2014 10:50, Igor Mammedov ha scritto:
>> > +        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
>> > +        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
> if you make numa_info  QOM object node_memdev link<> property,
> then above hunk could be replaced with just setting link.
> And node_mem could be replaced with readonly property that reads size
> directly from memdev avoiding data duplication.
>
> As side-effect it numa_info will also become accessible for introspection
> using QOM interface. Something like:
>  qom-list /machine/memory-node[X]
>  qom-get /machine/memory-node[X]/memory_size

I agree, but I think we can do it on top.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 03/14] NUMA: Add numa_info structure to contain numa nodes info
  2014-02-19  9:26   ` Igor Mammedov
@ 2014-02-21  2:54     ` hu tao
  0 siblings, 0 replies; 53+ messages in thread
From: hu tao @ 2014-02-21  2:54 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Andre Przywara, Hu Tao, qemu-devel, pbonzini, lersek, Wanlong Gao

[-- Attachment #1: Type: text/plain, Size: 577 bytes --]

On Wed, Feb 19, 2014 at 5:26 PM, Igor Mammedov <imammedo@redhat.com> wrote:

> On Wed, 19 Feb 2014 15:53:54 +0800
> Hu Tao <hutao@cn.fujitsu.com> wrote:
>
> > From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> >
> > Add the numa_info structure to contain the numa nodes memory,
> > VCPUs information and the future added numa nodes host memory
> > policies.
this is an old version that breaks the sparc build, which Wanlong already fixed.
>
> You can replace patches 1-5 with more recent ones posted recently:
> http://www.mail-archive.com/qemu-devel@nongnu.org/msg216404.html


Thanks!

[-- Attachment #2: Type: text/html, Size: 1180 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-19  9:03   ` Paolo Bonzini
  2014-02-19  9:36     ` Igor Mammedov
@ 2014-02-25 10:09     ` Hu Tao
  2014-03-03  3:24       ` Hu Tao
  1 sibling, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-25 10:09 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: imammedo, lersek, qemu-devel, Wanlong Gao

On Wed, Feb 19, 2014 at 10:03:13AM +0100, Paolo Bonzini wrote:

<...>

> > static int
> > ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> > {
> >+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
> >+    int mode = ram_backend->policy;
> >+    void *p;
> >+    unsigned long maxnode;
> >+
> >     if (!memory_region_size(&backend->mr)) {
> >         memory_region_init_ram(&backend->mr, OBJECT(backend),
> >                                object_get_canonical_path(OBJECT(backend)),
> >                                backend->size);
> >+
> >+        p = memory_region_get_ram_ptr(&backend->mr);
> >+        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
> >+
> >+        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
> >+            MPOL_F_STATIC_NODES;
> >+        /* This is a workaround for a long standing bug in Linux'
> >+         * mbind implementation, which cuts off the last specified
> >+         * node. To stay compatible should this bug be fixed, we
> >+         * specify one more node and zero this one out.
> >+         */
> >+        if (syscall(SYS_mbind, p, backend->size, mode,
> >+                    ram_backend->host_nodes, maxnode + 2, 0)) {
> 
> This does not compile on non-Linux; also, does libnuma include the
> workaround?  If so, this is a hint that we should be using libnuma
> instead...

Tested with libnuma and works fine without the workaround. Will use
libnuma in v19.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-19  9:36     ` Igor Mammedov
@ 2014-02-25 10:20       ` Hu Tao
  2014-02-25 14:15         ` Paolo Bonzini
  2014-02-26  5:57       ` Hu Tao
  1 sibling, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-25 10:20 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: Paolo Bonzini, lersek, qemu-devel, Wanlong Gao

On Wed, Feb 19, 2014 at 10:36:57AM +0100, Igor Mammedov wrote:
> On Wed, 19 Feb 2014 10:03:13 +0100
> Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> >   19/02/2014 08:54, Hu Tao ha scritto:
> > > Thus makes user control how to allocate memory for ram backend.
> > >
> > > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > > ---
> > >  backends/hostmem-ram.c  | 158 ++++++++++++++++++++++++++++++++++++++++++++++++
> > >  include/sysemu/sysemu.h |   2 +
> > >  2 files changed, 160 insertions(+)
> > >
> > > diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
> [...]
> 
> > >  static int
> > >  ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> > >  {
> > > +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
> > > +    int mode = ram_backend->policy;
> > > +    void *p;
> > > +    unsigned long maxnode;
> > > +
> > >      if (!memory_region_size(&backend->mr)) {
> > >          memory_region_init_ram(&backend->mr, OBJECT(backend),
> > >                                 object_get_canonical_path(OBJECT(backend)),
> > >                                 backend->size);
> > > +
> > > +        p = memory_region_get_ram_ptr(&backend->mr);
> > > +        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
> > > +
> > > +        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
> > > +            MPOL_F_STATIC_NODES;
> > > +        /* This is a workaround for a long standing bug in Linux'
> > > +         * mbind implementation, which cuts off the last specified
> > > +         * node. To stay compatible should this bug be fixed, we
> > > +         * specify one more node and zero this one out.
> > > +         */
> > > +        if (syscall(SYS_mbind, p, backend->size, mode,
> > > +                    ram_backend->host_nodes, maxnode + 2, 0)) {
> > 
> > This does not compile on non-Linux; also, does libnuma include the 
> > workaround?  If so, this is a hint that we should be using libnuma 
> > instead...
> > 
> > Finally, all this code should be in hostmem.c, not hostmem-ram.c, 
> > because the same policies can be applied to hugepage-backed memory.
> > 
> > Currently host_memory_backend_get_memory is calling bc->memory_init. 
> > Probably the call should be replaced by something like
> I've pushed to github updated version of memdev, where
> host_memory_backend_get_memory() is just convenience wrapper to get
> access to memdev's internal MemoryRegion.
> 
> All initialization now is done in user_creatable->complete() method
> which calls ram_backend_memory_init() so leaving it as is should be fine.

There is a problem that user_creatable_complete() is called before
adding the object to "/objects" (see object_create()), which triggers an
assert failure when calling object_get_canonical_path() in
ram_backend_memory_init(). Any ideas?

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 02/14] NUMA: check if the total numa memory size is equal to ram_size
  2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 02/14] NUMA: check if the total numa memory size is equal to ram_size Hu Tao
@ 2014-02-25 13:38   ` Eric Blake
  0 siblings, 0 replies; 53+ messages in thread
From: Eric Blake @ 2014-02-25 13:38 UTC (permalink / raw)
  To: Hu Tao, qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

[-- Attachment #1: Type: text/plain, Size: 762 bytes --]

On 02/19/2014 12:53 AM, Hu Tao wrote:
> From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> 
> If the total number of the assigned numa nodes memory is not
> equal to the assigned ram size, it will write the wrong data
> to ACPI talb, then the guest will ignore the wrong ACPI table

s/talb/table/

> and recognize all memory to one node. It's buggy, we should
> check it to ensure that we write the right data to ACPI table.
> 
> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---

Reviewed-by: Eric Blake <eblake@redhat.com>

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-25 10:20       ` Hu Tao
@ 2014-02-25 14:15         ` Paolo Bonzini
  2014-02-26  5:00           ` Hu Tao
  0 siblings, 1 reply; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-25 14:15 UTC (permalink / raw)
  To: Hu Tao, Igor Mammedov; +Cc: lersek, qemu-devel, Wanlong Gao

Il 25/02/2014 11:20, Hu Tao ha scritto:
> There is a problem that user_creatable_complete() is called before
> adding object to "/objects" (see object_create()), which triggers a
> assert failure when calling object_get_canonical_path() in
> ram_backend_memory_init(). Any ideas?

You can call object_property_add_child before calling 
user_creatable_complete, and then call object_unparent (in addition to 
object_unref) if creation fails.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-25 14:15         ` Paolo Bonzini
@ 2014-02-26  5:00           ` Hu Tao
  2014-02-26  8:47             ` Igor Mammedov
  0 siblings, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-26  5:00 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Igor Mammedov, lersek, qemu-devel, Wanlong Gao

On Tue, Feb 25, 2014 at 03:15:53PM +0100, Paolo Bonzini wrote:
> Il 25/02/2014 11:20, Hu Tao ha scritto:
> >There is a problem that user_creatable_complete() is called before
> >adding object to "/objects" (see object_create()), which triggers a
> >assert failure when calling object_get_canonical_path() in
> >ram_backend_memory_init(). Any ideas?
> 
> You can call object_property_add_child before calling
> user_creatable_complete, and then call object_unparent (in addition
> to object_unref) if creation fails.
> 
> Paolo

Something like this?

>From 59c999c840e4305bb2b95389bbea32e07c1c14c0 Mon Sep 17 00:00:00 2001
From: Hu Tao <hutao@cn.fujitsu.com>
Date: Wed, 26 Feb 2014 12:54:34 +0800
Subject: [PATCH] call user_creatable_complete() after adding object to
 "/objects"

This makes it possible to get the path of object by calling
object_get_canonical_path() in the complete callback if
someone wants it.

Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 vl.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vl.c b/vl.c
index 1d27b34..30b4297 100644
--- a/vl.c
+++ b/vl.c
@@ -2770,14 +2770,15 @@ static int object_create(QemuOpts *opts, void *opaque)
         goto out;
     }
 
+    object_property_add_child(container_get(object_get_root(), "/objects"),
+                              id, obj, &local_err);
+
     user_creatable_complete(obj, &local_err);
     if (local_err) {
+        object_unparent(obj);
         goto out;
     }
 
-    object_property_add_child(container_get(object_get_root(), "/objects"),
-                              id, obj, &local_err);
-
 out:
     object_unref(obj);
     if (local_err) {
-- 
1.8.5.2.229.g4448466

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-19  9:36     ` Igor Mammedov
  2014-02-25 10:20       ` Hu Tao
@ 2014-02-26  5:57       ` Hu Tao
  2014-02-26  9:05         ` Paolo Bonzini
  2014-02-26  9:10         ` Igor Mammedov
  1 sibling, 2 replies; 53+ messages in thread
From: Hu Tao @ 2014-02-26  5:57 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: Paolo Bonzini, lersek, qemu-devel, Wanlong Gao

On Wed, Feb 19, 2014 at 10:36:57AM +0100, Igor Mammedov wrote:
> On Wed, 19 Feb 2014 10:03:13 +0100
> Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> >   19/02/2014 08:54, Hu Tao ha scritto:
> > > Thus makes user control how to allocate memory for ram backend.
> > >
> > > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > > ---
> > >  backends/hostmem-ram.c  | 158 ++++++++++++++++++++++++++++++++++++++++++++++++
> > >  include/sysemu/sysemu.h |   2 +
> > >  2 files changed, 160 insertions(+)
> > >
> > > diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
> [...]
> 
> > >  static int
> > >  ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> > >  {
> > > +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
> > > +    int mode = ram_backend->policy;
> > > +    void *p;
> > > +    unsigned long maxnode;
> > > +
> > >      if (!memory_region_size(&backend->mr)) {
> > >          memory_region_init_ram(&backend->mr, OBJECT(backend),
> > >                                 object_get_canonical_path(OBJECT(backend)),
> > >                                 backend->size);
> > > +
> > > +        p = memory_region_get_ram_ptr(&backend->mr);
> > > +        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
> > > +
> > > +        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
> > > +            MPOL_F_STATIC_NODES;
> > > +        /* This is a workaround for a long standing bug in Linux'
> > > +         * mbind implementation, which cuts off the last specified
> > > +         * node. To stay compatible should this bug be fixed, we
> > > +         * specify one more node and zero this one out.
> > > +         */
> > > +        if (syscall(SYS_mbind, p, backend->size, mode,
> > > +                    ram_backend->host_nodes, maxnode + 2, 0)) {
> > 
> > This does not compile on non-Linux; also, does libnuma include the 
> > workaround?  If so, this is a hint that we should be using libnuma 
> > instead...
> > 
> > Finally, all this code should be in hostmem.c, not hostmem-ram.c, 
> > because the same policies can be applied to hugepage-backed memory.
> > 
> > Currently host_memory_backend_get_memory is calling bc->memory_init. 
> > Probably the call should be replaced by something like
> I've pushed to github updated version of memdev, where
> host_memory_backend_get_memory() is just convenience wrapper to get
> access to memdev's internal MemoryRegion.
> 
> All initialization now is done in user_creatable->complete() method
> which calls ram_backend_memory_init() so leaving it as is should be fine.

If lines about memory policies are moved up to hostmem.c, the only thing
left in ram_backend_memory_init() is calling memory_region_init_ram() to
allocate memory. Then a problem arises: when should memory
policies be applied? Choices:

1. apply memory policies in hostmem.c since this is the place user sets
   memory policies. But user_creatable_complete() seems not to support
   this. (but fix me)

2. cast to HostMemoryBackend in ram_backend_memory_init() (or in other
   memory backends) and add lines to apply memory policies.

3. provide an interface in HostMemoryBackendClass to do the thing and
   call it in subclasses. (this is basically the same as 2 except that
   we can reuse code)

Opinions?

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26  5:00           ` Hu Tao
@ 2014-02-26  8:47             ` Igor Mammedov
  2014-02-26  8:59               ` Hu Tao
  2014-02-26 11:22               ` Paolo Bonzini
  0 siblings, 2 replies; 53+ messages in thread
From: Igor Mammedov @ 2014-02-26  8:47 UTC (permalink / raw)
  To: Hu Tao; +Cc: Paolo Bonzini, lersek, qemu-devel, Wanlong Gao

On Wed, 26 Feb 2014 13:00:03 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> On Tue, Feb 25, 2014 at 03:15:53PM +0100, Paolo Bonzini wrote:
> > Il 25/02/2014 11:20, Hu Tao ha scritto:
> > >There is a problem that user_creatable_complete() is called before
> > >adding object to "/objects" (see object_create()), which triggers a
> > >assert failure when calling object_get_canonical_path() in
> > >ram_backend_memory_init(). Any ideas?
> > 
> > You can call object_property_add_child before calling
> > user_creatable_complete, and then call object_unparent (in addition
> > to object_unref) if creation fails.
> > 
> > Paolo
> 
> Something like this?
> 
> From 59c999c840e4305bb2b95389bbea32e07c1c14c0 Mon Sep 17 00:00:00 2001
> From: Hu Tao <hutao@cn.fujitsu.com>
> Date: Wed, 26 Feb 2014 12:54:34 +0800
> Subject: [PATCH] call user_creatable_complete() after adding object to
>  "/objects"
> 
> This makes it possible to get the path of object by calling
> object_get_canonical_path() in the complete callback if
> someone wants it.
> 
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  vl.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/vl.c b/vl.c
> index 1d27b34..30b4297 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2770,14 +2770,15 @@ static int object_create(QemuOpts *opts, void *opaque)
>          goto out;
>      }
>  
> +    object_property_add_child(container_get(object_get_root(), "/objects"),
> +                              id, obj, &local_err);
> +
>      user_creatable_complete(obj, &local_err);
>      if (local_err) {
> +        object_unparent(obj);
>          goto out;
>      }
>  
> -    object_property_add_child(container_get(object_get_root(), "/objects"),
> -                              id, obj, &local_err);
> -
>  out:
>      object_unref(obj);
>      if (local_err) {
Failure case is not handled properly,
I'm sorry that I've forgotten to mention the prerequisite patch
https://github.com/imammedo/qemu/commit/72a079b88e055fc690c8895a99ccbcce36f6fc1f
in stage tree https://github.com/imammedo/qemu/commits/memory-hotplug-v8

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26  8:47             ` Igor Mammedov
@ 2014-02-26  8:59               ` Hu Tao
  2014-02-26 12:19                 ` Igor Mammedov
  2014-02-26 11:22               ` Paolo Bonzini
  1 sibling, 1 reply; 53+ messages in thread
From: Hu Tao @ 2014-02-26  8:59 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: Paolo Bonzini, lersek, qemu-devel, Wanlong Gao

On Wed, Feb 26, 2014 at 09:47:08AM +0100, Igor Mammedov wrote:
> On Wed, 26 Feb 2014 13:00:03 +0800
> Hu Tao <hutao@cn.fujitsu.com> wrote:
> 
> > On Tue, Feb 25, 2014 at 03:15:53PM +0100, Paolo Bonzini wrote:
> > > Il 25/02/2014 11:20, Hu Tao ha scritto:
> > > >There is a problem that user_creatable_complete() is called before
> > > >adding object to "/objects" (see object_create()), which triggers a
> > > >assert failure when calling object_get_canonical_path() in
> > > >ram_backend_memory_init(). Any ideas?
> > > 
> > > You can call object_property_add_child before calling
> > > user_creatable_complete, and then call object_unparent (in addition
> > > to object_unref) if creation fails.
> > > 
> > > Paolo
> > 
> > Something like this?
> > 
> > From 59c999c840e4305bb2b95389bbea32e07c1c14c0 Mon Sep 17 00:00:00 2001
> > From: Hu Tao <hutao@cn.fujitsu.com>
> > Date: Wed, 26 Feb 2014 12:54:34 +0800
> > Subject: [PATCH] call user_creatable_complete() after adding object to
> >  "/objects"
> > 
> > This makes it possible to get the path of object by calling
> > object_get_canonical_path() in the complete callback if
> > someone wants it.
> > 
> > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > ---
> >  vl.c | 7 ++++---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> > 
> > diff --git a/vl.c b/vl.c
> > index 1d27b34..30b4297 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -2770,14 +2770,15 @@ static int object_create(QemuOpts *opts, void *opaque)
> >          goto out;
> >      }
> >  
> > +    object_property_add_child(container_get(object_get_root(), "/objects"),
> > +                              id, obj, &local_err);
> > +
> >      user_creatable_complete(obj, &local_err);
> >      if (local_err) {
> > +        object_unparent(obj);
> >          goto out;
> >      }
> >  
> > -    object_property_add_child(container_get(object_get_root(), "/objects"),
> > -                              id, obj, &local_err);
> > -
> >  out:
> >      object_unref(obj);
> >      if (local_err) {
> Failure case is not handled properly,
> I'm sorry that I've forgotten to mention prerequisite path
> https://github.com/imammedo/qemu/commit/72a079b88e055fc690c8895a99ccbcce36f6fc1f
> in stage tree https://github.com/imammedo/qemu/commits/memory-hotplug-v8

No problem. I should've noticed the patch when cherry-picking. Will you
post it separately?

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26  5:57       ` Hu Tao
@ 2014-02-26  9:05         ` Paolo Bonzini
  2014-02-26  9:10         ` Igor Mammedov
  1 sibling, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-26  9:05 UTC (permalink / raw)
  To: Hu Tao, Igor Mammedov; +Cc: lersek, qemu-devel, Wanlong Gao

Il 26/02/2014 06:57, Hu Tao ha scritto:
> If lines about memory polices are moved up to hostmem.c, the only thing
> left in ram_backend_memory_init() is calling memory_region_init_ram() to
> allocate memory. Then it comes a problem that when to apply memory
> polices? Choices:
>
> 1. apply memory polices in hostmem.c since this is the place user sets
>    memory polices. But user_creatable_complete() seems not to support
>    this.( but fix me)
>
> 2. cast to HostMemoryBackend in ram_backend_memory_init() (or in other
>    memory backends) and add lines to apply memory polices.
>
> 3. provide an interface in HostMemoryBackendClass to do the thing and
>    call it in subclasses. (this is basically the same as 2 except that
>    we can reuse code)

I like (3).  I understand it's something like

void memory_backend_apply_mempolicy(HostMemoryBackend *be,
                                     void *addr, size_t len, Error **err)

?

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26  5:57       ` Hu Tao
  2014-02-26  9:05         ` Paolo Bonzini
@ 2014-02-26  9:10         ` Igor Mammedov
  2014-02-26 10:33           ` Paolo Bonzini
  1 sibling, 1 reply; 53+ messages in thread
From: Igor Mammedov @ 2014-02-26  9:10 UTC (permalink / raw)
  To: Hu Tao; +Cc: Paolo Bonzini, lersek, qemu-devel, Wanlong Gao

On Wed, 26 Feb 2014 13:57:06 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> On Wed, Feb 19, 2014 at 10:36:57AM +0100, Igor Mammedov wrote:
> > On Wed, 19 Feb 2014 10:03:13 +0100
> > Paolo Bonzini <pbonzini@redhat.com> wrote:
> > 
> > >   19/02/2014 08:54, Hu Tao ha scritto:
> > > > Thus makes user control how to allocate memory for ram backend.
> > > >
> > > > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > > > ---
> > > >  backends/hostmem-ram.c  | 158 ++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  include/sysemu/sysemu.h |   2 +
> > > >  2 files changed, 160 insertions(+)
> > > >
> > > > diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
> > [...]
> > 
> > > >  static int
> > > >  ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> > > >  {
> > > > +    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
> > > > +    int mode = ram_backend->policy;
> > > > +    void *p;
> > > > +    unsigned long maxnode;
> > > > +
> > > >      if (!memory_region_size(&backend->mr)) {
> > > >          memory_region_init_ram(&backend->mr, OBJECT(backend),
> > > >                                 object_get_canonical_path(OBJECT(backend)),
> > > >                                 backend->size);
> > > > +
> > > > +        p = memory_region_get_ram_ptr(&backend->mr);
> > > > +        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
> > > > +
> > > > +        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
> > > > +            MPOL_F_STATIC_NODES;
> > > > +        /* This is a workaround for a long standing bug in Linux'
> > > > +         * mbind implementation, which cuts off the last specified
> > > > +         * node. To stay compatible should this bug be fixed, we
> > > > +         * specify one more node and zero this one out.
> > > > +         */
> > > > +        if (syscall(SYS_mbind, p, backend->size, mode,
> > > > +                    ram_backend->host_nodes, maxnode + 2, 0)) {
> > > 
> > > This does not compile on non-Linux; also, does libnuma include the 
> > > workaround?  If so, this is a hint that we should be using libnuma 
> > > instead...
> > > 
> > > Finally, all this code should be in hostmem.c, not hostmem-ram.c, 
> > > because the same policies can be applied to hugepage-backed memory.
> > > 
> > > Currently host_memory_backend_get_memory is calling bc->memory_init. 
> > > Probably the call should be replaced by something like
> > I've pushed to github updated version of memdev, where
> > host_memory_backend_get_memory() is just convenience wrapper to get
> > access to memdev's internal MemoryRegion.
> > 
> > All initialization now is done in user_creatable->complete() method
> > which calls ram_backend_memory_init() so leaving it as is should be fine.
> 
> If lines about memory polices are moved up to hostmem.c, the only thing
> left in ram_backend_memory_init() is calling memory_region_init_ram() to
> allocate memory. Then it comes a problem that when to apply memory
> polices? Choices:
> 
> 1. apply memory polices in hostmem.c since this is the place user sets
>    memory polices. But user_creatable_complete() seems not to support
>    this.( but fix me)
if we assume that NUMA policies apply to every hostmem derived backend,
then we could use the realize() approach used by DEVICE, i.e.
set NUMA policies in hostmem.c:hostmemory_backend_memory_init()

Add parent_complete field to ram-backend class and store there parent's
complete pointer. Then we can do:

ram_backend_memory_init(UserCreatable *uc, Error **errp) {
    memory_region_init_ram();
    ...
    MEMORY_BACKEND_RAM_CLASS(uc)->parent_complete(uc, errp);
    ...
}

> 
> 2. cast to HostMemoryBackend in ram_backend_memory_init() (or in other
>    memory backends) and add lines to apply memory polices.
> 
> 3. provide an interface in HostMemoryBackendClass to do the thing and
>    call it in subclasses. (this is basically the same as 2 except that
>    we can reuse code)
> 
> Opinions?
> 
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26  9:10         ` Igor Mammedov
@ 2014-02-26 10:33           ` Paolo Bonzini
  2014-02-26 12:31             ` Igor Mammedov
  0 siblings, 1 reply; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-26 10:33 UTC (permalink / raw)
  To: Igor Mammedov, Hu Tao; +Cc: lersek, qemu-devel, Wanlong Gao

Il 26/02/2014 10:10, Igor Mammedov ha scritto:
> if we assume that NUMA policies apply to every hostmem derived backend,
> then we could realize() approach used by DEVICE. i.e.
> set NUMA policies in hostmem.c:hostmemory_backend_memory_init()
>
> Add parent_complete field to ram-backend class and store there parent's
> complete pointer. Then we can do:
>
> ram_backend_memory_init(UserCreatable *uc, Error **errp) {
>     memory_region_init_ram();
>     ...
>     MEMORY_BACKEND_RAM_CLASS(uc)->parent_complete(uc, errp);
>     ...
> }
>

The problem is that some backends might not be handled the same way. 
For example, not all backends might produce a single void*/size_t pair 
for the entire region.  Think of a "composite" backend that produces a 
large memory region from two smaller ones.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26  8:47             ` Igor Mammedov
  2014-02-26  8:59               ` Hu Tao
@ 2014-02-26 11:22               ` Paolo Bonzini
  1 sibling, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-26 11:22 UTC (permalink / raw)
  To: Igor Mammedov, Hu Tao; +Cc: lersek, qemu-devel, Wanlong Gao

Il 26/02/2014 09:47, Igor Mammedov ha scritto:
> I'm sorry that I've forgotten to mention prerequisite path
> https://github.com/imammedo/qemu/commit/72a079b88e055fc690c8895a99ccbcce36f6fc1f
> in stage tree https://github.com/imammedo/qemu/commits/memory-hotplug-v8

Thanks, I'll add this patch to the numa-queue branch.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26  8:59               ` Hu Tao
@ 2014-02-26 12:19                 ` Igor Mammedov
  0 siblings, 0 replies; 53+ messages in thread
From: Igor Mammedov @ 2014-02-26 12:19 UTC (permalink / raw)
  To: Hu Tao; +Cc: Paolo Bonzini, lersek, qemu-devel, Wanlong Gao

On Wed, 26 Feb 2014 16:59:44 +0800
Hu Tao <hutao@cn.fujitsu.com> wrote:

> On Wed, Feb 26, 2014 at 09:47:08AM +0100, Igor Mammedov wrote:
> > On Wed, 26 Feb 2014 13:00:03 +0800
> > Hu Tao <hutao@cn.fujitsu.com> wrote:
> > 
> > > On Tue, Feb 25, 2014 at 03:15:53PM +0100, Paolo Bonzini wrote:
> > > > Il 25/02/2014 11:20, Hu Tao ha scritto:
> > > > >There is a problem that user_creatable_complete() is called before
> > > > >adding object to "/objects" (see object_create()), which triggers a
> > > > >assert failure when calling object_get_canonical_path() in
> > > > >ram_backend_memory_init(). Any ideas?
> > > > 
> > > > You can call object_property_add_child before calling
> > > > user_creatable_complete, and then call object_unparent (in addition
> > > > to object_unref) if creation fails.
> > > > 
> > > > Paolo
> > > 
> > > Something like this?
> > > 
> > > From 59c999c840e4305bb2b95389bbea32e07c1c14c0 Mon Sep 17 00:00:00 2001
> > > From: Hu Tao <hutao@cn.fujitsu.com>
> > > Date: Wed, 26 Feb 2014 12:54:34 +0800
> > > Subject: [PATCH] call user_creatable_complete() after adding object to
> > >  "/objects"
> > > 
> > > This makes it possible to get the path of object by calling
> > > object_get_canonical_path() in the complete callback if
> > > someone wants it.
> > > 
> > > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > > ---
> > >  vl.c | 7 ++++---
> > >  1 file changed, 4 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/vl.c b/vl.c
> > > index 1d27b34..30b4297 100644
> > > --- a/vl.c
> > > +++ b/vl.c
> > > @@ -2770,14 +2770,15 @@ static int object_create(QemuOpts *opts, void *opaque)
> > >          goto out;
> > >      }
> > >  
> > > +    object_property_add_child(container_get(object_get_root(), "/objects"),
> > > +                              id, obj, &local_err);
> > > +
> > >      user_creatable_complete(obj, &local_err);
> > >      if (local_err) {
> > > +        object_unparent(obj);
> > >          goto out;
> > >      }
> > >  
> > > -    object_property_add_child(container_get(object_get_root(), "/objects"),
> > > -                              id, obj, &local_err);
> > > -
> > >  out:
> > >      object_unref(obj);
> > >      if (local_err) {
> > Failure case is not handled properly,
> > I'm sorry that I've forgotten to mention prerequisite path
> > https://github.com/imammedo/qemu/commit/72a079b88e055fc690c8895a99ccbcce36f6fc1f
> > in stage tree https://github.com/imammedo/qemu/commits/memory-hotplug-v8
> 
> No problem. I should've noticed the patch when cherry-picking. Will you
> post it seperately?
> 
> 

sure

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 10:33           ` Paolo Bonzini
@ 2014-02-26 12:31             ` Igor Mammedov
  2014-02-26 12:45               ` Paolo Bonzini
  0 siblings, 1 reply; 53+ messages in thread
From: Igor Mammedov @ 2014-02-26 12:31 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Hu Tao, lersek, qemu-devel, Wanlong Gao

On Wed, 26 Feb 2014 11:33:30 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 26/02/2014 10:10, Igor Mammedov ha scritto:
> > if we assume that NUMA policies apply to every hostmem derived backend,
> > then we could realize() approach used by DEVICE. i.e.
> > set NUMA policies in hostmem.c:hostmemory_backend_memory_init()
> >
> > Add parent_complete field to ram-backend class and store there parent's
> > complete pointer. Then we can do:
> >
> > ram_backend_memory_init(UserCreatable *uc, Error **errp) {
> >     memory_region_init_ram();
> >     ...
> >     MEMORY_BACKEND_RAM_CLASS(uc)->parent_complete(uc, errp);
> >     ...
> > }
> >
> 
> The problem is that some backends might not be handled the same way. 
> For example, not all backends might produce a single void*/size_t pair 
> for the entire region.  Think of a "composite" backend that produces a 
> large memory region from two smaller ones.

I'd prefer to keep backends simple, with 1:1 mapping to memory regions.
Is there a need in composite one or something similar?

> 
> Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 12:31             ` Igor Mammedov
@ 2014-02-26 12:45               ` Paolo Bonzini
  2014-02-26 12:58                 ` Marcelo Tosatti
  2014-02-26 13:43                 ` Igor Mammedov
  0 siblings, 2 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-26 12:45 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: Hu Tao, Marcelo Tosatti, lersek, qemu-devel, Wanlong Gao

Il 26/02/2014 13:31, Igor Mammedov ha scritto:
>> > The problem is that some backends might not be handled the same way.
>> > For example, not all backends might produce a single void*/size_t pair
>> > for the entire region.  Think of a "composite" backend that produces a
>> > large memory region from two smaller ones.
> I'd prefer to keep backends simple, with 1:1 mapping to memory regions.

I agree.  However not all backends may have a mapping to a RAM memory 
region.  A composite backend could create a container memory region 
whose children are other HostMemoryBackend objects.

> Is there a need in composite one or something similar?

I've heard of users that want a node backed partially by hugetlbfs and 
partially by regular RAM.  Not sure why.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 12:45               ` Paolo Bonzini
@ 2014-02-26 12:58                 ` Marcelo Tosatti
  2014-02-26 13:14                   ` Paolo Bonzini
  2014-02-26 13:43                 ` Igor Mammedov
  1 sibling, 1 reply; 53+ messages in thread
From: Marcelo Tosatti @ 2014-02-26 12:58 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Igor Mammedov, lersek, qemu-devel, Wanlong Gao, Hu Tao

On Wed, Feb 26, 2014 at 01:45:38PM +0100, Paolo Bonzini wrote:
> Il 26/02/2014 13:31, Igor Mammedov ha scritto:
> >>> The problem is that some backends might not be handled the same way.
> >>> For example, not all backends might produce a single void*/size_t pair
> >>> for the entire region.  Think of a "composite" backend that produces a
> >>> large memory region from two smaller ones.
> >I'd prefer to keep backends simple, with 1:1 mapping to memory regions.
> 
> I agree.  However not all backends may have a mapping to a RAM
> memory region.  A composite backend could create a container memory
> region whose children are other HostMemoryBackend objects.
> 
> >Is there a need in composite one or something similar?
> 
> I've heard of users that want a node backed partially by hugetlbfs
> and partially by regular RAM.  Not sure why.
> 
> Paolo

To overcommit the non hugetlbfs backed guest RAM (think guest pagecache on that non
hugetlbfs backed memory, swappable and KSM-able).

The problem is, you have to in some way guarantee the guest allocates 
1GB pages out of the hugetlb backed GPA ranges. Some thoughts
(honestly, dislike all of them):

1) Boot guest with hugepages, allocate hugepages in guest,
later on hotplug 4K backed ranges. HV-unaware reboot might fail,
though.

2) Communicate hugepage GPAs to guest.

3) Create holes in non hugepage backed GPA range.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 12:58                 ` Marcelo Tosatti
@ 2014-02-26 13:14                   ` Paolo Bonzini
  0 siblings, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-26 13:14 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Igor Mammedov, lersek, qemu-devel, Wanlong Gao, Hu Tao

Il 26/02/2014 13:58, Marcelo Tosatti ha scritto:
>>> I'd prefer to keep backends simple, with 1:1 mapping to memory regions.
>>
>> I agree.  However not all backends may have a mapping to a RAM
>> memory region.  A composite backend could create a container memory
>> region whose children are other HostMemoryBackend objects.
>>
>>> Is there a need in composite one or something similar?
>>
>> I've heard of users that want a node backed partially by hugetlbfs
>> and partially by regular RAM.  Not sure why.
>
> To overcommit the non hugetlbfs backed guest RAM (think guest pagecache on that non
> hugetlbfs backed memory, swappable and KSM-able).
>
> The problem is, you have to in someway guarantee the guest allocates
> 1GB pages out of the hugetlb backed GPA ranges. Some thoughts
> (honestly, dislike all of them):
>
> 1) Boot guest with hugepages, allocate hugepages in guest,
> later on hotplug 4K backed ranges. HV-unaware reboot might fail,
> though.
>
> 2) Communicate hugepage GPAs to guest.
>
> 3) Create holes in non hugepage backed GPA range.

I guess (2) is the only one I "like", and I like it just because it 
officially becomes Not Our Problem.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 12:45               ` Paolo Bonzini
  2014-02-26 12:58                 ` Marcelo Tosatti
@ 2014-02-26 13:43                 ` Igor Mammedov
  2014-02-26 13:47                   ` Paolo Bonzini
  1 sibling, 1 reply; 53+ messages in thread
From: Igor Mammedov @ 2014-02-26 13:43 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Hu Tao, Marcelo Tosatti, lersek, qemu-devel, Wanlong Gao

On Wed, 26 Feb 2014 13:45:38 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 26/02/2014 13:31, Igor Mammedov ha scritto:
> >> > The problem is that some backends might not be handled the same way.
> >> > For example, not all backends might produce a single void*/size_t pair
> >> > for the entire region.  Think of a "composite" backend that produces a
> >> > large memory region from two smaller ones.
> > I'd prefer to keep backends simple, with 1:1 mapping to memory regions.
> 
> I agree.  However not all backends may have a mapping to a RAM memory 
> region.  A composite backend could create a container memory region 
> whose children are other HostMemoryBackend objects.
> 
> > Is there a need in composite one or something similar?
> 
> I've heard of users that want a node backed partially by hugetlbfs and 
> partially by regular RAM.  Not sure why.
Isn't the issue here in how the backend is mapped into GPA? Well, that is not
the backend's job.

Once one starts to put layout (alignment, noncontinuously mapped
memory regions inside of container, ...), mapping HPA->GPA gets complicated.

It would be better to use simple building blocks and model as:
2 separate backends (ram + hugetlbfs) and 2 corresponding DIMM devices.


> 
> Paolo
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 13:43                 ` Igor Mammedov
@ 2014-02-26 13:47                   ` Paolo Bonzini
  2014-02-26 14:25                     ` Igor Mammedov
  0 siblings, 1 reply; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-26 13:47 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: Hu Tao, Marcelo Tosatti, lersek, qemu-devel, Wanlong Gao

Il 26/02/2014 14:43, Igor Mammedov ha scritto:
> On Wed, 26 Feb 2014 13:45:38 +0100
> Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>> Il 26/02/2014 13:31, Igor Mammedov ha scritto:
>>>>> The problem is that some backends might not be handled the same way.
>>>>> For example, not all backends might produce a single void*/size_t pair
>>>>> for the entire region.  Think of a "composite" backend that produces a
>>>>> large memory region from two smaller ones.
>>> I'd prefer to keep backends simple, with 1:1 mapping to memory regions.
>>
>> I agree.  However not all backends may have a mapping to a RAM memory
>> region.  A composite backend could create a container memory region
>> whose children are other HostMemoryBackend objects.
>>
>>> Is there a need in composite one or something similar?
>>
>> I've heard of users that want a node backed partially by hugetlbfs and
>> partially by regular RAM.  Not sure why.
> Isn't issue here in how backend is mapped into GPA? Well that is not
> backend's job.
>
> Once one starts to put layout (alignment, noncontinuously mapped
> memory regions inside of container, ...), mapping HPA->GPA gets complicated.
>
> It would be better to use simple building blocks and model as:
> 2 separate backends (ram + hugetlbfs) and 2 corresponding DIMM devices.

Right, I had forgotten that you can have cold-plugged DIMM devices. 
That's a nice solution, also because it simplifies passing the GPA 
configuration down to the guest.

How would that translate to sharing HostMemoryBackend code for memory 
policies?  Which of Hu Tao's proposals do you like best?

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 13:47                   ` Paolo Bonzini
@ 2014-02-26 14:25                     ` Igor Mammedov
  2014-02-26 14:39                       ` Paolo Bonzini
  0 siblings, 1 reply; 53+ messages in thread
From: Igor Mammedov @ 2014-02-26 14:25 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Hu Tao, Marcelo Tosatti, lersek, qemu-devel, Wanlong Gao

On Wed, 26 Feb 2014 14:47:28 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 26/02/2014 14:43, Igor Mammedov ha scritto:
> > On Wed, 26 Feb 2014 13:45:38 +0100
> > Paolo Bonzini <pbonzini@redhat.com> wrote:
> >
> >> Il 26/02/2014 13:31, Igor Mammedov ha scritto:
> >>>>> The problem is that some backends might not be handled the same way.
> >>>>> For example, not all backends might produce a single void*/size_t pair
> >>>>> for the entire region.  Think of a "composite" backend that produces a
> >>>>> large memory region from two smaller ones.
> >>> I'd prefer to keep backends simple, with 1:1 mapping to memory regions.
> >>
> >> I agree.  However not all backends may have a mapping to a RAM memory
> >> region.  A composite backend could create a container memory region
> >> whose children are other HostMemoryBackend objects.
> >>
> >>> Is there a need in composite one or something similar?
> >>
> >> I've heard of users that want a node backed partially by hugetlbfs and
> >> partially by regular RAM.  Not sure why.
> > Isn't issue here in how backend is mapped into GPA? Well that is not
> > backend's job.
> >
> > Once one starts to put layout (alignment, noncontinuously mapped
> > memory regions inside of container, ...), mapping HPA->GPA gets complicated.
> >
> > It would be better to use simple building blocks and model as:
> > 2 separate backends (ram + hugetlbfs) and 2 corresponding DIMM devices.
> 
> Right, I had forgotten that you can have cold-plugged DIMM devices. 
> That's a nice solution, also because it simplifies passing the GPA 
> configuration down to the guest.
> 
> How would that translate to sharing HostMemoryBackend code for memory 
> policies?  Which of Hu Tao's proposals do you like best?
possible choices could be:

 1: 'realize' approach I suggested
      drawback is: assumption that all backends derived from HostMemoryBackend
                   will inherit NUMA controls even if backend shouldn't have
                   one (for example: fictional remote memory backend)
      plus: derived types from HostMemoryBackend, don't need to know anything
            about NUMA.
 2: #3 from Hu Tao's suggestion
      drawback is: every new backend have to explicitly call NUMA callbacks
      somewhat plus is that not NUMA aware backends could ignore NUMA callbacks,
      but they would still have NUMA properties available, which is confusing.

 3: might be over-engineered #1 from above: build proper class hierarchy:
      HostMemoryBackend  -> NumaMemoryBackend -> RamBackend
                        |                     -> HugepageBackend
                        |-> whatever else

> 
> Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-26 14:25                     ` Igor Mammedov
@ 2014-02-26 14:39                       ` Paolo Bonzini
  0 siblings, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2014-02-26 14:39 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: Hu Tao, Marcelo Tosatti, lersek, qemu-devel, Wanlong Gao

Il 26/02/2014 15:25, Igor Mammedov ha scritto:
>  1: 'realize' approach I suggested
>       drawback is: assumption that all backends derived from HostMemoryBackend
>                    will inherit NUMA controls even if backend shouldn't have
>                    one (for example: fictional remote memory backend)
>       plus: derived types from HostMemoryBackend, don't need to know anything
>             about NUMA.

Let's go with this for now.  It keeps things simple.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields
  2014-02-25 10:09     ` Hu Tao
@ 2014-03-03  3:24       ` Hu Tao
  0 siblings, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-03-03  3:24 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: imammedo, lersek, qemu-devel, Wanlong Gao

On Tue, Feb 25, 2014 at 06:09:20PM +0800, Hu Tao wrote:
> On Wed, Feb 19, 2014 at 10:03:13AM +0100, Paolo Bonzini wrote:
> 
> <...>
> 
> > > static int
> > > ram_backend_memory_init(HostMemoryBackend *backend, Error **errp)
> > > {
> > >+    HostMemoryBackendRam *ram_backend = MEMORY_BACKEND_RAM(backend);
> > >+    int mode = ram_backend->policy;
> > >+    void *p;
> > >+    unsigned long maxnode;
> > >+
> > >     if (!memory_region_size(&backend->mr)) {
> > >         memory_region_init_ram(&backend->mr, OBJECT(backend),
> > >                                object_get_canonical_path(OBJECT(backend)),
> > >                                backend->size);
> > >+
> > >+        p = memory_region_get_ram_ptr(&backend->mr);
> > >+        maxnode = find_last_bit(ram_backend->host_nodes, MAX_NODES);
> > >+
> > >+        mode |= ram_backend->relative ? MPOL_F_RELATIVE_NODES :
> > >+            MPOL_F_STATIC_NODES;
> > >+        /* This is a workaround for a long standing bug in Linux'
> > >+         * mbind implementation, which cuts off the last specified
> > >+         * node. To stay compatible should this bug be fixed, we
> > >+         * specify one more node and zero this one out.
> > >+         */
> > >+        if (syscall(SYS_mbind, p, backend->size, mode,
> > >+                    ram_backend->host_nodes, maxnode + 2, 0)) {
> > 
> > This does not compile on non-Linux; also, does libnuma include the
> > workaround?  If so, this is a hint that we should be using libnuma
> > instead...
> 
> Tested with libnuma and works fine without the workaround. Will use
> libnuma in v19.

Sorry, I might have done something wrong in the test. With libnuma the
workaround is still needed. I checked numactl-2.0.9; it doesn't include
the workaround.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option
  2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option Hu Tao
  2014-02-19  9:50   ` Igor Mammedov
@ 2014-03-04  0:10   ` Eric Blake
  2014-03-04  2:20     ` Hu Tao
  1 sibling, 1 reply; 53+ messages in thread
From: Eric Blake @ 2014-03-04  0:10 UTC (permalink / raw)
  To: Hu Tao, qemu-devel; +Cc: pbonzini, lersek, Wanlong Gao, imammedo

[-- Attachment #1: Type: text/plain, Size: 1210 bytes --]

On 02/19/2014 12:54 AM, Hu Tao wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> This option provides the infrastructure for binding guest NUMA nodes
> to host NUMA nodes.  For example:
> 
>  -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \
>  -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
>  -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
>  -numa node,nodeid=1,cpus=1,memdev=ram-node1
> 
> The option replaces "-numa mem".
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Conflicts:
> 	include/sysemu/sysemu.h
> 	numa.c

Until this patch is taken upstream, a 'Conflicts:' section in your
commit message doesn't make sense.  That is useful for downstream or
stable branch backports, but doesn't belong on the mainline branch.

> 
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  include/sysemu/sysemu.h |  2 ++
>  numa.c                  | 64 +++++++++++++++++++++++++++++++++++++++++++++++--
>  qapi-schema.json        |  6 ++++-
>  3 files changed, 69 insertions(+), 3 deletions(-)
> 

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option
  2014-03-04  0:10   ` Eric Blake
@ 2014-03-04  2:20     ` Hu Tao
  0 siblings, 0 replies; 53+ messages in thread
From: Hu Tao @ 2014-03-04  2:20 UTC (permalink / raw)
  To: Eric Blake; +Cc: pbonzini, lersek, qemu-devel, Wanlong Gao, imammedo

On Mon, Mar 03, 2014 at 05:10:20PM -0700, Eric Blake wrote:
> On 02/19/2014 12:54 AM, Hu Tao wrote:
> > From: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > This option provides the infrastructure for binding guest NUMA nodes
> > to host NUMA nodes.  For example:
> > 
> >  -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \
> >  -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
> >  -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
> >  -numa node,nodeid=1,cpus=1,memdev=ram-node1
> > 
> > The option replaces "-numa mem".
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > Conflicts:
> > 	include/sysemu/sysemu.h
> > 	numa.c
> 
> Until this patch is taken upstream, a 'Conflicts:' section in your
> commit message doesn't make sense.  That is useful for downstream or
> stable branch backports, but doesn't belong on the mainline branch.

I think this was introduced during rebase. Will fix.

Thanks.

^ permalink raw reply	[flat|nested] 53+ messages in thread

end of thread, other threads:[~2014-03-04  2:22 UTC | newest]

Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-02-19  7:53 [Qemu-devel] [PATCH v18 00/14] Add support for binding guest numa nodes to host numa nodes Hu Tao
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 01/14] NUMA: move numa related code to new file numa.c Hu Tao
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 02/14] NUMA: check if the total numa memory size is equal to ram_size Hu Tao
2014-02-25 13:38   ` Eric Blake
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 03/14] NUMA: Add numa_info structure to contain numa nodes info Hu Tao
2014-02-19  9:26   ` Igor Mammedov
2014-02-21  2:54     ` hu tao
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 04/14] NUMA: convert -numa option to use OptsVisitor Hu Tao
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 05/14] NUMA: expand MAX_NODES from 64 to 128 Hu Tao
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 06/14] qapi: add SIZE type parser to string_input_visitor Hu Tao
2014-02-19  9:54   ` Igor Mammedov
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 07/14] add memdev backend infrastructure Hu Tao
2014-02-19  9:15   ` Igor Mammedov
2014-02-19  7:53 ` [Qemu-devel] [PATCH v18 08/14] pc: pass QEMUMachineInitArgs to pc_memory_init Hu Tao
2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 09/14] numa: introduce memory_region_allocate_system_memory Hu Tao
2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 10/14] numa: add -numa node, memdev= option Hu Tao
2014-02-19  9:50   ` Igor Mammedov
2014-02-19 11:53     ` Paolo Bonzini
2014-03-04  0:10   ` Eric Blake
2014-03-04  2:20     ` Hu Tao
2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 11/14] qapi: make string input visitor parse int list Hu Tao
2014-02-19  8:17   ` Hu Tao
2014-02-19  8:42     ` Paolo Bonzini
2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 12/14] qapi: add HostMemPolicy enum type Hu Tao
2014-02-19  9:08   ` Paolo Bonzini
2014-02-19 11:23   ` Igor Mammedov
2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 13/14] memory backend: fill memory backend ram fields Hu Tao
2014-02-19  9:03   ` Paolo Bonzini
2014-02-19  9:36     ` Igor Mammedov
2014-02-25 10:20       ` Hu Tao
2014-02-25 14:15         ` Paolo Bonzini
2014-02-26  5:00           ` Hu Tao
2014-02-26  8:47             ` Igor Mammedov
2014-02-26  8:59               ` Hu Tao
2014-02-26 12:19                 ` Igor Mammedov
2014-02-26 11:22               ` Paolo Bonzini
2014-02-26  5:57       ` Hu Tao
2014-02-26  9:05         ` Paolo Bonzini
2014-02-26  9:10         ` Igor Mammedov
2014-02-26 10:33           ` Paolo Bonzini
2014-02-26 12:31             ` Igor Mammedov
2014-02-26 12:45               ` Paolo Bonzini
2014-02-26 12:58                 ` Marcelo Tosatti
2014-02-26 13:14                   ` Paolo Bonzini
2014-02-26 13:43                 ` Igor Mammedov
2014-02-26 13:47                   ` Paolo Bonzini
2014-02-26 14:25                     ` Igor Mammedov
2014-02-26 14:39                       ` Paolo Bonzini
2014-02-25 10:09     ` Hu Tao
2014-03-03  3:24       ` Hu Tao
2014-02-19  7:54 ` [Qemu-devel] [PATCH v18 14/14] amp: add query-memdev Hu Tao
2014-02-19  8:14   ` Hu Tao
2014-02-19  9:07   ` Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.