From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1LAjjG-0006Ej-Ac for qemu-devel@nongnu.org; Thu, 11 Dec 2008 06:29:14 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1LAjjE-0006Di-LC for qemu-devel@nongnu.org; Thu, 11 Dec 2008 06:29:13 -0500 Received: from [199.232.76.173] (port=52634 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1LAjjE-0006DV-7X for qemu-devel@nongnu.org; Thu, 11 Dec 2008 06:29:12 -0500 Received: from outbound-sin.frontbridge.com ([207.46.51.80]:11431 helo=SG2EHSOBE003.bigfish.com) by monty-python.gnu.org with esmtps (TLS-1.0:RSA_ARCFOUR_MD5:16) (Exim 4.60) (envelope-from ) id 1LAjjC-0006CZ-W4 for qemu-devel@nongnu.org; Thu, 11 Dec 2008 06:29:11 -0500 Message-ID: <4940F990.9000304@amd.com> Date: Thu, 11 Dec 2008 12:29:20 +0100 From: Andre Przywara MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------010002070002050909070608" Subject: [Qemu-devel] [PATCH 1/3] NUMA: add -numa command line option Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Anthony Liguori Cc: qemu-devel@nongnu.org, Avi Kivity --------------010002070002050909070608 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit This adds and parses a -numa command line option to QEMU. Signed-off-by: Andre Przywara -- Andre Przywara AMD-Operating System Research Center (OSRC), Dresden, Germany Tel: +49 351 277-84917 ----to satisfy European Law for business letters: AMD Saxony Limited Liability Company & Co. KG, Wilschdorfer Landstr. 101, 01109 Dresden, Germany Register Court Dresden: HRA 4896, General Partner authorized to represent: AMD Saxony LLC (Wilmington, Delaware, US) General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy --------------010002070002050909070608 Content-Type: text/x-patch; name="qemunuma_cmdline.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="qemunuma_cmdline.patch" # HG changeset patch # User Andre Przywara # Date 1228991781 -3600 # Node ID 394d02758aa4358be3bcd14f9d59efaf42e89328 # Parent b3a4224604d267a0e406a6d6809947f342a6b2ed introduce -numa command line option diff -r b3a4224604d2 -r 394d02758aa4 sysemu.h --- a/sysemu.h Thu Dec 11 11:22:27 2008 +0100 +++ b/sysemu.h Thu Dec 11 11:36:21 2008 +0100 @@ -92,6 +92,16 @@ extern int alt_grab; extern int alt_grab; extern int usb_enabled; extern int smp_cpus; + +#define MAX_NODES 64 +extern int numnumanodes; +extern uint64_t hostnodes[MAX_NODES]; +extern uint64_t node_mem[MAX_NODES]; +extern uint64_t node_to_cpus[MAX_NODES]; + +int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems, + uint64_t *cpus, int maxentries, int expect_numnodes); + extern int cursor_hide; extern int graphic_rotate; extern int no_quit; diff -r b3a4224604d2 -r 394d02758aa4 vl.c --- a/vl.c Thu Dec 11 11:22:27 2008 +0100 +++ b/vl.c Thu Dec 11 11:36:21 2008 +0100 @@ -222,6 +222,10 @@ int win2k_install_hack = 0; #endif int usb_enabled = 0; int smp_cpus = 1; +int numnumanodes = 0; +uint64_t hostnodes[MAX_NODES]; +uint64_t node_mem[MAX_NODES]; +uint64_t node_to_cpus[MAX_NODES]; const char *vnc_display; int acpi_enabled = 1; int fd_bootchk = 1; @@ -3968,6 +3972,10 @@ static void help(int exitcode) "-daemonize daemonize QEMU after initializing\n" #endif "-option-rom rom load a file, rom, into the option ROM space\n" + "-numa nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n" + " create a multi NUMA node guest and optionally pin it to\n" + " to the given host nodes. If mem and cpu are omitted,\n" + " resources are split equally\n" #ifdef TARGET_SPARC "-prom-env variable=value set OpenBIOS nvram variables\n" #endif @@ -4065,6 +4073,7 @@ enum { QEMU_OPTION_usb, QEMU_OPTION_usbdevice, QEMU_OPTION_smp, + QEMU_OPTION_numa, QEMU_OPTION_vnc, QEMU_OPTION_no_acpi, QEMU_OPTION_curses, @@ -4171,6 +4180,7 @@ static const QEMUOption qemu_options[] = { "win2k-hack", 0, QEMU_OPTION_win2k_hack }, { "usbdevice", HAS_ARG, QEMU_OPTION_usbdevice }, { "smp", HAS_ARG, QEMU_OPTION_smp }, + { "numa", HAS_ARG, QEMU_OPTION_numa}, { "vnc", HAS_ARG, QEMU_OPTION_vnc }, #ifdef CONFIG_CURSES { "curses", 0, QEMU_OPTION_curses }, @@ -4456,6 +4466,88 @@ static void termsig_setup(void) } #endif + +#define PARSE_FLAG_BITMASK 1 +#define PARSE_FLAG_SUFFIX 2 + +static int parse_to_array (const char *arg, uint64_t *array, + char delim, int maxentries, int flags) +{ +const char *s; +char *end; +int num = 0; +unsigned long long int val,endval; + + for (s = arg; s != NULL && *s != 0; s++) { + val = strtoull (s, &end, 10); + if (end == s && *s != '*') {num++; continue;} + if (num >= maxentries) break; + switch (*end) { + case 'g': + case 'G': + if (flags & PARSE_FLAG_SUFFIX) val *= 1024; + /* fall through */ + case 'm': + case 'M': + if (flags & PARSE_FLAG_SUFFIX) val *= 1024; + /* fall through */ + case 'k': + case 'K': + if (flags & PARSE_FLAG_SUFFIX) val *= 1024; + break; + case '*': + val = (unsigned long long int)-1; + break; + case '-': + if (!(flags & PARSE_FLAG_BITMASK)) break; + s = end + 1; + endval = strtoull (s, &end, 10); + val = (1 << (endval + 1)) - (1 << val); + break; + case 0: + if (flags & PARSE_FLAG_SUFFIX) val *= 1024 * 1024; + /* fall through */ + default: + if (flags & PARSE_FLAG_BITMASK) val = 1 << val; + break; + } + array[num++] = val; + if ((s = strchr (end, delim)) == NULL) break; + } + return num; +} + +int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems, + uint64_t *cpus, int maxentries, int expect_numnodes) +{ +const char *s; +char *arg, *val, *end, *strtokarg; +int num = 0; + + arg = strdup(opt); strtokarg = arg; + if (expect_numnodes) { + s = strtok(strtokarg, ","); + if (s == NULL) {free (arg); return -1;} + num = strtol (s, &end, 10); + if (s == end) {free (arg); return -1;} + strtokarg = NULL; + } + while ((s=strtok(strtokarg, ","))!=NULL) { + strtokarg = NULL; + if ((val = strchr (s, ':'))) { + *val++ = 0; + if (!strcmp (s, "mem") && mems != NULL) { + parse_to_array (val, mems, ';', maxentries, PARSE_FLAG_SUFFIX); + } else if (!strcmp (s, "cpu") && cpus != NULL) { + parse_to_array (val, cpus, ';', maxentries, PARSE_FLAG_BITMASK); + } else if (!strcmp (s, "pin") && hostnodes != NULL) { + parse_to_array (val, hostnodes, ';', maxentries, 0); + } + } + } + free (arg); + return num; +} int main(int argc, char **argv, char **envp) { @@ -4556,6 +4648,12 @@ int main(int argc, char **argv, char **e for(i = 1; i < MAX_PARALLEL_PORTS; i++) parallel_devices[i] = NULL; parallel_device_index = 0; + + for(i = 0; i < MAX_NODES; i++) { + hostnodes[i] = (uint64_t)-1; + node_to_cpus[i] = 0; + node_mem[i] = 0; + } usb_devices_index = 0; @@ -5011,6 +5109,14 @@ int main(int argc, char **argv, char **e exit(1); } break; + case QEMU_OPTION_numa: + numnumanodes = parse_numa_args (optarg, + hostnodes, node_mem, node_to_cpus, MAX_NODES, 1); + if (numnumanodes < 0) { + fprintf(stderr, "Invalid number of NUMA nodes\n"); + exit(1); + } + break; case QEMU_OPTION_vnc: vnc_display = optarg; break; @@ -5151,6 +5257,24 @@ int main(int argc, char **argv, char **e monitor_device = "stdio"; } + if (numnumanodes > 0) { + int i; + + if (numnumanodes > smp_cpus) + numnumanodes = smp_cpus; + + for (i = 0; i < numnumanodes; i++) if (node_mem[i] != 0) break; + if (i == numnumanodes) { + for (i = 0; i < numnumanodes; i++) + node_mem[i] = (ram_size / numnumanodes) & ~((1 << 20UL) - 1); + } + for (i = 0; i < numnumanodes; i++) if (node_to_cpus[i] != 0) break; + if (i == numnumanodes) { + for (i = 0; i < smp_cpus; i++) + node_to_cpus[i % numnumanodes] |= 1<