* [Qemu-devel] [PATCH 1/3] NUMA: add -numa command line option
@ 2008-12-11 11:29 Andre Przywara
2008-12-12 14:56 ` [Qemu-devel] " Anthony Liguori
0 siblings, 1 reply; 3+ messages in thread
From: Andre Przywara @ 2008-12-11 11:29 UTC (permalink / raw)
To: Anthony Liguori; +Cc: qemu-devel, Avi Kivity
[-- Attachment #1: Type: text/plain, Size: 550 bytes --]
This adds and parses a -numa command line option to QEMU.
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
--
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
[-- Attachment #2: qemunuma_cmdline.patch --]
[-- Type: text/x-patch, Size: 6938 bytes --]
# HG changeset patch
# User Andre Przywara <andre.przywara@amd.com>
# Date 1228991781 -3600
# Node ID 394d02758aa4358be3bcd14f9d59efaf42e89328
# Parent b3a4224604d267a0e406a6d6809947f342a6b2ed
introduce -numa command line option
diff -r b3a4224604d2 -r 394d02758aa4 sysemu.h
--- a/sysemu.h Thu Dec 11 11:22:27 2008 +0100
+++ b/sysemu.h Thu Dec 11 11:36:21 2008 +0100
@@ -92,6 +92,16 @@ extern int alt_grab;
extern int alt_grab;
extern int usb_enabled;
extern int smp_cpus;
+
+#define MAX_NODES 64
+extern int numnumanodes;
+extern uint64_t hostnodes[MAX_NODES];
+extern uint64_t node_mem[MAX_NODES];
+extern uint64_t node_to_cpus[MAX_NODES];
+
+int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
+ uint64_t *cpus, int maxentries, int expect_numnodes);
+
extern int cursor_hide;
extern int graphic_rotate;
extern int no_quit;
diff -r b3a4224604d2 -r 394d02758aa4 vl.c
--- a/vl.c Thu Dec 11 11:22:27 2008 +0100
+++ b/vl.c Thu Dec 11 11:36:21 2008 +0100
@@ -222,6 +222,10 @@ int win2k_install_hack = 0;
#endif
int usb_enabled = 0;
int smp_cpus = 1;
+int numnumanodes = 0;
+uint64_t hostnodes[MAX_NODES];
+uint64_t node_mem[MAX_NODES];
+uint64_t node_to_cpus[MAX_NODES];
const char *vnc_display;
int acpi_enabled = 1;
int fd_bootchk = 1;
@@ -3968,6 +3972,10 @@ static void help(int exitcode)
"-daemonize daemonize QEMU after initializing\n"
#endif
"-option-rom rom load a file, rom, into the option ROM space\n"
+ "-numa nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n"
+ " create a multi NUMA node guest and optionally pin it to\n"
+ " to the given host nodes. If mem and cpu are omitted,\n"
+ " resources are split equally\n"
#ifdef TARGET_SPARC
"-prom-env variable=value set OpenBIOS nvram variables\n"
#endif
@@ -4065,6 +4073,7 @@ enum {
QEMU_OPTION_usb,
QEMU_OPTION_usbdevice,
QEMU_OPTION_smp,
+ QEMU_OPTION_numa,
QEMU_OPTION_vnc,
QEMU_OPTION_no_acpi,
QEMU_OPTION_curses,
@@ -4171,6 +4180,7 @@ static const QEMUOption qemu_options[] =
{ "win2k-hack", 0, QEMU_OPTION_win2k_hack },
{ "usbdevice", HAS_ARG, QEMU_OPTION_usbdevice },
{ "smp", HAS_ARG, QEMU_OPTION_smp },
+ { "numa", HAS_ARG, QEMU_OPTION_numa},
{ "vnc", HAS_ARG, QEMU_OPTION_vnc },
#ifdef CONFIG_CURSES
{ "curses", 0, QEMU_OPTION_curses },
@@ -4456,6 +4466,88 @@ static void termsig_setup(void)
}
#endif
+
+#define PARSE_FLAG_BITMASK 1
+#define PARSE_FLAG_SUFFIX 2
+
+static int parse_to_array (const char *arg, uint64_t *array,
+ char delim, int maxentries, int flags)
+{
+const char *s;
+char *end;
+int num = 0;
+unsigned long long int val,endval;
+
+ for (s = arg; s != NULL && *s != 0; s++) {
+ val = strtoull (s, &end, 10);
+ if (end == s && *s != '*') {num++; continue;}
+ if (num >= maxentries) break;
+ switch (*end) {
+ case 'g':
+ case 'G':
+ if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+ /* fall through */
+ case 'm':
+ case 'M':
+ if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+ /* fall through */
+ case 'k':
+ case 'K':
+ if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+ break;
+ case '*':
+ val = (unsigned long long int)-1;
+ break;
+ case '-':
+ if (!(flags & PARSE_FLAG_BITMASK)) break;
+ s = end + 1;
+ endval = strtoull (s, &end, 10);
+ val = (1 << (endval + 1)) - (1 << val);
+ break;
+ case 0:
+ if (flags & PARSE_FLAG_SUFFIX) val *= 1024 * 1024;
+ /* fall through */
+ default:
+ if (flags & PARSE_FLAG_BITMASK) val = 1 << val;
+ break;
+ }
+ array[num++] = val;
+ if ((s = strchr (end, delim)) == NULL) break;
+ }
+ return num;
+}
+
+int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
+ uint64_t *cpus, int maxentries, int expect_numnodes)
+{
+const char *s;
+char *arg, *val, *end, *strtokarg;
+int num = 0;
+
+ arg = strdup(opt); strtokarg = arg;
+ if (expect_numnodes) {
+ s = strtok(strtokarg, ",");
+ if (s == NULL) {free (arg); return -1;}
+ num = strtol (s, &end, 10);
+ if (s == end) {free (arg); return -1;}
+ strtokarg = NULL;
+ }
+ while ((s=strtok(strtokarg, ","))!=NULL) {
+ strtokarg = NULL;
+ if ((val = strchr (s, ':'))) {
+ *val++ = 0;
+ if (!strcmp (s, "mem") && mems != NULL) {
+ parse_to_array (val, mems, ';', maxentries, PARSE_FLAG_SUFFIX);
+ } else if (!strcmp (s, "cpu") && cpus != NULL) {
+ parse_to_array (val, cpus, ';', maxentries, PARSE_FLAG_BITMASK);
+ } else if (!strcmp (s, "pin") && hostnodes != NULL) {
+ parse_to_array (val, hostnodes, ';', maxentries, 0);
+ }
+ }
+ }
+ free (arg);
+ return num;
+}
int main(int argc, char **argv, char **envp)
{
@@ -4556,6 +4648,12 @@ int main(int argc, char **argv, char **e
for(i = 1; i < MAX_PARALLEL_PORTS; i++)
parallel_devices[i] = NULL;
parallel_device_index = 0;
+
+ for(i = 0; i < MAX_NODES; i++) {
+ hostnodes[i] = (uint64_t)-1;
+ node_to_cpus[i] = 0;
+ node_mem[i] = 0;
+ }
usb_devices_index = 0;
@@ -5011,6 +5109,14 @@ int main(int argc, char **argv, char **e
exit(1);
}
break;
+ case QEMU_OPTION_numa:
+ numnumanodes = parse_numa_args (optarg,
+ hostnodes, node_mem, node_to_cpus, MAX_NODES, 1);
+ if (numnumanodes < 0) {
+ fprintf(stderr, "Invalid number of NUMA nodes\n");
+ exit(1);
+ }
+ break;
case QEMU_OPTION_vnc:
vnc_display = optarg;
break;
@@ -5151,6 +5257,24 @@ int main(int argc, char **argv, char **e
monitor_device = "stdio";
}
+ if (numnumanodes > 0) {
+ int i;
+
+ if (numnumanodes > smp_cpus)
+ numnumanodes = smp_cpus;
+
+ for (i = 0; i < numnumanodes; i++) if (node_mem[i] != 0) break;
+ if (i == numnumanodes) {
+ for (i = 0; i < numnumanodes; i++)
+ node_mem[i] = (ram_size / numnumanodes) & ~((1 << 20UL) - 1);
+ }
+ for (i = 0; i < numnumanodes; i++) if (node_to_cpus[i] != 0) break;
+ if (i == numnumanodes) {
+ for (i = 0; i < smp_cpus; i++)
+ node_to_cpus[i % numnumanodes] |= 1<<i;
+ }
+ }
+
#ifndef _WIN32
if (daemonize) {
pid_t pid;
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Qemu-devel] Re: [PATCH 1/3] NUMA: add -numa command line option
2008-12-11 11:29 [Qemu-devel] [PATCH 1/3] NUMA: add -numa command line option Andre Przywara
@ 2008-12-12 14:56 ` Anthony Liguori
2008-12-13 17:12 ` Andre Przywara
0 siblings, 1 reply; 3+ messages in thread
From: Anthony Liguori @ 2008-12-12 14:56 UTC (permalink / raw)
To: Andre Przywara; +Cc: qemu-devel, Avi Kivity
Andre Przywara wrote:
> This adds and parses a -numa command line option to QEMU.
>
> Signed-off-by: Andre Przywara <andre.przywara@amd.com>
>
> # HG changeset patch
> # User Andre Przywara <andre.przywara@amd.com>
> # Date 1228991781 -3600
> # Node ID 394d02758aa4358be3bcd14f9d59efaf42e89328
> # Parent b3a4224604d267a0e406a6d6809947f342a6b2ed
> introduce -numa command line option
>
> diff -r b3a4224604d2 -r 394d02758aa4 sysemu.h
> --- a/sysemu.h Thu Dec 11 11:22:27 2008 +0100
> +++ b/sysemu.h Thu Dec 11 11:36:21 2008 +0100
> @@ -92,6 +92,16 @@ extern int alt_grab;
> extern int alt_grab;
> extern int usb_enabled;
> extern int smp_cpus;
> +
> +#define MAX_NODES 64
> +extern int numnumanodes;
> +extern uint64_t hostnodes[MAX_NODES];
>
This should not be in this patch. This patch should just contain the
bits needed to create a virtual guest NUMA topology. The hostnode stuff
should be a separate patch.
> +extern uint64_t node_mem[MAX_NODES];
> +extern uint64_t node_to_cpus[MAX_NODES];
> +
> +int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
> + uint64_t *cpus, int maxentries, int expect_numnodes);
> +
> extern int cursor_hide;
> extern int graphic_rotate;
> extern int no_quit;
> diff -r b3a4224604d2 -r 394d02758aa4 vl.c
> --- a/vl.c Thu Dec 11 11:22:27 2008 +0100
> +++ b/vl.c Thu Dec 11 11:36:21 2008 +0100
> @@ -222,6 +222,10 @@ int win2k_install_hack = 0;
> #endif
> int usb_enabled = 0;
> int smp_cpus = 1;
> +int numnumanodes = 0;
> +uint64_t hostnodes[MAX_NODES];
> +uint64_t node_mem[MAX_NODES];
> +uint64_t node_to_cpus[MAX_NODES];
> const char *vnc_display;
> int acpi_enabled = 1;
> int fd_bootchk = 1;
> @@ -3968,6 +3972,10 @@ static void help(int exitcode)
> "-daemonize daemonize QEMU after initializing\n"
> #endif
> "-option-rom rom load a file, rom, into the option ROM space\n"
> + "-numa nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n"
> + " create a multi NUMA node guest and optionally pin it to\n"
> + " to the given host nodes. If mem and cpu are omitted,\n"
> + " resources are split equally\n"
>
You're whitespace damaged.
> +
> +#define PARSE_FLAG_BITMASK 1
> +#define PARSE_FLAG_SUFFIX 2
> +
> +static int parse_to_array (const char *arg, uint64_t *array,
> + char delim, int maxentries, int flags)
> +{
> +const char *s;
> +char *end;
> +int num = 0;
> +unsigned long long int val,endval;
>
You're really whitespace damaged.
> + for (s = arg; s != NULL && *s != 0; s++) {
> + val = strtoull (s, &end, 10);
> + if (end == s && *s != '*') {num++; continue;}
> + if (num >= maxentries) break;
> + switch (*end) {
> + case 'g':
> + case 'G':
> + if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> + /* fall through */
> + case 'm':
> + case 'M':
> + if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> + /* fall through */
> + case 'k':
> + case 'K':
> + if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> + break;
> + case '*':
> + val = (unsigned long long int)-1;
> + break;
> + case '-':
> + if (!(flags & PARSE_FLAG_BITMASK)) break;
> + s = end + 1;
> + endval = strtoull (s, &end, 10);
> + val = (1 << (endval + 1)) - (1 << val);
> + break;
> + case 0:
> + if (flags & PARSE_FLAG_SUFFIX) val *= 1024 * 1024;
> + /* fall through */
> + default:
> + if (flags & PARSE_FLAG_BITMASK) val = 1 << val;
> + break;
> + }
> + array[num++] = val;
> + if ((s = strchr (end, delim)) == NULL) break;
> + }
> + return num;
> +}
>
>
This syntax is sufficiently complex that it needs thorough documentation
(in qemu-doc.texi).
> +int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
> + uint64_t *cpus, int maxentries, int expect_numnodes)
> +{
> +const char *s;
> +char *arg, *val, *end, *strtokarg;
> +int num = 0;
> +
> + arg = strdup(opt); strtokarg = arg;
> + if (expect_numnodes) {
> + s = strtok(strtokarg, ",");
>
strtok is generally evil. strsep() is a better choice.
> + if (s == NULL) {free (arg); return -1;}
>
Don't do this one on line.
> + num = strtol (s, &end, 10);
> + if (s == end) {free (arg); return -1;}
> + strtokarg = NULL;
> + }
> + while ((s=strtok(strtokarg, ","))!=NULL) {
> + strtokarg = NULL;
> + if ((val = strchr (s, ':'))) {
> + *val++ = 0;
> + if (!strcmp (s, "mem") && mems != NULL) {
> + parse_to_array (val, mems, ';', maxentries, PARSE_FLAG_SUFFIX);
> + } else if (!strcmp (s, "cpu") && cpus != NULL) {
> + parse_to_array (val, cpus, ';', maxentries, PARSE_FLAG_BITMASK);
> + } else if (!strcmp (s, "pin") && hostnodes != NULL) {
> + parse_to_array (val, hostnodes, ';', maxentries, 0);
> + }
> + }
> + }
> + free (arg);
> + return num;
> +
>
Please try to make things make the rest of qemu better. You have a lot
of weird space after function names, etc. Consistency in large projects
is important.
> int main(int argc, char **argv, char **envp)
> {
> @@ -4556,6 +4648,12 @@ int main(int argc, char **argv, char **e
> for(i = 1; i < MAX_PARALLEL_PORTS; i++)
> parallel_devices[i] = NULL;
> parallel_device_index = 0;
> +
> + for(i = 0; i < MAX_NODES; i++) {
> + hostnodes[i] = (uint64_t)-1;
> + node_to_cpus[i] = 0;
> + node_mem[i] = 0;
> + }
>
> usb_devices_index = 0;
>
> @@ -5011,6 +5109,14 @@ int main(int argc, char **argv, char **e
> exit(1);
> }
> break;
> + case QEMU_OPTION_numa:
> + numnumanodes = parse_numa_args (optarg,
> + hostnodes, node_mem, node_to_cpus, MAX_NODES, 1);
> + if (numnumanodes < 0) {
> + fprintf(stderr, "Invalid number of NUMA nodes\n");
> + exit(1);
> + }
> + break;
> case QEMU_OPTION_vnc:
> vnc_display = optarg;
> break;
> @@ -5151,6 +5257,24 @@ int main(int argc, char **argv, char **e
> monitor_device = "stdio";
> }
>
> + if (numnumanodes > 0) {
> + int i;
> +
> + if (numnumanodes > smp_cpus)
> + numnumanodes = smp_cpus;
>
Why have this limitation? We would like to see CPU-less nodes supported
either as memory-only nodes or as IO nodes.
Which leads me to the question of how to plan on describing which nodes
have what hardware?
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Qemu-devel] Re: [PATCH 1/3] NUMA: add -numa command line option
2008-12-12 14:56 ` [Qemu-devel] " Anthony Liguori
@ 2008-12-13 17:12 ` Andre Przywara
0 siblings, 0 replies; 3+ messages in thread
From: Andre Przywara @ 2008-12-13 17:12 UTC (permalink / raw)
To: Anthony Liguori; +Cc: qemu-devel, Avi Kivity
Anthony Liguori wrote:
> Andre Przywara wrote:
>> This adds and parses a -numa command line option to QEMU.
>>...
>> +
>> +#define MAX_NODES 64
>> +extern int numnumanodes;
>> +extern uint64_t hostnodes[MAX_NODES];
>>
>
> This should not be in this patch. This patch should just contain the
> bits needed to create a virtual guest NUMA topology. The hostnode stuff
> should be a separate patch.
Agreed. I now moved the firmware interface parts of patch 2 into this
one, so that you have full functionality with this first patch already
(except host pinning and the BIOS part). Shall I move the 'info numa'
part of the monitor also into this one or do you prefer smaller patches?
>> @@ -3968,6 +3972,10 @@ static void help(int exitcode)
>> "-daemonize daemonize QEMU after initializing\n"
>> #endif
>> "-option-rom rom load a file, rom, into the option ROM space\n"
>> + "-numa
>> nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n"
>> + " create a multi NUMA node guest and
>> optionally pin it to\n"
>> + " to the given host nodes. If mem and cpu
>> are omitted,\n"
>> + " resources are split equally\n"
>>
>
> You're whitespace damaged.
Actually the damage here is in the three lines before, which start with
a leading tab. All other help lines are formatted like my ones.
Do you want me to send whitespace fixes for this? Before or after the
NUMA part?
>> + if (numnumanodes > 0) {
>> + int i;
>> +
>> + if (numnumanodes > smp_cpus)
>> + numnumanodes = smp_cpus;
>>
>
> Why have this limitation? We would like to see CPU-less nodes supported
> either as memory-only nodes or as IO nodes.
I introduced this after some tests, which made Linux crash. Honestly I
don't remember the exact behavior or the error message anymore, but I
will do some research again.
> Which leads me to the question of how to plan on describing which nodes
> have what hardware?
Do you mean peripheral devices? Like multiple southbridges connected to
different NUMA nodes? I don't think that this kind of emulation is
needed for QEMU, beside that this is out of scope for the ACPI tables.
If you meant this, I can elaborate on this. If not, please explain.
Regards,
Andre.
--
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2008-12-13 17:12 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-11 11:29 [Qemu-devel] [PATCH 1/3] NUMA: add -numa command line option Andre Przywara
2008-12-12 14:56 ` [Qemu-devel] " Anthony Liguori
2008-12-13 17:12 ` Andre Przywara
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.