All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 1/3] NUMA: add -numa command line option
@ 2008-12-11 11:29 Andre Przywara
  2008-12-12 14:56 ` [Qemu-devel] " Anthony Liguori
  0 siblings, 1 reply; 3+ messages in thread
From: Andre Przywara @ 2008-12-11 11:29 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Avi Kivity

[-- Attachment #1: Type: text/plain, Size: 550 bytes --]

This adds and parses a -numa command line option to QEMU.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

[-- Attachment #2: qemunuma_cmdline.patch --]
[-- Type: text/x-patch, Size: 6938 bytes --]

# HG changeset patch
# User Andre Przywara <andre.przywara@amd.com>
# Date 1228991781 -3600
# Node ID 394d02758aa4358be3bcd14f9d59efaf42e89328
# Parent  b3a4224604d267a0e406a6d6809947f342a6b2ed
introduce -numa command line option

diff -r b3a4224604d2 -r 394d02758aa4 sysemu.h
--- a/sysemu.h	Thu Dec 11 11:22:27 2008 +0100
+++ b/sysemu.h	Thu Dec 11 11:36:21 2008 +0100
@@ -92,6 +92,16 @@ extern int alt_grab;
 extern int alt_grab;
 extern int usb_enabled;
 extern int smp_cpus;
+
+#define MAX_NODES 64
+extern int numnumanodes;
+extern uint64_t hostnodes[MAX_NODES];
+extern uint64_t node_mem[MAX_NODES];
+extern uint64_t node_to_cpus[MAX_NODES];
+
+int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
+                     uint64_t *cpus, int maxentries, int expect_numnodes);
+
 extern int cursor_hide;
 extern int graphic_rotate;
 extern int no_quit;
diff -r b3a4224604d2 -r 394d02758aa4 vl.c
--- a/vl.c	Thu Dec 11 11:22:27 2008 +0100
+++ b/vl.c	Thu Dec 11 11:36:21 2008 +0100
@@ -222,6 +222,10 @@ int win2k_install_hack = 0;
 #endif
 int usb_enabled = 0;
 int smp_cpus = 1;
+int numnumanodes = 0;
+uint64_t hostnodes[MAX_NODES];
+uint64_t node_mem[MAX_NODES];
+uint64_t node_to_cpus[MAX_NODES];
 const char *vnc_display;
 int acpi_enabled = 1;
 int fd_bootchk = 1;
@@ -3968,6 +3972,10 @@ static void help(int exitcode)
 	   "-daemonize      daemonize QEMU after initializing\n"
 #endif
 	   "-option-rom rom load a file, rom, into the option ROM space\n"
+           "-numa nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n"
+           "                create a multi NUMA node guest and optionally pin it to\n"
+           "                to the given host nodes. If mem and cpu are omitted,\n"
+           "                resources are split equally\n"
 #ifdef TARGET_SPARC
            "-prom-env variable=value  set OpenBIOS nvram variables\n"
 #endif
@@ -4065,6 +4073,7 @@ enum {
     QEMU_OPTION_usb,
     QEMU_OPTION_usbdevice,
     QEMU_OPTION_smp,
+    QEMU_OPTION_numa,
     QEMU_OPTION_vnc,
     QEMU_OPTION_no_acpi,
     QEMU_OPTION_curses,
@@ -4171,6 +4180,7 @@ static const QEMUOption qemu_options[] =
     { "win2k-hack", 0, QEMU_OPTION_win2k_hack },
     { "usbdevice", HAS_ARG, QEMU_OPTION_usbdevice },
     { "smp", HAS_ARG, QEMU_OPTION_smp },
+    { "numa", HAS_ARG, QEMU_OPTION_numa},
     { "vnc", HAS_ARG, QEMU_OPTION_vnc },
 #ifdef CONFIG_CURSES
     { "curses", 0, QEMU_OPTION_curses },
@@ -4456,6 +4466,88 @@ static void termsig_setup(void)
 }
 
 #endif
+
+#define PARSE_FLAG_BITMASK   1
+#define PARSE_FLAG_SUFFIX    2
+
+static int parse_to_array (const char *arg, uint64_t *array,
+    char delim, int maxentries, int flags)
+{
+const char *s;
+char *end;
+int num = 0;
+unsigned long long int val,endval;
+
+    for (s = arg; s != NULL && *s != 0; s++) {
+        val = strtoull (s, &end, 10);
+        if (end == s && *s != '*') {num++; continue;}
+        if (num >= maxentries) break;
+        switch (*end) {
+            case 'g':
+            case 'G':
+                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+           	/* fall through */
+            case 'm':
+            case 'M':
+                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+           	/* fall through */
+            case 'k':
+            case 'K':
+           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+           	    break;
+            case '*':
+                val = (unsigned long long int)-1;
+                break;
+            case '-':
+           	    if (!(flags & PARSE_FLAG_BITMASK)) break;
+                s = end + 1;
+                endval = strtoull (s, &end, 10);
+           	    val = (1 << (endval + 1)) - (1 << val);
+                break;
+            case 0:
+           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024 * 1024;
+           	/* fall through */
+            default:
+           	    if (flags & PARSE_FLAG_BITMASK) val = 1 << val;
+           	    break;
+        }
+        array[num++] = val;
+        if ((s = strchr (end, delim)) == NULL) break;
+    }
+    return num;
+}
+
+int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
+                     uint64_t *cpus, int maxentries, int expect_numnodes)
+{
+const char *s;
+char *arg, *val, *end, *strtokarg;
+int num = 0;
+
+    arg = strdup(opt); strtokarg = arg;
+    if (expect_numnodes) {
+        s = strtok(strtokarg, ",");
+        if (s == NULL) {free (arg); return -1;}
+        num = strtol (s, &end, 10);
+        if (s == end) {free (arg); return -1;}
+        strtokarg = NULL;
+    }
+    while ((s=strtok(strtokarg, ","))!=NULL) {
+        strtokarg = NULL;
+        if ((val = strchr (s, ':'))) {
+            *val++ = 0;
+            if (!strcmp (s, "mem") && mems != NULL) {
+                parse_to_array (val, mems, ';', maxentries, PARSE_FLAG_SUFFIX);
+            } else if (!strcmp (s, "cpu") && cpus != NULL) {
+                parse_to_array (val, cpus, ';', maxentries, PARSE_FLAG_BITMASK);
+            } else if (!strcmp (s, "pin") && hostnodes != NULL) {
+                parse_to_array (val, hostnodes, ';', maxentries, 0);
+            }
+        }
+    }
+    free (arg);
+    return num;
+}
 
 int main(int argc, char **argv, char **envp)
 {
@@ -4556,6 +4648,12 @@ int main(int argc, char **argv, char **e
     for(i = 1; i < MAX_PARALLEL_PORTS; i++)
         parallel_devices[i] = NULL;
     parallel_device_index = 0;
+
+    for(i = 0; i < MAX_NODES; i++) {
+        hostnodes[i] = (uint64_t)-1;
+        node_to_cpus[i] = 0;
+        node_mem[i] = 0;
+    }
 
     usb_devices_index = 0;
 
@@ -5011,6 +5109,14 @@ int main(int argc, char **argv, char **e
                     exit(1);
                 }
                 break;
+            case QEMU_OPTION_numa:
+                numnumanodes = parse_numa_args (optarg,
+                    hostnodes, node_mem, node_to_cpus, MAX_NODES, 1);
+                if (numnumanodes < 0) {
+                    fprintf(stderr, "Invalid number of NUMA nodes\n");
+                    exit(1);
+                }
+                break;
 	    case QEMU_OPTION_vnc:
 		vnc_display = optarg;
 		break;
@@ -5151,6 +5257,24 @@ int main(int argc, char **argv, char **e
            monitor_device = "stdio";
     }
 
+    if (numnumanodes > 0) {
+        int i;
+
+        if (numnumanodes > smp_cpus)
+            numnumanodes = smp_cpus;
+
+        for (i = 0; i < numnumanodes; i++) if (node_mem[i] != 0) break;
+        if (i == numnumanodes) {
+            for (i = 0; i < numnumanodes; i++)
+                node_mem[i] = (ram_size / numnumanodes) & ~((1 << 20UL) - 1);
+        }
+        for (i = 0; i < numnumanodes; i++) if (node_to_cpus[i] != 0) break;
+        if (i == numnumanodes) {
+            for (i = 0; i < smp_cpus; i++)
+                node_to_cpus[i % numnumanodes] |= 1<<i;
+        }
+    }
+
 #ifndef _WIN32
     if (daemonize) {
 	pid_t pid;

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Qemu-devel] Re: [PATCH 1/3] NUMA: add -numa command line option
  2008-12-11 11:29 [Qemu-devel] [PATCH 1/3] NUMA: add -numa command line option Andre Przywara
@ 2008-12-12 14:56 ` Anthony Liguori
  2008-12-13 17:12   ` Andre Przywara
  0 siblings, 1 reply; 3+ messages in thread
From: Anthony Liguori @ 2008-12-12 14:56 UTC (permalink / raw)
  To: Andre Przywara; +Cc: qemu-devel, Avi Kivity

Andre Przywara wrote:
> This adds and parses a -numa command line option to QEMU.
>
> Signed-off-by: Andre Przywara <andre.przywara@amd.com>
>
> # HG changeset patch
> # User Andre Przywara <andre.przywara@amd.com>
> # Date 1228991781 -3600
> # Node ID 394d02758aa4358be3bcd14f9d59efaf42e89328
> # Parent  b3a4224604d267a0e406a6d6809947f342a6b2ed
> introduce -numa command line option
>
> diff -r b3a4224604d2 -r 394d02758aa4 sysemu.h
> --- a/sysemu.h	Thu Dec 11 11:22:27 2008 +0100
> +++ b/sysemu.h	Thu Dec 11 11:36:21 2008 +0100
> @@ -92,6 +92,16 @@ extern int alt_grab;
>  extern int alt_grab;
>  extern int usb_enabled;
>  extern int smp_cpus;
> +
> +#define MAX_NODES 64
> +extern int numnumanodes;
> +extern uint64_t hostnodes[MAX_NODES];
>   

This should not be in this patch.  This patch should just contain the 
bits needed to create a virtual guest NUMA topology.  The hostnode stuff 
should be a separate patch.

> +extern uint64_t node_mem[MAX_NODES];
> +extern uint64_t node_to_cpus[MAX_NODES];
> +
> +int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
> +                     uint64_t *cpus, int maxentries, int expect_numnodes);
> +
>  extern int cursor_hide;
>  extern int graphic_rotate;
>  extern int no_quit;
> diff -r b3a4224604d2 -r 394d02758aa4 vl.c
> --- a/vl.c	Thu Dec 11 11:22:27 2008 +0100
> +++ b/vl.c	Thu Dec 11 11:36:21 2008 +0100
> @@ -222,6 +222,10 @@ int win2k_install_hack = 0;
>  #endif
>  int usb_enabled = 0;
>  int smp_cpus = 1;
> +int numnumanodes = 0;
> +uint64_t hostnodes[MAX_NODES];
> +uint64_t node_mem[MAX_NODES];
> +uint64_t node_to_cpus[MAX_NODES];
>  const char *vnc_display;
>  int acpi_enabled = 1;
>  int fd_bootchk = 1;
> @@ -3968,6 +3972,10 @@ static void help(int exitcode)
>  	   "-daemonize      daemonize QEMU after initializing\n"
>  #endif
>  	   "-option-rom rom load a file, rom, into the option ROM space\n"
> +           "-numa nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n"
> +           "                create a multi NUMA node guest and optionally pin it to\n"
> +           "                to the given host nodes. If mem and cpu are omitted,\n"
> +           "                resources are split equally\n"
>   

You're whitespace damaged.

> +
> +#define PARSE_FLAG_BITMASK   1
> +#define PARSE_FLAG_SUFFIX    2
> +
> +static int parse_to_array (const char *arg, uint64_t *array,
> +    char delim, int maxentries, int flags)
> +{
> +const char *s;
> +char *end;
> +int num = 0;
> +unsigned long long int val,endval;
>   

You're really whitespace damaged.

> +    for (s = arg; s != NULL && *s != 0; s++) {
> +        val = strtoull (s, &end, 10);
> +        if (end == s && *s != '*') {num++; continue;}
> +        if (num >= maxentries) break;
> +        switch (*end) {
> +            case 'g':
> +            case 'G':
> +                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> +           	/* fall through */
> +            case 'm':
> +            case 'M':
> +                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> +           	/* fall through */
> +            case 'k':
> +            case 'K':
> +           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> +           	    break;
> +            case '*':
> +                val = (unsigned long long int)-1;
> +                break;
> +            case '-':
> +           	    if (!(flags & PARSE_FLAG_BITMASK)) break;
> +                s = end + 1;
> +                endval = strtoull (s, &end, 10);
> +           	    val = (1 << (endval + 1)) - (1 << val);
> +                break;
> +            case 0:
> +           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024 * 1024;
> +           	/* fall through */
> +            default:
> +           	    if (flags & PARSE_FLAG_BITMASK) val = 1 << val;
> +           	    break;
> +        }
> +        array[num++] = val;
> +        if ((s = strchr (end, delim)) == NULL) break;
> +    }
> +    return num;
> +}
>
>   

This syntax is sufficiently complex that it needs thorough documentation 
(in qemu-doc.texi).

> +int parse_numa_args (const char *opt, uint64_t *hostnodes, uint64_t *mems,
> +                     uint64_t *cpus, int maxentries, int expect_numnodes)
> +{
> +const char *s;
> +char *arg, *val, *end, *strtokarg;
> +int num = 0;
> +
> +    arg = strdup(opt); strtokarg = arg;
> +    if (expect_numnodes) {
> +        s = strtok(strtokarg, ",");
>   

strtok is generally evil.  strsep() is a better choice.

> +        if (s == NULL) {free (arg); return -1;}
>   

Don't do this one on line.

> +        num = strtol (s, &end, 10);
> +        if (s == end) {free (arg); return -1;}
> +        strtokarg = NULL;
> +    }
> +    while ((s=strtok(strtokarg, ","))!=NULL) {
> +        strtokarg = NULL;
> +        if ((val = strchr (s, ':'))) {
> +            *val++ = 0;
> +            if (!strcmp (s, "mem") && mems != NULL) {
> +                parse_to_array (val, mems, ';', maxentries, PARSE_FLAG_SUFFIX);
> +            } else if (!strcmp (s, "cpu") && cpus != NULL) {
> +                parse_to_array (val, cpus, ';', maxentries, PARSE_FLAG_BITMASK);
> +            } else if (!strcmp (s, "pin") && hostnodes != NULL) {
> +                parse_to_array (val, hostnodes, ';', maxentries, 0);
> +            }
> +        }
> +    }
> +    free (arg);
> +    return num;
> +
>   

Please try to make things make the rest of qemu better.  You have a lot 
of weird space after function names, etc.  Consistency in large projects 
is important.

>  int main(int argc, char **argv, char **envp)
>  {
> @@ -4556,6 +4648,12 @@ int main(int argc, char **argv, char **e
>      for(i = 1; i < MAX_PARALLEL_PORTS; i++)
>          parallel_devices[i] = NULL;
>      parallel_device_index = 0;
> +
> +    for(i = 0; i < MAX_NODES; i++) {
> +        hostnodes[i] = (uint64_t)-1;
> +        node_to_cpus[i] = 0;
> +        node_mem[i] = 0;
> +    }
>  
>      usb_devices_index = 0;
>  
> @@ -5011,6 +5109,14 @@ int main(int argc, char **argv, char **e
>                      exit(1);
>                  }
>                  break;
> +            case QEMU_OPTION_numa:
> +                numnumanodes = parse_numa_args (optarg,
> +                    hostnodes, node_mem, node_to_cpus, MAX_NODES, 1);
> +                if (numnumanodes < 0) {
> +                    fprintf(stderr, "Invalid number of NUMA nodes\n");
> +                    exit(1);
> +                }
> +                break;
>  	    case QEMU_OPTION_vnc:
>  		vnc_display = optarg;
>  		break;
> @@ -5151,6 +5257,24 @@ int main(int argc, char **argv, char **e
>             monitor_device = "stdio";
>      }
>  
> +    if (numnumanodes > 0) {
> +        int i;
> +
> +        if (numnumanodes > smp_cpus)
> +            numnumanodes = smp_cpus;
>   

Why have this limitation?  We would like to see CPU-less nodes supported 
either as memory-only nodes or as IO nodes.

Which leads me to the question of how to plan on describing which nodes 
have what hardware?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Qemu-devel] Re: [PATCH 1/3] NUMA: add -numa command line option
  2008-12-12 14:56 ` [Qemu-devel] " Anthony Liguori
@ 2008-12-13 17:12   ` Andre Przywara
  0 siblings, 0 replies; 3+ messages in thread
From: Andre Przywara @ 2008-12-13 17:12 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Avi Kivity

Anthony Liguori wrote:
> Andre Przywara wrote:
>> This adds and parses a -numa command line option to QEMU.
>>...
>> +
>> +#define MAX_NODES 64
>> +extern int numnumanodes;
>> +extern uint64_t hostnodes[MAX_NODES];
>>   
> 
> This should not be in this patch.  This patch should just contain the 
> bits needed to create a virtual guest NUMA topology.  The hostnode stuff 
> should be a separate patch.
Agreed. I now moved the firmware interface parts of patch 2 into this 
one, so that you have full functionality with this first patch already 
(except host pinning and the BIOS part). Shall I move the 'info numa' 
part of the monitor also into this one or do you prefer smaller patches?

>> @@ -3968,6 +3972,10 @@ static void help(int exitcode)
>>         "-daemonize      daemonize QEMU after initializing\n"
>>  #endif
>>         "-option-rom rom load a file, rom, into the option ROM space\n"
>> +           "-numa 
>> nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n"
>> +           "                create a multi NUMA node guest and 
>> optionally pin it to\n"
>> +           "                to the given host nodes. If mem and cpu 
>> are omitted,\n"
>> +           "                resources are split equally\n"
>>   
> 
> You're whitespace damaged.
Actually the damage here is in the three lines before, which start with 
a leading tab. All other help lines are formatted like my ones.
Do you want me to send whitespace fixes for this? Before or after the 
NUMA part?

>> +    if (numnumanodes > 0) {
>> +        int i;
>> +
>> +        if (numnumanodes > smp_cpus)
>> +            numnumanodes = smp_cpus;
>>   
> 
> Why have this limitation?  We would like to see CPU-less nodes supported 
> either as memory-only nodes or as IO nodes.
I introduced this after some tests, which made Linux crash. Honestly I 
don't remember the exact behavior or the error message anymore, but I 
will do some research again.

> Which leads me to the question of how to plan on describing which nodes 
> have what hardware?
Do you mean peripheral devices? Like multiple southbridges connected to 
different NUMA nodes? I don't think that this kind of emulation is 
needed for QEMU, beside that this is out of scope for the ACPI tables.
If you meant this, I can elaborate on this. If not, please explain.

Regards,
Andre.

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-12-13 17:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-11 11:29 [Qemu-devel] [PATCH 1/3] NUMA: add -numa command line option Andre Przywara
2008-12-12 14:56 ` [Qemu-devel] " Anthony Liguori
2008-12-13 17:12   ` Andre Przywara

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.