All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 1/8] v2: introduce -numa command line option
@ 2008-12-16 14:13 Andre Przywara
  2008-12-16 21:13 ` [Qemu-devel] " Anthony Liguori
  0 siblings, 1 reply; 4+ messages in thread
From: Andre Przywara @ 2008-12-16 14:13 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Avi Kivity

[-- Attachment #1: Type: text/plain, Size: 491 bytes --]

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

[-- Attachment #2: qemunuma_v2_cmdline.patch --]
[-- Type: text/x-patch, Size: 7048 bytes --]

# HG changeset patch
# User Andre Przywara <andre.przywara@amd.com>
# Date 1229425990 -3600
# Node ID 6690ab0a34fdedaebbfe1068dfe7351734e8a1d7
# Parent  4271a7547668e4b02cae88e66c936d9cacab624e
add -numa command line option

diff -r 4271a7547668 -r 6690ab0a34fd sysemu.h
--- a/sysemu.h	Tue Dec 16 12:02:50 2008 +0100
+++ b/sysemu.h	Tue Dec 16 12:13:10 2008 +0100
@@ -92,6 +92,15 @@ extern int alt_grab;
 extern int alt_grab;
 extern int usb_enabled;
 extern int smp_cpus;
+
+#define MAX_NODES 64
+extern int numnumanodes;
+extern uint64_t node_mem[MAX_NODES];
+extern uint64_t node_to_cpus[MAX_NODES];
+
+int parse_numa_args(const char *opt, uint64_t *mems,
+                     uint64_t *cpus, int maxentries, int expect_numnodes);
+
 extern int cursor_hide;
 extern int graphic_rotate;
 extern int no_quit;
diff -r 4271a7547668 -r 6690ab0a34fd vl.c
--- a/vl.c	Tue Dec 16 12:02:50 2008 +0100
+++ b/vl.c	Tue Dec 16 12:13:10 2008 +0100
@@ -222,6 +222,9 @@ int win2k_install_hack = 0;
 #endif
 int usb_enabled = 0;
 int smp_cpus = 1;
+int numnumanodes = 0;
+uint64_t node_mem[MAX_NODES];
+uint64_t node_to_cpus[MAX_NODES];
 const char *vnc_display;
 int acpi_enabled = 1;
 int fd_bootchk = 1;
@@ -3968,6 +3971,10 @@ static void help(int exitcode)
 	   "-daemonize      daemonize QEMU after initializing\n"
 #endif
 	   "-option-rom rom load a file, rom, into the option ROM space\n"
+           "-numa nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n"
+           "                create a multi NUMA node guest and optionally pin it to\n"
+           "                to the given host nodes. If mem and cpu are omitted,\n"
+           "                resources are split equally\n"
 #ifdef TARGET_SPARC
            "-prom-env variable=value  set OpenBIOS nvram variables\n"
 #endif
@@ -4065,6 +4072,7 @@ enum {
     QEMU_OPTION_usb,
     QEMU_OPTION_usbdevice,
     QEMU_OPTION_smp,
+    QEMU_OPTION_numa,
     QEMU_OPTION_vnc,
     QEMU_OPTION_no_acpi,
     QEMU_OPTION_curses,
@@ -4171,6 +4179,7 @@ static const QEMUOption qemu_options[] =
     { "win2k-hack", 0, QEMU_OPTION_win2k_hack },
     { "usbdevice", HAS_ARG, QEMU_OPTION_usbdevice },
     { "smp", HAS_ARG, QEMU_OPTION_smp },
+    { "numa", HAS_ARG, QEMU_OPTION_numa},
     { "vnc", HAS_ARG, QEMU_OPTION_vnc },
 #ifdef CONFIG_CURSES
     { "curses", 0, QEMU_OPTION_curses },
@@ -4456,6 +4465,94 @@ static void termsig_setup(void)
 }
 
 #endif
+
+#define PARSE_FLAG_BITMASK   1
+#define PARSE_FLAG_SUFFIX    2
+
+static int parse_to_array(const char *arg, uint64_t *array,
+    char delim, int maxentries, int flags)
+{
+    const char *s;
+    char *end;
+    int num = 0;
+    unsigned long long int val,endval;
+
+    for (s = arg; s != NULL && *s != 0; s++) {
+        val = strtoull(s, &end, 10);
+        if (end == s && *s != '*') {
+            num++; continue;
+        }
+        if (num >= maxentries) break;
+        if (*end == delim && (flags & PARSE_FLAG_SUFFIX))
+            val *= 1024 * 1024;
+        switch (*end) {
+            case 'g':
+            case 'G':
+                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+           	/* fall through */
+            case 'm':
+            case 'M':
+                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+           	/* fall through */
+            case 'k':
+            case 'K':
+           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
+           	    break;
+            case '*':
+                val = (unsigned long long int)-1;
+                break;
+            case '-':
+           	    if (!(flags & PARSE_FLAG_BITMASK)) break;
+                s = end + 1;
+                endval = strtoull(s, &end, 10);
+           	    val = (1 << (endval + 1)) - (1 << val);
+                break;
+            case 0:
+           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024 * 1024;
+           	/* fall through */
+            default:
+           	    if (flags & PARSE_FLAG_BITMASK) val = 1 << val;
+           	    break;
+        }
+        array[num++] = val;
+        if ((s = strchr(end, delim)) == NULL) break;
+    }
+    return num;
+}
+
+int parse_numa_args(const char *opt, uint64_t *mems,
+                     uint64_t *cpus, int maxentries, int expect_numnodes)
+{
+const char *s;
+char *arg, *val, *end, *token;
+int num = 0;
+
+    arg = strdup(opt); token = arg;
+    if (expect_numnodes) {
+        s = strsep(&token, ",");
+        if (s == NULL) {
+            free(arg);
+            return -1;
+        }
+        num = strtol(s, &end, 10);
+        if (s == end) {
+            free(arg);
+            return -1;
+        }
+    }
+    while ((s=strsep(&token, ","))!=NULL) {
+        if ((val = strchr(s, ':'))) {
+            *val++ = 0;
+            if (!strcmp(s, "mem") && mems != NULL) {
+                parse_to_array(val, mems, ';', maxentries, PARSE_FLAG_SUFFIX);
+            } else if (!strcmp(s, "cpu") && cpus != NULL) {
+                parse_to_array(val, cpus, ';', maxentries, PARSE_FLAG_BITMASK);
+            }
+        }
+    }
+    free(arg);
+    return num;
+}
 
 int main(int argc, char **argv, char **envp)
 {
@@ -4556,6 +4653,11 @@ int main(int argc, char **argv, char **e
     for(i = 1; i < MAX_PARALLEL_PORTS; i++)
         parallel_devices[i] = NULL;
     parallel_device_index = 0;
+
+    for(i = 0; i < MAX_NODES; i++) {
+        node_to_cpus[i] = 0;
+        node_mem[i] = 0;
+    }
 
     usb_devices_index = 0;
 
@@ -5011,6 +5113,20 @@ int main(int argc, char **argv, char **e
                     exit(1);
                 }
                 break;
+            case QEMU_OPTION_numa:
+                if (numnumanodes > 0)
+                    parse_numa_args(optarg, node_mem,
+                        node_to_cpus, MAX_NODES, 0);
+                else
+                    numnumanodes = parse_numa_args(optarg,
+                        node_mem, node_to_cpus, MAX_NODES, 1);
+                numnumanodes = parse_numa_args(optarg,
+                    node_mem, node_to_cpus, MAX_NODES, 1);
+                if (numnumanodes < 0) {
+                    fprintf(stderr, "Invalid number of NUMA nodes\n");
+                    exit(1);
+                }
+                break;
 	    case QEMU_OPTION_vnc:
 		vnc_display = optarg;
 		break;
@@ -5151,6 +5267,24 @@ int main(int argc, char **argv, char **e
            monitor_device = "stdio";
     }
 
+    if (numnumanodes > 0) {
+        int i;
+
+        if (numnumanodes > smp_cpus)
+            numnumanodes = smp_cpus;
+
+        for (i = 0; i < numnumanodes; i++) if (node_mem[i] != 0) break;
+        if (i == numnumanodes) {
+            for (i = 0; i < numnumanodes; i++)
+                node_mem[i] = (ram_size / numnumanodes) & ~((1 << 20UL) - 1);
+        }
+        for (i = 0; i < numnumanodes; i++) if (node_to_cpus[i] != 0) break;
+        if (i == numnumanodes) {
+            for (i = 0; i < smp_cpus; i++)
+                node_to_cpus[i % numnumanodes] |= 1<<i;
+        }
+    }
+
 #ifndef _WIN32
     if (daemonize) {
 	pid_t pid;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] Re: [PATCH 1/8] v2: introduce -numa command line option
  2008-12-16 14:13 [Qemu-devel] [PATCH 1/8] v2: introduce -numa command line option Andre Przywara
@ 2008-12-16 21:13 ` Anthony Liguori
  2008-12-16 23:31   ` Andre Przywara
  0 siblings, 1 reply; 4+ messages in thread
From: Anthony Liguori @ 2008-12-16 21:13 UTC (permalink / raw)
  To: Andre Przywara; +Cc: qemu-devel, Avi Kivity

Andre Przywara wrote:
> Signed-off-by: Andre Przywara <andre.przywara@amd.com>
>
> # HG changeset patch
> # User Andre Przywara <andre.przywara@amd.com>
> # Date 1229425990 -3600
> # Node ID 6690ab0a34fdedaebbfe1068dfe7351734e8a1d7
> # Parent  4271a7547668e4b02cae88e66c936d9cacab624e
> add -numa command line option
>
> diff -r 4271a7547668 -r 6690ab0a34fd sysemu.h
>
> @@ -4065,6 +4072,7 @@ enum {
>      QEMU_OPTION_usb,
>      QEMU_OPTION_usbdevice,
>      QEMU_OPTION_smp,
> +    QEMU_OPTION_numa,
>      QEMU_OPTION_vnc,
>      QEMU_OPTION_no_acpi,
>      QEMU_OPTION_curses,
> @@ -4171,6 +4179,7 @@ static const QEMUOption qemu_options[] =
>      { "win2k-hack", 0, QEMU_OPTION_win2k_hack },
>      { "usbdevice", HAS_ARG, QEMU_OPTION_usbdevice },
>      { "smp", HAS_ARG, QEMU_OPTION_smp },
> +    { "numa", HAS_ARG, QEMU_OPTION_numa},
>      { "vnc", HAS_ARG, QEMU_OPTION_vnc },
>  #ifdef CONFIG_CURSES
>      { "curses", 0, QEMU_OPTION_curses },
> @@ -4456,6 +4465,94 @@ static void termsig_setup(void)
>  }
>  
>  #endif
> +
> +#define PARSE_FLAG_BITMASK   1
> +#define PARSE_FLAG_SUFFIX    2
> +
> +static int parse_to_array(const char *arg, uint64_t *array,
> +    char delim, int maxentries, int flags)
> +{
> +    const char *s;
> +    char *end;
> +    int num = 0;
> +    unsigned long long int val,endval;
> +
> +    for (s = arg; s != NULL && *s != 0; s++) {
> +        val = strtoull(s, &end, 10);
> +        if (end == s && *s != '*') {
> +            num++; continue;
> +        }
> +        if (num >= maxentries) break;
> +        if (*end == delim && (flags & PARSE_FLAG_SUFFIX))
> +            val *= 1024 * 1024;
> +        switch (*end) {
> +            case 'g':
> +            case 'G':
> +                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> +           	/* fall through */
> +            case 'm':
> +            case 'M':
> +                if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> +           	/* fall through */
> +            case 'k':
> +            case 'K':
> +           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024;
> +           	    break;
> +            case '*':
> +                val = (unsigned long long int)-1;
> +                break;
> +            case '-':
> +           	    if (!(flags & PARSE_FLAG_BITMASK)) break;
> +                s = end + 1;
> +                endval = strtoull(s, &end, 10);
> +           	    val = (1 << (endval + 1)) - (1 << val);
> +                break;
> +            case 0:
> +           	    if (flags & PARSE_FLAG_SUFFIX) val *= 1024 * 1024;
> +           	/* fall through */
> +            default:
> +           	    if (flags & PARSE_FLAG_BITMASK) val = 1 << val;
> +           	    break;
>   

The fall throughs here are very confusing.  No suffix means G or bitmask 
depending on the context?  The indenting is really messed up in this 
function too.

> +int parse_numa_args(const char *opt, uint64_t *mems,
> +                     uint64_t *cpus, int maxentries, int expect_numnodes)
> +{
> +const char *s;
> +char *arg, *val, *end, *token;
> +int num = 0;
>   

This indenting is messed up.

> +    arg = strdup(opt); token = arg;
> +    if (expect_numnodes) {
> +        s = strsep(&token, ",");
> +        if (s == NULL) {
> +            free(arg);
> +            return -1;
> +        }
> +        num = strtol(s, &end, 10);
> +        if (s == end) {
> +            free(arg);
> +            return -1;
> +        }
> +    }
> +    while ((s=strsep(&token, ","))!=NULL) {
> +        if ((val = strchr(s, ':'))) {
> +            *val++ = 0;
> +            if (!strcmp(s, "mem") && mems != NULL) {
> +                parse_to_array(val, mems, ';', maxentries, PARSE_FLAG_SUFFIX);
> +            } else if (!strcmp(s, "cpu") && cpus != NULL) {
> +                parse_to_array(val, cpus, ';', maxentries, PARSE_FLAG_BITMASK);
> +            }
> +        }
> +    }
> +    free(arg);
> +    return num;
> +}
>  
>  int main(int argc, char **argv, char **envp)
>  {
> @@ -4556,6 +4653,11 @@ int main(int argc, char **argv, char **e
>      for(i = 1; i < MAX_PARALLEL_PORTS; i++)
>          parallel_devices[i] = NULL;
>      parallel_device_index = 0;
> +
> +    for(i = 0; i < MAX_NODES; i++) {
> +        node_to_cpus[i] = 0;
> +        node_mem[i] = 0;
> +    }
>  
>      usb_devices_index = 0;
>  
> @@ -5011,6 +5113,20 @@ int main(int argc, char **argv, char **e
>                      exit(1);
>                  }
>                  break;
> +            case QEMU_OPTION_numa:
> +                if (numnumanodes > 0)
> +                    parse_numa_args(optarg, node_mem,
> +                        node_to_cpus, MAX_NODES, 0);
> +                else
> +                    numnumanodes = parse_numa_args(optarg,
> +                        node_mem, node_to_cpus, MAX_NODES, 1);
> +                numnumanodes = parse_numa_args(optarg,
> +                    node_mem, node_to_cpus, MAX_NODES, 1);
> +                if (numnumanodes < 0) {
> +                    fprintf(stderr, "Invalid number of NUMA nodes\n");
> +                    exit(1);
> +                }
> +                break;
>  	    case QEMU_OPTION_vnc:
>  		vnc_display = optarg;
>  		break;
> @@ -5151,6 +5267,24 @@ int main(int argc, char **argv, char **e
>             monitor_device = "stdio";
>      }
>  
> +    if (numnumanodes > 0) {
> +        int i;
> +
> +        if (numnumanodes > smp_cpus)
> +            numnumanodes = smp_cpus;
> +
> +        for (i = 0; i < numnumanodes; i++) if (node_mem[i] != 0) break;
>   

Please split to multiple lines.

> +        if (i == numnumanodes) {
> +            for (i = 0; i < numnumanodes; i++)
> +                node_mem[i] = (ram_size / numnumanodes) & ~((1 << 20UL) - 1);
> +        }
> +        for (i = 0; i < numnumanodes; i++) if (node_to_cpus[i] != 0) break;
> +        if (i == numnumanodes) {
> +            for (i = 0; i < smp_cpus; i++)
> +                node_to_cpus[i % numnumanodes] |= 1<<i;
>   

The way CPUs are allocated here seems strange?  Each CPU is assigned 
round robin?  Should you have node 0 contain 1..X, node 1 contain X..Y, 
node 2 contain Y..smp_cpus?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] Re: [PATCH 1/8] v2: introduce -numa command line option
  2008-12-16 21:13 ` [Qemu-devel] " Anthony Liguori
@ 2008-12-16 23:31   ` Andre Przywara
  2008-12-17  0:05     ` Anthony Liguori
  0 siblings, 1 reply; 4+ messages in thread
From: Andre Przywara @ 2008-12-16 23:31 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, Avi Kivity

Anthony Liguori wrote:
>> +        if (i == numnumanodes) {
>> +            for (i = 0; i < smp_cpus; i++)
>> +                node_to_cpus[i % numnumanodes] |= 1<<i;
>>   
> 
> The way CPUs are allocated here seems strange?  Each CPU is assigned 
> round robin?  Should you have node 0 contain 1..X, node 1 contain X..Y, 
> node 2 contain Y..smp_cpus?
This is because an algorithm to do it this way is more complicated than 
this round-robin scheme (imagine distributing 8 CPUs over 3 nodes). 
Actually I know machines which do this natively (probably for the same 
reason, code simplicity in the BIOS). OSes can cope with my variant too, 
there is no guarantee that sequentially numbered CPUs belong to one node.
But if you insist, I'll fix it.

Regards,
Andre.

-- 
Andre Przywara
AMD-OSRC (Dresden)
Tel: x84917

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] Re: [PATCH 1/8] v2: introduce -numa command line option
  2008-12-16 23:31   ` Andre Przywara
@ 2008-12-17  0:05     ` Anthony Liguori
  0 siblings, 0 replies; 4+ messages in thread
From: Anthony Liguori @ 2008-12-17  0:05 UTC (permalink / raw)
  To: Andre Przywara; +Cc: qemu-devel, Avi Kivity

Andre Przywara wrote:
> Anthony Liguori wrote:
>>> +        if (i == numnumanodes) {
>>> +            for (i = 0; i < smp_cpus; i++)
>>> +                node_to_cpus[i % numnumanodes] |= 1<<i;
>>>   
>>
>> The way CPUs are allocated here seems strange?  Each CPU is assigned 
>> round robin?  Should you have node 0 contain 1..X, node 1 contain 
>> X..Y, node 2 contain Y..smp_cpus?
> This is because an algorithm to do it this way is more complicated 
> than this round-robin scheme (imagine distributing 8 CPUs over 3 
> nodes). Actually I know machines which do this natively (probably for 
> the same reason, code simplicity in the BIOS). OSes can cope with my 
> variant too, there is no guarantee that sequentially numbered CPUs 
> belong to one node.
> But if you insist, I'll fix it.

It just surprised me.  If there's a reason you picked this, that would 
be a good thing to stick in a comment :-)

Regards,

Anthony Liguori

>
> Regards,
> Andre.
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2008-12-17  0:05 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-16 14:13 [Qemu-devel] [PATCH 1/8] v2: introduce -numa command line option Andre Przywara
2008-12-16 21:13 ` [Qemu-devel] " Anthony Liguori
2008-12-16 23:31   ` Andre Przywara
2008-12-17  0:05     ` Anthony Liguori

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.