* [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
@ 2015-04-13 16:09 Don Slutz
  2015-04-13 16:20 ` Wei Liu
                   ` (3 more replies)
  0 siblings, 4 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-13 16:09 UTC (permalink / raw)
  To: xen-devel
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Don Slutz, Shriram Rajagopalan, Yang Hongyang

If QEMU has called on xc_domain_setmaxmem to add more memory for
option ROMs, domain restore needs to also increase the memory.

Signed-off-by: Don Slutz <dslutz@verizon.com>
---
To see the hvmloader issue:

  xl cre -p e1000x8.xfg
  xl save e1000x8 e1000x8.save
  xl restore e1000x8.save

With e1000x8.xfg:
-------------------------------------------------------------------------------
builder = "hvm"
device_model_args_hvm = [
 "-monitor",
 "pty",
 "-boot",
 "menu=on",
]
device_model_version = "qemu-xen"
disk = [
 "/dev/etherd/e500.1,,xvda",
 "/dev/etherd/e500.2,,xvdb",
 "/dev/etherd/e500.3,,xvde",
 "/local-isos/iso/centos/x86_64/CentOS-6.3-x86_64-bin-DVD1.iso,,hdc,cdrom",
 "/local-isos/iso/centos/x86_64/CentOS-6.3-x86_64-bin-DVD2.iso,,hdd,cdrom",
]
maxmem = "8192"
memory = "8192"
name = "e1000x8"
serial = "pty"
tsc_mode = "native_paravirt"
uuid = "e5000105-3d83-c962-07ae-2bc46c3644a0"
videoram = "16"
vif = [
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:a0",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:aa",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:b4",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:be",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:c8",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:d2",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:dc",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:e6",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:f0",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:fa",
]
viridian = 0
xen_platform_pci = 1
mmio_hole = 2048
vcpus = 1
maxvcpus = 6
on_poweroff = "preserve"
on_reboot = "preserve"
on_watchdog = "preserve"
on_crash = "preserve"
-------------------------------------------------------------------------------


 tools/libxc/xc_domain_restore.c | 75 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 2ab9f46..28b4fa6 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -47,6 +47,13 @@
 #include <xen/hvm/ioreq.h>
 #include <xen/hvm/params.h>
 
+/* Leave some slack so that hvmloader does not complain about lack of
+ * memory at boot time ("Could not allocate order=0 extent").
+ * Once hvmloader is modified to cope with that situation without
+ * printing warning messages, QEMU_SPARE_PAGES can be removed.
+ */
+#define QEMU_SPARE_PAGES 16
+
 struct restore_ctx {
     unsigned long max_mfn; /* max mfn of the current host machine */
     unsigned long hvirt_start; /* virtual starting address of the hypervisor */
@@ -209,12 +216,44 @@ static int uncanonicalize_pagetable(
         if (!ctx->hvm && ctx->superpages)
             rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
         else
+        {
+            xc_domaininfo_t info;
+            unsigned long free_pages;
+
+            if ((xc_domain_getinfolist(xch, dom, 1, &info) != 1) ||
+                (info.domain != dom)) {
+                ERROR("Failed xc_domain_getinfolist for batch (uncanonicalize_pagetable)\n");
+                errno = ENOMEM;
+                return 0;
+            }
+            free_pages = info.max_pages - info.tot_pages;
+            if (free_pages > QEMU_SPARE_PAGES) {
+                free_pages -= QEMU_SPARE_PAGES;
+            } else {
+                free_pages = 0;
+            }
+            if (free_pages < nr_mfns)
+            {
+                DPRINTF("Adjust memory for batch (uncanonicalize_pagetable): free_pages=%lu nr_mfns=%d max_pages=%lu tot_pages=%lu max_mfn=%lu\n",
+                        free_pages, nr_mfns, (unsigned long)info.max_pages,
+                        (unsigned long)info.tot_pages, ctx->max_mfn);
+                if (xc_domain_setmaxmem(xch, dom,
+                                        ((info.max_pages + nr_mfns - free_pages)
+                                         << (XC_PAGE_SHIFT - 10))) < 0)
+                {
+                    ERROR("Failed xc_domain_setmaxmem for batch (uncanonicalize_pagetable)\n");
+                    errno = ENOMEM;
+                    return 0;
+                }
+            }
             rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
                                                   ctx->p2m_batch);
+        }
 
         if (rc)
         {
-            ERROR("Failed to allocate memory for batch.!\n");
+            ERROR("Failed to allocate memory for batch. rc=%d nr_mfns=%d!\n",
+                  rc, nr_mfns);
             errno = ENOMEM;
             return 0;
         }
@@ -1241,12 +1280,44 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx,
         if (!ctx->hvm && ctx->superpages)
             rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
         else
+        {
+            xc_domaininfo_t info;
+            unsigned long free_pages;
+
+            if ((xc_domain_getinfolist(xch, dom, 1, &info) != 1) ||
+                (info.domain != dom)) {
+                ERROR("Failed xc_domain_getinfolist for apply_batch\n");
+                errno = ENOMEM;
+                return -1;
+            }
+            free_pages = info.max_pages - info.tot_pages;
+            if (free_pages > QEMU_SPARE_PAGES) {
+                free_pages -= QEMU_SPARE_PAGES;
+            } else {
+                free_pages = 0;
+            }
+            if (free_pages < nr_mfns)
+            {
+                DPRINTF("Adjust memory for apply_batch: free_pages=%lu nr_mfns=%d max_pages=%lu tot_pages=%lu max_mfn=%lu\n",
+                        free_pages, nr_mfns, (unsigned long)info.max_pages,
+                        (unsigned long)info.tot_pages, ctx->max_mfn);
+                if (xc_domain_setmaxmem(xch, dom,
+                                        ((info.max_pages + nr_mfns - free_pages)
+                                         << (XC_PAGE_SHIFT - 10))) < 0)
+                {
+                    ERROR("Failed xc_domain_setmaxmem for apply_batch\n");
+                    errno = ENOMEM;
+                    return -1;
+                }
+            }
             rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
                                                   ctx->p2m_batch);
+        }
 
         if (rc)
         {
-            ERROR("Failed to allocate memory for batch.!\n"); 
+            ERROR("Failed to allocate memory for apply_batch. rc=%d nr_mfns=%d max_mfn=%lu!\n",
+                  rc, nr_mfns, ctx->max_mfn);
             errno = ENOMEM;
             return -1;
         }
-- 
1.8.4
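
For readers following the arithmetic in the hunks above:
xc_domain_setmaxmem() takes its limit in KiB while the domain info is
counted in pages, hence the shift by (XC_PAGE_SHIFT - 10).  The headroom
logic, restated as a standalone sketch for illustration only (not part
of the patch; XC_PAGE_SHIFT really comes from xenctrl.h):

#define XC_PAGE_SHIFT    12  /* as in xenctrl.h, 4 KiB pages */
#define QEMU_SPARE_PAGES 16

static unsigned long maxmem_kb_for_batch(unsigned long max_pages,
                                         unsigned long tot_pages,
                                         unsigned long nr_mfns)
{
    unsigned long free_pages = max_pages - tot_pages;

    /* Keep QEMU_SPARE_PAGES of slack out of the usable budget. */
    free_pages = free_pages > QEMU_SPARE_PAGES
                 ? free_pages - QEMU_SPARE_PAGES : 0;

    if (free_pages >= nr_mfns)
        return 0;  /* enough headroom already; no setmaxmem call needed */

    /* Grow max_pages by exactly the shortfall, then convert to KiB. */
    return (max_pages + nr_mfns - free_pages) << (XC_PAGE_SHIFT - 10);
}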


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 16:09 [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Don Slutz
@ 2015-04-13 16:20 ` Wei Liu
  2015-04-13 23:51   ` Don Slutz
  2015-04-13 16:25 ` Andrew Cooper
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 47+ messages in thread
From: Wei Liu @ 2015-04-13 16:20 UTC (permalink / raw)
  To: Don Slutz
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	xen-devel, Shriram Rajagopalan, Yang Hongyang

On Mon, Apr 13, 2015 at 12:09:13PM -0400, Don Slutz wrote:
> If QEMU has called on xc_domain_setmaxmem to add more memory for
> option ROMs, domain restore needs to also increase the memory.
> 
> Signed-off-by: Don Slutz <dslutz@verizon.com>
> ---
> To see the hvmloader issue:
> 
>   xl cre -p e1000x8.xfg
>   xl save e1000x8 e1000x8.save
>   xl restore e1000x8.save
> 
> With e1000x8.xfg:
> -------------------------------------------------------------------------------
> builder = "hvm"
> device_model_args_hvm = [
>  "-monitor",
>  "pty",
>  "-boot",
>  "menu=on",
> ]
> device_model_version = "qemu-xen"
> disk = [
>  "/dev/etherd/e500.1,,xvda",
>  "/dev/etherd/e500.2,,xvdb",
>  "/dev/etherd/e500.3,,xvde",
>  "/local-isos/iso/centos/x86_64/CentOS-6.3-x86_64-bin-DVD1.iso,,hdc,cdrom",
>  "/local-isos/iso/centos/x86_64/CentOS-6.3-x86_64-bin-DVD2.iso,,hdd,cdrom",
> ]
> maxmem = "8192"
> memory = "8192"
> name = "e1000x8"
> serial = "pty"
> tsc_mode = "native_paravirt"
> uuid = "e5000105-3d83-c962-07ae-2bc46c3644a0"
> videoram = "16"
> vif = [
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:a0",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:aa",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:b4",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:be",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:c8",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:d2",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:dc",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:e6",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:f0",
>  "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:fa",
> ]
> viridian = 0
> xen_platform_pci = 1
> mmio_hole = 2048
> vcpus = 1
> maxvcpus = 6
> on_poweroff = "preserve"
> on_reboot = "preserve"
> on_watchdog = "preserve"
> on_crash = "preserve"
> -------------------------------------------------------------------------------
> 
> 
>  tools/libxc/xc_domain_restore.c | 75 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 73 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
> index 2ab9f46..28b4fa6 100644
> --- a/tools/libxc/xc_domain_restore.c
> +++ b/tools/libxc/xc_domain_restore.c
> @@ -47,6 +47,13 @@
>  #include <xen/hvm/ioreq.h>
>  #include <xen/hvm/params.h>
>  
> +/* Leave some slack so that hvmloader does not complain about lack of
> + * memory at boot time ("Could not allocate order=0 extent").
> + * Once hvmloader is modified to cope with that situation without
> + * printing warning messages, QEMU_SPARE_PAGES can be removed.

Yes, please properly modify hvmloader to do this instead of trying to
work around this over and over again.

But didn't Stefano make some changes to libxl too, to handle the extra
RAM needed?

> + */
> +#define QEMU_SPARE_PAGES 16
> +

This is still arbitrary and prone to breakage.

>  struct restore_ctx {
>      unsigned long max_mfn; /* max mfn of the current host machine */
>      unsigned long hvirt_start; /* virtual starting address of the hypervisor */
> @@ -209,12 +216,44 @@ static int uncanonicalize_pagetable(
>          if (!ctx->hvm && ctx->superpages)
>              rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
>          else
> +        {
> +            xc_domaininfo_t info;
> +            unsigned long free_pages;
> +
> +            if ((xc_domain_getinfolist(xch, dom, 1, &info) != 1) ||
> +                (info.domain != dom)) {
> +                ERROR("Failed xc_domain_getinfolist for batch (uncanonicalize_pagetable)\n");
> +                errno = ENOMEM;
> +                return 0;
> +            }
> +            free_pages = info.max_pages - info.tot_pages;
> +            if (free_pages > QEMU_SPARE_PAGES) {
> +                free_pages -= QEMU_SPARE_PAGES;
> +            } else {
> +                free_pages = 0;
> +            }
> +            if (free_pages < nr_mfns)
> +            {
> +                DPRINTF("Adjust memory for batch (uncanonicalize_pagetable): free_pages=%lu nr_mfns=%d max_pages=%lu tot_pages=%lu max_mfn=%lu\n",
> +                        free_pages, nr_mfns, (unsigned long)info.max_pages,
> +                        (unsigned long)info.tot_pages, ctx->max_mfn);
> +                if (xc_domain_setmaxmem(xch, dom,
> +                                        ((info.max_pages + nr_mfns - free_pages)
> +                                         << (XC_PAGE_SHIFT - 10))) < 0)
> +                {
> +                    ERROR("Failed xc_domain_setmaxmem for batch (uncanonicalize_pagetable)\n");
> +                    errno = ENOMEM;
> +                    return 0;
> +                }
> +            }

This is not maintainable. Trying to do smart things inside libxc
without libxl's knowledge is prone to breakage.

Wei.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 16:09 [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Don Slutz
  2015-04-13 16:20 ` Wei Liu
@ 2015-04-13 16:25 ` Andrew Cooper
  2015-04-13 23:51   ` Don Slutz
  2015-04-14  3:46 ` Hongyang Yang
  2015-04-15 14:30 ` Ian Campbell
  3 siblings, 1 reply; 47+ messages in thread
From: Andrew Cooper @ 2015-04-13 16:25 UTC (permalink / raw)
  To: Don Slutz, xen-devel
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Shriram Rajagopalan, Yang Hongyang

On 13/04/15 17:09, Don Slutz wrote:
> If QEMU has called on xc_domain_setmaxmem to add more memory for
> option ROMs, domain restore needs to also increase the memory.
>
> Signed-off-by: Don Slutz <dslutz@verizon.com>

hvmloader has no interaction with xc_domain_restore().

It is xl's job to propagate the current memory as part of migration.  If
qemu has had to bump it up, this should be reflected in the domain
config file.

The migration path should not be hacked up to fix a higher level
toolstack bug.

~Andrew


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 16:25 ` Andrew Cooper
@ 2015-04-13 23:51   ` Don Slutz
  2015-04-14  8:53     ` Wei Liu
  2015-04-14  8:53     ` Andrew Cooper
  0 siblings, 2 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-13 23:51 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Shriram Rajagopalan, Yang Hongyang

On 04/13/15 12:25, Andrew Cooper wrote:
> On 13/04/15 17:09, Don Slutz wrote:
>> If QEMU has called on xc_domain_setmaxmem to add more memory for
>> option ROMs, domain restore needs to also increase the memory.
>>
>> Signed-off-by: Don Slutz <dslutz@verizon.com>
> hvmloader has no interaction with xc_domain_restore().

I did not mean to imply that it did.  Somehow I lost the fact that this
is a bug in master:

[root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
Parsing config from /home/don/e1000x8.xfg
got a tsc mode string: "native_paravirt"
[root@dcs-xen-52 don]# xl save e1000x8 e1000x8.save
Saving to e1000x8.save new xl format (info 0x1/0x0/3506)
xc: Saving memory: iter 0 (last sent 0 skipped 0): 1044481/1044481  100%
[root@dcs-xen-52 don]# xl restore e1000x8.save                          
Loading new save file e1000x8.save (new xl fmt info 0x1/0x0/3506)
 Savefile contains xl domain config in JSON format
Parsing config from <saved>
xc: error: Failed to allocate memory for batch.!: Internal error
libxl: error: libxl_create.c:1057:libxl__xc_domain_restore_done:
restoring domain: Cannot allocate memory
libxl: error: libxl_create.c:1129:domcreate_rebuild_done: cannot
(re-)build domain: -3
libxl: error: libxl.c:1576:libxl__destroy_domid: non-existant domain 2
libxl: error: libxl.c:1540:domain_destroy_callback: unable to destroy
guest with domid 2
libxl: error: libxl_create.c:1490:domcreate_destruction_cb: unable to
destroy domain 2 following failed creation
[root@dcs-xen-52 don]#                         

The hvmloader part turns out to be a "warning message" that is ok to
ignore.  However "xl save" followed by "xl restore" is currently broken
without some fix.

> It is xl's job to propagate the current memory as part of migration.  If
> qemu has had to bump it up, this should be reflected in the domain
> config file.

Not at all sure how QEMU (either in dom0 or a driver domain) is expected
to change a file (the domain config file).  My guess is that you are
saying that "xl save" (before xc_domain_save) is the correct place to
record (or add extra info).  However it looks to me that all the data
needed is in the save file, but I could be wrong.

The page data is correctly saved into the save file (migration stream).
However, when the new domain is created, its size is "wrong".  You cannot
adjust any of the config info to fix the size, because the option ROM
space to reserve is not an existing config option.  So if I am following
you correctly, libxl needs to add and process more info to handle this
case.
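
For illustration only, that "more info" could be an explicit allowance
carried in the saved config.  The struct and field below are invented;
no such option exists in libxl:

#include <stdint.h>

/* Hypothetical sketch: record the option ROM allowance explicitly so
 * that save and restore agree on the domain's true maximum size. */
struct hypothetical_build_info_ext {
    uint64_t max_memkb;         /* the existing notion of maxmem */
    uint64_t option_rom_memkb;  /* invented: extra space for ROMs */
};

static uint64_t effective_maxmem_kb(const struct hypothetical_build_info_ext *b)
{
    return b->max_memkb + b->option_rom_memkb;
}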

> The migration path should not be hacked up to fix a higher level
> toolstack bug.

I do not see how QEMU is part of the "higher level toolstack".  If you
mean xl vs xc, then I can see "xl save" somehow doing the work.  This
patch works for me, but I am happy to explore other ways to fix this
bug.

   -Don Slutz

> ~Andrew


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 16:20 ` Wei Liu
@ 2015-04-13 23:51   ` Don Slutz
  0 siblings, 0 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-13 23:51 UTC (permalink / raw)
  To: Wei Liu
  Cc: Ian Campbell, Stefano Stabellini, Ian Jackson, xen-devel,
	Shriram Rajagopalan, Yang Hongyang

On 04/13/15 12:20, Wei Liu wrote:
> On Mon, Apr 13, 2015 at 12:09:13PM -0400, Don Slutz wrote:
>> If QEMU has called on xc_domain_setmaxmem to add more memory for
>> option ROMs, domain restore needs to also increase the memory.
>>
>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>> ---
>> To see the hvmloader issue:
>>
...
>>  
>> +/* Leave some slack so that hvmloader does not complain about lack of
>> + * memory at boot time ("Could not allocate order=0 extent").
>> + * Once hvmloader is modified to cope with that situation without
>> + * printing warning messages, QEMU_SPARE_PAGES can be removed.
> Yes, please properly modify hvmloader to do this instead of trying to
> work around this over and over again.

At some point I might get to this.

> But didn't Stefano make some changes to libxl too to handle the extra
> ram needed?

Not that I have seen.

>> + */
>> +#define QEMU_SPARE_PAGES 16
>> +
> This is still arbitrary and prone to breakage.

Yes, but it turns out not to be the important part.

>
>>  struct restore_ctx {
>>      unsigned long max_mfn; /* max mfn of the current host machine */
>>      unsigned long hvirt_start; /* virtual starting address of the hypervisor */
>> @@ -209,12 +216,44 @@ static int uncanonicalize_pagetable(
>>          if (!ctx->hvm && ctx->superpages)
>>              rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
>>          else
>> +        {
...
>> +            }
> This is not maintainable. Trying to do smart things inside libxc
> without libxl's knowledge is prone to breakage.

Somehow I forgot to include that this is a bug in master
(4.6-unstable).  There is much more info in my reply to Andrew Cooper
(short form below).


[root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
Parsing config from /home/don/e1000x8.xfg
got a tsc mode string: "native_paravirt"
[root@dcs-xen-52 don]# xl save e1000x8 e1000x8.save
Saving to e1000x8.save new xl format (info 0x1/0x0/3506)
xc: Saving memory: iter 0 (last sent 0 skipped 0): 1044481/1044481  100%
[root@dcs-xen-52 don]# xl restore e1000x8.save                          
Loading new save file e1000x8.save (new xl fmt info 0x1/0x0/3506)
 Savefile contains xl domain config in JSON format
Parsing config from <saved>
xc: error: Failed to allocate memory for batch.!: Internal error
libxl: error: libxl_create.c:1057:libxl__xc_domain_restore_done:
restoring domain: Cannot allocate memory
libxl: error: libxl_create.c:1129:domcreate_rebuild_done: cannot
(re-)build domain: -3
libxl: error: libxl.c:1576:libxl__destroy_domid: non-existant domain 2
libxl: error: libxl.c:1540:domain_destroy_callback: unable to destroy
guest with domid 2
libxl: error: libxl_create.c:1490:domcreate_destruction_cb: unable to
destroy domain 2 following failed creation
[root@dcs-xen-52 don]#                         


   -Don Slutz


> Wei.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 16:09 [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Don Slutz
  2015-04-13 16:20 ` Wei Liu
  2015-04-13 16:25 ` Andrew Cooper
@ 2015-04-14  3:46 ` Hongyang Yang
  2015-04-14  8:46   ` Wei Liu
  2015-04-15 14:30 ` Ian Campbell
  3 siblings, 1 reply; 47+ messages in thread
From: Hongyang Yang @ 2015-04-14  3:46 UTC (permalink / raw)
  To: Don Slutz, xen-devel
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, Shriram Rajagopalan

This patch also fixes a triple fault when guests run under COLO mode.
(XEN) d0v1 Over-allocation for domain 1: 524545 > 524544
(XEN) memory.c:155:d0v1 Could not allocate order=0 extent: id=1 memflags=0 (181 of 235)
(XEN) d1v1 Triple fault - invoking HVM shutdown action 1

On 04/14/2015 12:09 AM, Don Slutz wrote:
> If QEMU has called on xc_domain_setmaxmem to add more memory for
> option ROMs, domain restore needs to also increase the memory.
>
> Signed-off-by: Don Slutz <dslutz@verizon.com>
> ---
> To see the hvmloader issue:
>
>    xl cre -p e1000x8.xfg
>    xl save e1000x8 e1000x8.save
>    xl restore e1000x8.save
>
> With e1000x8.xfg:
> -------------------------------------------------------------------------------
> builder = "hvm"
> device_model_args_hvm = [
>   "-monitor",
>   "pty",
>   "-boot",
>   "menu=on",
> ]
> device_model_version = "qemu-xen"
> disk = [
>   "/dev/etherd/e500.1,,xvda",
>   "/dev/etherd/e500.2,,xvdb",
>   "/dev/etherd/e500.3,,xvde",
>   "/local-isos/iso/centos/x86_64/CentOS-6.3-x86_64-bin-DVD1.iso,,hdc,cdrom",
>   "/local-isos/iso/centos/x86_64/CentOS-6.3-x86_64-bin-DVD2.iso,,hdd,cdrom",
> ]
> maxmem = "8192"
> memory = "8192"
> name = "e1000x8"
> serial = "pty"
> tsc_mode = "native_paravirt"
> uuid = "e5000105-3d83-c962-07ae-2bc46c3644a0"
> videoram = "16"
> vif = [
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:a0",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:aa",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:b4",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:be",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:c8",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:d2",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:dc",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:e6",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:f0",
>   "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:fa",
> ]
> viridian = 0
> xen_platform_pci = 1
> mmio_hole = 2048
> vcpus = 1
> maxvcpus = 6
> on_poweroff = "preserve"
> on_reboot = "preserve"
> on_watchdog = "preserve"
> on_crash = "preserve"
> -------------------------------------------------------------------------------
>
>
>   tools/libxc/xc_domain_restore.c | 75 +++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
> index 2ab9f46..28b4fa6 100644
> --- a/tools/libxc/xc_domain_restore.c
> +++ b/tools/libxc/xc_domain_restore.c
> @@ -47,6 +47,13 @@
>   #include <xen/hvm/ioreq.h>
>   #include <xen/hvm/params.h>
>
> +/* Leave some slack so that hvmloader does not complain about lack of
> + * memory at boot time ("Could not allocate order=0 extent").
> + * Once hvmloader is modified to cope with that situation without
> + * printing warning messages, QEMU_SPARE_PAGES can be removed.
> + */
> +#define QEMU_SPARE_PAGES 16
> +
>   struct restore_ctx {
>       unsigned long max_mfn; /* max mfn of the current host machine */
>       unsigned long hvirt_start; /* virtual starting address of the hypervisor */
> @@ -209,12 +216,44 @@ static int uncanonicalize_pagetable(
>           if (!ctx->hvm && ctx->superpages)
>               rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
>           else
> +        {
> +            xc_domaininfo_t info;
> +            unsigned long free_pages;
> +
> +            if ((xc_domain_getinfolist(xch, dom, 1, &info) != 1) ||
> +                (info.domain != dom)) {
> +                ERROR("Failed xc_domain_getinfolist for batch (uncanonicalize_pagetable)\n");
> +                errno = ENOMEM;
> +                return 0;
> +            }
> +            free_pages = info.max_pages - info.tot_pages;
> +            if (free_pages > QEMU_SPARE_PAGES) {
> +                free_pages -= QEMU_SPARE_PAGES;
> +            } else {
> +                free_pages = 0;
> +            }
> +            if (free_pages < nr_mfns)
> +            {
> +                DPRINTF("Adjust memory for batch (uncanonicalize_pagetable): free_pages=%lu nr_mfns=%d max_pages=%lu tot_pages=%lu max_mfn=%lu\n",
> +                        free_pages, nr_mfns, (unsigned long)info.max_pages,
> +                        (unsigned long)info.tot_pages, ctx->max_mfn);
> +                if (xc_domain_setmaxmem(xch, dom,
> +                                        ((info.max_pages + nr_mfns - free_pages)
> +                                         << (XC_PAGE_SHIFT - 10))) < 0)
> +                {
> +                    ERROR("Failed xc_domain_setmaxmem for batch (uncanonicalize_pagetable)\n");
> +                    errno = ENOMEM;
> +                    return 0;
> +                }
> +            }
>               rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
>                                                     ctx->p2m_batch);
> +        }
>
>           if (rc)
>           {
> -            ERROR("Failed to allocate memory for batch.!\n");
> +            ERROR("Failed to allocate memory for batch. rc=%d nr_mfns=%d!\n",
> +                  rc, nr_mfns);
>               errno = ENOMEM;
>               return 0;
>           }
> @@ -1241,12 +1280,44 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx,
>           if (!ctx->hvm && ctx->superpages)
>               rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
>           else
> +        {
> +            xc_domaininfo_t info;
> +            unsigned long free_pages;
> +
> +            if ((xc_domain_getinfolist(xch, dom, 1, &info) != 1) ||
> +                (info.domain != dom)) {
> +                ERROR("Failed xc_domain_getinfolist for apply_batch\n");
> +                errno = ENOMEM;
> +                return -1;
> +            }
> +            free_pages = info.max_pages - info.tot_pages;
> +            if (free_pages > QEMU_SPARE_PAGES) {
> +                free_pages -= QEMU_SPARE_PAGES;
> +            } else {
> +                free_pages = 0;
> +            }
> +            if (free_pages < nr_mfns)
> +            {
> +                DPRINTF("Adjust memory for apply_batch: free_pages=%lu nr_mfns=%d max_pages=%lu tot_pages=%lu max_mfn=%lu\n",
> +                        free_pages, nr_mfns, (unsigned long)info.max_pages,
> +                        (unsigned long)info.tot_pages, ctx->max_mfn);
> +                if (xc_domain_setmaxmem(xch, dom,
> +                                        ((info.max_pages + nr_mfns - free_pages)
> +                                         << (XC_PAGE_SHIFT - 10))) < 0)
> +                {
> +                    ERROR("Failed xc_domain_setmaxmem for apply_batch\n");
> +                    errno = ENOMEM;
> +                    return -1;
> +                }
> +            }
>               rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
>                                                     ctx->p2m_batch);
> +        }
>
>           if (rc)
>           {
> -            ERROR("Failed to allocate memory for batch.!\n");
> +            ERROR("Failed to allocate memory for apply_batch. rc=%d nr_mfns=%d max_mfn=%lu!\n",
> +                  rc, nr_mfns, ctx->max_mfn);
>               errno = ENOMEM;
>               return -1;
>           }
>

-- 
Thanks,
Yang.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  3:46 ` Hongyang Yang
@ 2015-04-14  8:46   ` Wei Liu
  0 siblings, 0 replies; 47+ messages in thread
From: Wei Liu @ 2015-04-14  8:46 UTC (permalink / raw)
  To: Hongyang Yang
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, Don Slutz, xen-devel, Shriram Rajagopalan

On Tue, Apr 14, 2015 at 11:46:55AM +0800, Hongyang Yang wrote:
> This patch also fixes a triple fault when guests run under COLO mode.
> (XEN) d0v1 Over-allocation for domain 1: 524545 > 524544
> (XEN) memory.c:155:d0v1 Could not allocate order=0 extent: id=1 memflags=0 (181 of 235)
> (XEN) d1v1 Triple fault - invoking HVM shutdown action 1
> 

Presumably this is due to an accounting error or some other latent bug.

This patch bumps the limit of max memory, which covers up the real cause
of your issue. This is prone to breakage in the future.

Wei.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 23:51   ` Don Slutz
@ 2015-04-14  8:53     ` Wei Liu
  2015-04-14 17:34       ` Don Slutz
  2015-04-14  8:53     ` Andrew Cooper
  1 sibling, 1 reply; 47+ messages in thread
From: Wei Liu @ 2015-04-14  8:53 UTC (permalink / raw)
  To: Don Slutz
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Mon, Apr 13, 2015 at 07:51:31PM -0400, Don Slutz wrote:
> On 04/13/15 12:25, Andrew Cooper wrote:
> > On 13/04/15 17:09, Don Slutz wrote:
> >> If QEMU has called on xc_domain_setmaxmem to add more memory for
> >> option ROMs, domain restore needs to also increase the memory.
> >>
> >> Signed-off-by: Don Slutz <dslutz@verizon.com>
> > hvmloader has no interaction with xc_domain_restore().
> 
> I did not mean to imply that it did.  Somehow I lost the fact that this
> is a bug in master:
> 
> [root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
> Parsing config from /home/don/e1000x8.xfg
> got a tsc mode string: "native_paravirt"
> [root@dcs-xen-52 don]# xl save e1000x8 e1000x8.save
> Saving to e1000x8.save new xl format (info 0x1/0x0/3506)
> xc: Saving memory: iter 0 (last sent 0 skipped 0): 1044481/1044481  100%
> [root@dcs-xen-52 don]# xl restore e1000x8.save                          
> Loading new save file e1000x8.save (new xl fmt info 0x1/0x0/3506)
>  Savefile contains xl domain config in JSON format
> Parsing config from <saved>
> xc: error: Failed to allocate memory for batch.!: Internal error
> libxl: error: libxl_create.c:1057:libxl__xc_domain_restore_done:
> restoring domain: Cannot allocate memory
> libxl: error: libxl_create.c:1129:domcreate_rebuild_done: cannot
> (re-)build domain: -3
> libxl: error: libxl.c:1576:libxl__destroy_domid: non-existant domain 2
> libxl: error: libxl.c:1540:domain_destroy_callback: unable to destroy
> guest with domid 2
> libxl: error: libxl_create.c:1490:domcreate_destruction_cb: unable to
> destroy domain 2 following failed creation
> [root@dcs-xen-52 don]#                         
> 
> The hvmloader part turns out to be a "warning message" that is ok to
> ignore.  However "xl save" followed by "xl restore" is currently broken
> without some fix.
> 
> > It is xl's job to propagate the current memory as part of migration.  If
> > qemu has had to bump it up, this should be reflected in the domain
> > config file.
> 
> Not at all sure how QEMU (either in dom0 or a driver domain) is expected
> to change a file (the domain config file).  My guess is that you are
> saying that "xl save" (before xc_domain_save) is the correct place to
> record (or add extra info).   However it looks to me that all the data
> needed is in the save file, but
> I could be wrong.
> 
> The page data is correctly saved into the save file (migration stream). 
> However when
> the new domain is created, its size is "wrong".  You cannot adjust any
> of the config info to fix the size, because the option ROM space to
> reserve is not an existing config option.   So if I am following you
> correctly, libxl needs to add and process more info to handle this case.
> 

Thanks for the analysis. I think what you need to do is to adjust the
memory size during domain creation in libxl, so that the relevant data
is saved at creation time. It should not be modified during restore.

There are already various adjustments to memory-related values in
libxl.  Grep for video_memkb and max_memkb in libxl.

Is there a way to know how much more memory each option ROM needs? If
so, you can correctly account for the extra memory you need. This would
be an ideal fix to this problem.
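
For illustration, a minimal sketch of that creation-time accounting.
The per-ROM constant and the helper are assumptions, not actual libxl
code; max_memkb and video_memkb are real libxl_domain_build_info fields:

#include <stdint.h>

/* Assumed per-NIC option ROM footprint; real ROM sizes vary and would
 * have to be taken from QEMU.  Entirely hypothetical. */
#define OPTION_ROM_KB_PER_NIC 256

static uint64_t option_rom_allowance_kb(unsigned int nr_emulated_nics)
{
    return (uint64_t)nr_emulated_nics * OPTION_ROM_KB_PER_NIC;
}

/* At build time the memory target would then be computed as
 *   b_info->max_memkb + b_info->video_memkb
 *       + option_rom_allowance_kb(nr_nics),
 * and that sum is what gets recorded in the saved config. */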

Wei.

> > The migration path should not be hacked up to fix a higher level
> > toolstack bug.
> 
> I do not see how QEMU is part of the "higher level toolstack".  If you
> mean xl vs xc, then
> I can see "xl save" somehow doing the work.  This patch works for me,
> but I am happy to
> explore other ways to fix this bug.
> 
>    -Don Slutz
> 
> > ~Andrew


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 23:51   ` Don Slutz
  2015-04-14  8:53     ` Wei Liu
@ 2015-04-14  8:53     ` Andrew Cooper
  2015-04-14  9:22       ` Hongyang Yang
  1 sibling, 1 reply; 47+ messages in thread
From: Andrew Cooper @ 2015-04-14  8:53 UTC (permalink / raw)
  To: Don Slutz, xen-devel
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Shriram Rajagopalan, Yang Hongyang

On 14/04/15 00:51, Don Slutz wrote:
> On 04/13/15 12:25, Andrew Cooper wrote:
>> On 13/04/15 17:09, Don Slutz wrote:
>>> If QEMU has called on xc_domain_setmaxmem to add more memory for
>>> option ROMs, domain restore needs to also increase the memory.
>>>
>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>> hvmloader has no interaction with xc_domain_restore().
> I did not mean to imply that it did.  Somehow I lost the fact that this
> is a bug in master:
>
> [root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
> Parsing config from /home/don/e1000x8.xfg
> got a tsc mode string: "native_paravirt"
> [root@dcs-xen-52 don]# xl save e1000x8 e1000x8.save
> Saving to e1000x8.save new xl format (info 0x1/0x0/3506)
> xc: Saving memory: iter 0 (last sent 0 skipped 0): 1044481/1044481  100%
> [root@dcs-xen-52 don]# xl restore e1000x8.save                          
> Loading new save file e1000x8.save (new xl fmt info 0x1/0x0/3506)
>  Savefile contains xl domain config in JSON format
> Parsing config from <saved>
> xc: error: Failed to allocate memory for batch.!: Internal error
> libxl: error: libxl_create.c:1057:libxl__xc_domain_restore_done:
> restoring domain: Cannot allocate memory
> libxl: error: libxl_create.c:1129:domcreate_rebuild_done: cannot
> (re-)build domain: -3
> libxl: error: libxl.c:1576:libxl__destroy_domid: non-existant domain 2
> libxl: error: libxl.c:1540:domain_destroy_callback: unable to destroy
> guest with domid 2
> libxl: error: libxl_create.c:1490:domcreate_destruction_cb: unable to
> destroy domain 2 following failed creation
> [root@dcs-xen-52 don]#                         
>
> The hvmloader part turns out to be a "warning message" that is ok to
> ignore.  However "xl save" followed by "xl restore" is currently broken
> without some fix.
>
>> It is xl's job to propagate the current memory as part of migration.  If
>> qemu has had to bump it up, this should be reflected in the domain
>> config file.
> Not at all sure how QEMU (either in dom0 or a driver domain) is expected
> to change a file (the domain config file).  My guess is that you are
> saying that "xl save" (before xc_domain_save) is the correct place to
> record (or add extra info).   However it looks to me that all the data
> needed is in the save file, but
> I could be wrong.
>
> The page data is correctly saved into the save file (migration stream). 
> However when
> the new domain is created, its size is "wrong".  You cannot adjust any
> of the config info to fix the size, because the option ROM space to
> reserve is not an existing config option.   So if I am following you
> correctly, libxl needs to add and process more info to handle this case.
>
>> The migration path should not be hacked up to fix a higher level
>> toolstack bug.
> I do not see how QEMU is part of the "higher level toolstack".  If you
> mean xl vs xc, then
> I can see "xl save" somehow doing the work.  This patch works for me,
> but I am happy to
> explore other ways to fix this bug.

If I understand correctly, the steps are this:

* 'xl create' makes a VM of size $FOO
* qemu bumps the size to $FOO+$N
* 'xl save' writes $FOO+$N of page data, but the xl config file at the
start of the image still says $FOO
* 'xl restore' creates a vm of size $FOO, then instructs
xc_domain_restore() to put $FOO+$N pages into it.

I would argue first, that qemu should not play in this area to start with.

However, the real bug here is that the domain configuration written by
xl save is inaccurate.
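
One hedged sketch of a fix at that level: have "xl save" refresh the
recorded maximum from the hypervisor just before it serializes the
config.  The helper name and its placement are illustrative, not actual
xl code; xc_domain_getinfolist() is the real libxc call, used the same
way in the patch above:

#include <stdint.h>
#include <xenctrl.h>
#include <libxl.h>

static void refresh_saved_maxmem(xc_interface *xch, uint32_t domid,
                                 libxl_domain_build_info *b_info)
{
    xc_domaininfo_t info;

    /* Ask the hypervisor for the domain's current max_pages and
     * convert pages to KiB before the config is written out. */
    if (xc_domain_getinfolist(xch, domid, 1, &info) == 1 &&
        info.domain == domid)
        b_info->max_memkb = (uint64_t)info.max_pages
                            << (XC_PAGE_SHIFT - 10);
}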

~Andrew


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  8:53     ` Andrew Cooper
@ 2015-04-14  9:22       ` Hongyang Yang
  2015-04-14  9:28         ` Andrew Cooper
  2015-04-14  9:29         ` Wei Liu
  0 siblings, 2 replies; 47+ messages in thread
From: Hongyang Yang @ 2015-04-14  9:22 UTC (permalink / raw)
  To: Andrew Cooper, Don Slutz, xen-devel
  Cc: Shriram Rajagopalan, Ian Jackson, Wei Liu, Ian Campbell,
	Stefano Stabellini



On 04/14/2015 04:53 PM, Andrew Cooper wrote:
> On 14/04/15 00:51, Don Slutz wrote:
>> On 04/13/15 12:25, Andrew Cooper wrote:
>>> On 13/04/15 17:09, Don Slutz wrote:
>>>> If QEMU has called on xc_domain_setmaxmem to add more memory for
>>>> option ROMs, domain restore needs to also increase the memory.
>>>>
>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>> hvmloader has no interaction with xc_domain_restore().
>> I did not mean to imply that it did.  Somehow I lost the fact that this
>> is a bug in master:
>>
>> [root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
>> Parsing config from /home/don/e1000x8.xfg
>> got a tsc mode string: "native_paravirt"
>> [root@dcs-xen-52 don]# xl save e1000x8 e1000x8.save
>> Saving to e1000x8.save new xl format (info 0x1/0x0/3506)
>> xc: Saving memory: iter 0 (last sent 0 skipped 0): 1044481/1044481  100%
>> [root@dcs-xen-52 don]# xl restore e1000x8.save
>> Loading new save file e1000x8.save (new xl fmt info 0x1/0x0/3506)
>>   Savefile contains xl domain config in JSON format
>> Parsing config from <saved>
>> xc: error: Failed to allocate memory for batch.!: Internal error
>> libxl: error: libxl_create.c:1057:libxl__xc_domain_restore_done:
>> restoring domain: Cannot allocate memory
>> libxl: error: libxl_create.c:1129:domcreate_rebuild_done: cannot
>> (re-)build domain: -3
>> libxl: error: libxl.c:1576:libxl__destroy_domid: non-existant domain 2
>> libxl: error: libxl.c:1540:domain_destroy_callback: unable to destroy
>> guest with domid 2
>> libxl: error: libxl_create.c:1490:domcreate_destruction_cb: unable to
>> destroy domain 2 following failed creation
>> [root@dcs-xen-52 don]#
>>
>> The hvmloader part turns out to be a "warning message" that is ok to
>> ignore.  However "xl save" followed by "xl restore" is currently broken
>> without some fix.
>>
>>> It is xl's job to propagate the current memory as part of migration.  If
>>> qemu has had to bump it up, this should be reflected in the domain
>>> config file.
>> Not at all sure how QEMU (either in dom0 or a driver domain) is expected
>> to change a file (the domain config file).  My guess is that you are
>> saying that "xl save" (before xc_domain_save) is the correct place to
>> record (or add extra info).   However it looks to me that all the data
>> needed is in the save file, but
>> I could be wrong.
>>
>> The page data is correctly saved into the save file (migration stream).
>> However when
>> the new domain is created, its size is "wrong".  You cannot adjust any
>> of the config info to fix the size, because the option ROM space to
>> reserve is not an existing config option.   So if I am following you
>> correctly, libxl needs to add and process more info to handle this case.
>>
>>> The migration path should not be hacked up to fix a higher level
>>> toolstack bug.
>> I do not see how QEMU is part of the "higher level toolstack".  If you
>> mean xl vs xc, then
>> I can see "xl save" somehow doing the work.  This patch works for me,
>> but I am happy to
>> explore other ways to fix this bug.
>
> If I understand correctly, the steps are this:
>
> * 'xl create' makes a VM of size $FOO
> * qemu bumps the size to $FOO+$N
> * 'xl save' writes $FOO+$N of page data, but the xl config file at the
> start of the image still says $FOO
> * 'xl restore' creates a vm of size $FOO, then instructs
> xc_domain_restore() to put $FOO+$N pages into it.
>
> I would argue first, that qemu should not play in this area to start with.
>
> However, the real bug here is that the domain configuration written by
> xl save is inaccurate.

There's a case like COLO:
1. Both Primary/Secondary VM are created first with the same config file
    which makes a VM of size $FOO
2. qemu bumps the size to $FOO+$N
3. 'save' writes $FOO+$N of page data
4. 'restore' puts $FOO+$N pages into $FOO pages, which causes an error

Even if you fix the configuration, the bug still exists, because the
config is only transferred from Primary to Secondary once, at the very
beginning, when you execute the 'xl remus' command. The problem is how
to let the secondary (restore) side know about the size change: through
a migration command (which is easier in v2 migration) or some other
solution?
From this point of view, I think Don's solution is one way to solve the
problem.

>
> ~Andrew
> .
>

-- 
Thanks,
Yang.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  9:22       ` Hongyang Yang
@ 2015-04-14  9:28         ` Andrew Cooper
  2015-04-14  9:42           ` Hongyang Yang
  2015-04-14  9:29         ` Wei Liu
  1 sibling, 1 reply; 47+ messages in thread
From: Andrew Cooper @ 2015-04-14  9:28 UTC (permalink / raw)
  To: Hongyang Yang, Don Slutz, xen-devel
  Cc: Shriram Rajagopalan, Ian Jackson, Wei Liu, Ian Campbell,
	Stefano Stabellini

On 14/04/15 10:22, Hongyang Yang wrote:
>
>
> On 04/14/2015 04:53 PM, Andrew Cooper wrote:
>> On 14/04/15 00:51, Don Slutz wrote:
>>> On 04/13/15 12:25, Andrew Cooper wrote:
>>>> On 13/04/15 17:09, Don Slutz wrote:
>>>>> If QEMU has called on xc_domain_setmaxmem to add more memory for
>>>>> option ROMs, domain restore needs to also increase the memory.
>>>>>
>>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>>> hvmloader has no interaction with xc_domain_restore().
>>> I did not mean to imply that it did.  Somehow I lost the fact that this
>>> is a bug in master:
>>>
>>> [root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
>>> Parsing config from /home/don/e1000x8.xfg
>>> got a tsc mode string: "native_paravirt"
>>> [root@dcs-xen-52 don]# xl save e1000x8 e1000x8.save
>>> Saving to e1000x8.save new xl format (info 0x1/0x0/3506)
>>> xc: Saving memory: iter 0 (last sent 0 skipped 0): 1044481/1044481 
>>> 100%
>>> [root@dcs-xen-52 don]# xl restore e1000x8.save
>>> Loading new save file e1000x8.save (new xl fmt info 0x1/0x0/3506)
>>>   Savefile contains xl domain config in JSON format
>>> Parsing config from <saved>
>>> xc: error: Failed to allocate memory for batch.!: Internal error
>>> libxl: error: libxl_create.c:1057:libxl__xc_domain_restore_done:
>>> restoring domain: Cannot allocate memory
>>> libxl: error: libxl_create.c:1129:domcreate_rebuild_done: cannot
>>> (re-)build domain: -3
>>> libxl: error: libxl.c:1576:libxl__destroy_domid: non-existant domain 2
>>> libxl: error: libxl.c:1540:domain_destroy_callback: unable to destroy
>>> guest with domid 2
>>> libxl: error: libxl_create.c:1490:domcreate_destruction_cb: unable to
>>> destroy domain 2 following failed creation
>>> [root@dcs-xen-52 don]#
>>>
>>> The hvmloader part turns out to be a "warning message" that is ok to
>>> ignore.  However "xl save" followed by "xl restore" is currently broken
>>> without some fix.
>>>
>>>> It is xl's job to propagate the current memory as part of
>>>> migration.  If
>>>> qemu has had to bump it up, this should be reflected in the domain
>>>> config file.
>>> Not at all sure how QEMU (either in dom0 or a driver domain) is
>>> expected
>>> to change a file (the domain config file).  My guess is that you are
>>> saying that "xl save" (before xc_domain_save) is the correct place to
>>> record (or add extra info).   However it looks to me that all the data
>>> needed is in the save file, but
>>> I could be wrong.
>>>
>>> The page data is correctly saved into the save file (migration stream).
>>> However when
>>> the new domain is created, its size is "wrong".  You cannot adjust any
>>> of the config info to fix the size, because the option ROM space to
>>> reserve is not an existing config option.   So if I am following you
>>> correctly, libxl needs to add and process more info to handle this
>>> case.
>>>
>>>> The migration path should not be hacked up to fix a higher level
>>>> toolstack bug.
>>> I do not see how QEMU is part of the "higher level toolstack".  If you
>>> mean xl vs xc, then
>>> I can see "xl save" somehow doing the work.  This patch works for me,
>>> but I am happy to
>>> explore other ways to fix this bug.
>>
>> If I understand correctly, the steps are this:
>>
>> * 'xl create' makes a VM of size $FOO
>> * qemu bumps the size to $FOO+$N
>> * 'xl save' writes $FOO+$N of page data, but the xl config file at the
>> start of the image still says $FOO
>> * 'xl restore' creates a vm of size $FOO, then instructs
>> xc_domain_restore() to put $FOO+$N pages into it.
>>
>> I would argue first, that qemu should not play in this area to start
>> with.
>>
>> However, the real bug here is that the domain configuration written by
>> xl save is inaccurate.
>
> There's a case like COLO:
> 1. Both Primary/Secondary VM are created first with the same config file
>    which makes a VM of size $FOO
> 2. qemu bumps the size to $FOO+$N
> 3. 'save' writes $FOO+$N of page data
> 4. 'restore' puts $FOO+$N pages into $FOO pages, which causes an error
>
> Even if you fix the configuration, the bug still exists, because the
> config is only transferred from Primary to Secondary once, at the very
> beginning, when you execute the 'xl remus' command. The problem is how
> to let the secondary (restore) side know about the size change: through
> a migration command (which is easier in v2 migration) or some other
> solution?
> From this point of view, I think Don's solution is one way to solve the
> problem.

Funny you should ask that.  Migration v2 for libxl moves the JSON config
blob into the libxl stream, rather than it being a single-shot action at
the very start.  From that point, it becomes plausible to send a new JSON
blob when an update occurs on the source side.
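
As a sketch of what that could look like on the wire: the framing below
follows the general type/length shape of v2 stream records, but the
record type itself is invented for illustration and is not part of any
spec:

#include <stdint.h>

/* Invented record type; not defined by any migration spec. */
#define REC_TYPE_UPDATED_JSON_CONFIG 0xf0000001u

/* General v2-style record: a fixed header, then a body padded to an
 * 8-byte boundary.  Here the body would be the refreshed JSON domain
 * config, resent whenever the source side updates it. */
struct rec_hdr {
    uint32_t type;    /* REC_TYPE_UPDATED_JSON_CONFIG */
    uint32_t length;  /* unpadded byte length of the JSON body */
};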

However, I am still firmly of the opinion that this is an xl/libxl bug
to be fixed at that level, not hacked around in the restore code.

~Andrew


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  9:22       ` Hongyang Yang
  2015-04-14  9:28         ` Andrew Cooper
@ 2015-04-14  9:29         ` Wei Liu
  2015-04-14  9:40           ` Hongyang Yang
  1 sibling, 1 reply; 47+ messages in thread
From: Wei Liu @ 2015-04-14  9:29 UTC (permalink / raw)
  To: Hongyang Yang
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, Don Slutz, xen-devel, Shriram Rajagopalan

On Tue, Apr 14, 2015 at 05:22:31PM +0800, Hongyang Yang wrote:
[...]
> >If I understand correctly, the steps are this:
> >
> >* 'xl create' makes a VM of size $FOO
> >* qemu bumps the size to $FOO+$N
> >* 'xl save' writes $FOO+$N of page data, but the xl config file at the
> >start of the image still says $FOO
> >* 'xl restore' creates a vm of size $FOO, then instructs
> >xc_domain_restore() to put $FOO+$N pages into it.
> >
> >I would argue first, that qemu should not play in this area to start with.
> >
> >However, the real bug here is that the domain configuration written by
> >xl save is inaccurate.
> 
> There's a case like COLO:
> 1. Both Primary/Secondary VM are created first with the same config file
>    which makes a VM of size $FOO
> 2. qemu bumps the size to $FOO+$N
> 3. 'save' writes $FOO+$N of page data
> 4. 'restore' puts $FOO+$N pages into $FOO pages, which causes an error
> 
> Even if you fix the configuration, the bug still exists, because the
> config is only transferred from Primary to Secondary once, at the very
> beginning, when you execute the 'xl remus' command. The problem is how
> to let the secondary (restore) side know about the size change: through
> a migration command (which is easier in v2 migration) or some other
> solution?

As I said in my reply to Don, the extra memory can be saved during
domain creation. That would solve this problem.

Wei.

> From this point of view, I think Don's solution is one way to solve the
> problem.
> 
> >
> >~Andrew
> >.
> >
> 
> -- 
> Thanks,
> Yang.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  9:29         ` Wei Liu
@ 2015-04-14  9:40           ` Hongyang Yang
  2015-04-14  9:52             ` Wei Liu
  0 siblings, 1 reply; 47+ messages in thread
From: Hongyang Yang @ 2015-04-14  9:40 UTC (permalink / raw)
  To: Wei Liu
  Cc: Ian Campbell, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan



On 04/14/2015 05:29 PM, Wei Liu wrote:
> On Tue, Apr 14, 2015 at 05:22:31PM +0800, Hongyang Yang wrote:
> [...]
>>> If I understand correctly, the steps are this:
>>>
>>> * 'xl create' makes a VM of size $FOO
>>> * qemu bumps the size to $FOO+$N
>>> * 'xl save' writes $FOO+$N of page data, but the xl config file at the
>>> start of the image still says $FOO
>>> * 'xl restore' creates a vm of size $FOO, then instructs
>>> xc_domain_restore() to put $FOO+$N pages into it.
>>>
>>> I would argue first, that qemu should not play in this area to start with.
>>>
>>> However, the real bug here is that the domain configuration written by
>>> xl save is inaccurate.
>>
>> There's a case like COLO:
>> 1. Both Primary/Secondary VM are created first with the same config file
>>     which makes a VM of size $FOO
>> 2. qemu bumps the size to $FOO+$N
>> 3. 'save' writes $FOO+$N of page data
>> 4. 'restore' put $FOO+$N pages into $FOO pages which will cause error
>>
>> Even if you fix the configuration, the bug still exists because the config
>> only been transferred from Primary to Secondary once at the very beginning
>> when you execute 'xl remus' command. The problem is how to let the secondary
>> (restore) side knows the size change? Through a migration command(which is
>> easier in v2 migration) or some other solution?
>
> As I said in my reply to Don, the extra memory can be saved during
> domain creation. That would solve this problem.

After migration we'll enter COLO mode, which will continuously send the
migration stream to the Secondary. Domain creation only happens before
doing live migration.

>
> Wei.
>
>> Form this point of view, I think Don's solution is one way to solve the
>> problem.
>>
>>>
>>> ~Andrew
>>> .
>>>
>>
>> --
>> Thanks,
>> Yang.
> .
>

-- 
Thanks,
Yang.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  9:28         ` Andrew Cooper
@ 2015-04-14  9:42           ` Hongyang Yang
  2015-04-15 14:32             ` Ian Campbell
  0 siblings, 1 reply; 47+ messages in thread
From: Hongyang Yang @ 2015-04-14  9:42 UTC (permalink / raw)
  To: Andrew Cooper, Don Slutz, xen-devel
  Cc: Shriram Rajagopalan, Ian Jackson, Wei Liu, Ian Campbell,
	Stefano Stabellini



On 04/14/2015 05:28 PM, Andrew Cooper wrote:
> On 14/04/15 10:22, Hongyang Yang wrote:
>>
>>
>> On 04/14/2015 04:53 PM, Andrew Cooper wrote:
>>> On 14/04/15 00:51, Don Slutz wrote:
>>>> On 04/13/15 12:25, Andrew Cooper wrote:
>>>>> On 13/04/15 17:09, Don Slutz wrote:
>>>>>> If QEMU has called on xc_domain_setmaxmem to add more memory for
>>>>>> option ROMs, domain restore needs to also increase the memory.
>>>>>>
>>>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>>>> hvmloader has no interaction with xc_domain_restore().
>>>> I did not mean to imply that it did.  Somehow I lost the fact that this
>>>> is a bug in master:
>>>>
>>>> [root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
>>>> Parsing config from /home/don/e1000x8.xfg
>>>> got a tsc mode string: "native_paravirt"
>>>> [root@dcs-xen-52 don]# xl save e1000x8 e1000x8.save
>>>> Saving to e1000x8.save new xl format (info 0x1/0x0/3506)
>>>> xc: Saving memory: iter 0 (last sent 0 skipped 0): 1044481/1044481
>>>> 100%
>>>> [root@dcs-xen-52 don]# xl restore e1000x8.save
>>>> Loading new save file e1000x8.save (new xl fmt info 0x1/0x0/3506)
>>>>    Savefile contains xl domain config in JSON format
>>>> Parsing config from <saved>
>>>> xc: error: Failed to allocate memory for batch.!: Internal error
>>>> libxl: error: libxl_create.c:1057:libxl__xc_domain_restore_done:
>>>> restoring domain: Cannot allocate memory
>>>> libxl: error: libxl_create.c:1129:domcreate_rebuild_done: cannot
>>>> (re-)build domain: -3
>>>> libxl: error: libxl.c:1576:libxl__destroy_domid: non-existant domain 2
>>>> libxl: error: libxl.c:1540:domain_destroy_callback: unable to destroy
>>>> guest with domid 2
>>>> libxl: error: libxl_create.c:1490:domcreate_destruction_cb: unable to
>>>> destroy domain 2 following failed creation
>>>> [root@dcs-xen-52 don]#
>>>>
>>>> The hvmloader part turns out to be a "warning message" that is ok to
>>>> ignore.  However "xl save" followed by "xl restore" is currently broken
>>>> without some fix.
>>>>
>>>>> It is xl's job to propagate the current memory as part of
>>>>> migration.  If
>>>>> qemu has had to bump it up, this should be reflected in the domain
>>>>> config file.
>>>> Not at all sure how QEMU (either in dom0 or a driver domain) is
>>>> expected
>>>> to change a file (the domain config file).  My guess is that you are
>>>> saying that "xl save" (before xc_domain_save) is the correct place to
>>>> record (or add extra info).   However it looks to me that all the data
>>>> needed is in the save file, but
>>>> I could be wrong.
>>>>
>>>> The page data is correctly saved into the save file (migration stream).
>>>> However when
>>>> the new domain is created, its size is "wrong".  You cannot adjust any
>>>> of the config info to fix the size, because the option ROM space to
>>>> reserve is not an existing config option.   So if I am following you
>>>> correctly, libxl needs to add and process more info to handle this
>>>> case.
>>>>
>>>>> The migration path should not be hacked up to fix a higher level
>>>>> toolstack bug.
>>>> I do not see how QEMU is part of the "higher level toolstack".  If you
>>>> mean xl vs xc, then
>>>> I can see "xl save" somehow doing the work.  This patch works for me,
>>>> but I am happy to
>>>> explore other ways to fix this bug.
>>>
>>> If I understand correctly, the steps are this:
>>>
>>> * 'xl create' makes a VM of size $FOO
>>> * qemu bumps the size to $FOO+$N
>>> * 'xl save' writes $FOO+$N of page data, but the xl config file at the
>>> start of the image still says $FOO
>>> * 'xl restore' creates a vm of size $FOO, then instructs
>>> xc_domain_restore() to put $FOO+$N pages into it.
>>>
>>> I would argue first, that qemu should not play in this area to start
>>> with.
>>>
>>> However, the real bug here is that the domain configuration written by
>>> xl save is inaccurate.
>>
>> There's a case like COLO:
>> 1. Both Primary/Secondary VM are created first with the same config file
>>     which makes a VM of size $FOO
>> 2. qemu bumps the size to $FOO+$N
>> 3. 'save' writes $FOO+$N of page data
>> 4. 'restore' puts $FOO+$N pages into $FOO pages, which causes an error
>>
>> Even if you fix the configuration, the bug still exists, because the
>> config is only transferred from Primary to Secondary once, at the very
>> beginning, when you execute the 'xl remus' command. The problem is how
>> to let the secondary (restore) side know about the size change: through
>> a migration command (which is easier in v2 migration) or some other
>> solution?
>> From this point of view, I think Don's solution is one way to solve the
>> problem.
>
> Funny you should ask that.  Migration v2 for libxl moves the JSON config
> blob into the libxl stream, rather than it being a single-shot action at
> the very start.  From that point, it becomes plausible to send a new JSON
> blob when an update occurs on the source side.

That should solve the problem, but Migrationv2 for libxl won't be
upstream for the moment and the bug still exists; is there a way to
solve the problem now, under v1 migration?

>
> However, I am still firmly of the opinion that is an xl/libxl bug to be
> fixed at that level, not hacked around in the restore code.
>
> ~Andrew
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  9:40           ` Hongyang Yang
@ 2015-04-14  9:52             ` Wei Liu
  2015-04-14 17:43               ` Don Slutz
  0 siblings, 1 reply; 47+ messages in thread
From: Wei Liu @ 2015-04-14  9:52 UTC (permalink / raw)
  To: Hongyang Yang
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, Don Slutz, xen-devel, Shriram Rajagopalan

On Tue, Apr 14, 2015 at 05:40:24PM +0800, Hongyang Yang wrote:
> 
> 
> On 04/14/2015 05:29 PM, Wei Liu wrote:
> >On Tue, Apr 14, 2015 at 05:22:31PM +0800, Hongyang Yang wrote:
> >[...]
> >>>If I understand correctly, the steps are this:
> >>>
> >>>* 'xl create' makes a VM of size $FOO
> >>>* qemu bumps the size to $FOO+$N
> >>>* 'xl save' writes $FOO+$N of page data, but the xl config file at the
> >>>start of the image still says $FOO
> >>>* 'xl restore' creates a vm of size $FOO, then instructs
> >>>xc_domain_restore() to put $FOO+$N pages into it.
> >>>
> >>>I would argue first, that qemu should not play in this area to start with.
> >>>
> >>>However, the real bug here is that the domain configuration written by
> >>>xl save is inaccurate.
> >>
> >>There's a case like COLO:
> >>1. Both Primary/Secondary VM are created first with the same config file
> >>    which makes a VM of size $FOO
> >>2. qemu bumps the size to $FOO+$N
> >>3. 'save' writes $FOO+$N of page data
> >>4. 'restore' put $FOO+$N pages into $FOO pages which will cause error
> >>
> >>Even if you fix the configuration, the bug still exists because the config
> >>only been transferred from Primary to Secondary once at the very beginning
> >>when you execute 'xl remus' command. The problem is how to let the secondary
> >>(restore) side knows the size change? Through a migration command(which is
> >>easier in v2 migration) or some other solution?
> >
> >As I said in my reply to Don, the extra memory can be saved during
> >domain creation. That would solve this problem.
> 
> After migration we'll enter COLO mode, which will continuously send the
> migration stream to the Secondary. Domain creation only happens before
> doing live migration.
> 

Because now the correct value is recorded during domain creation, it will
always be sent to the other end, so there is no need for this kind of
hack.

Wei.

> >
> >Wei.
> >
> >>Form this point of view, I think Don's solution is one way to solve the
> >>problem.
> >>
> >>>
> >>>~Andrew
> >>>.
> >>>
> >>
> >>--
> >>Thanks,
> >>Yang.
> >.
> >
> 
> -- 
> Thanks,
> Yang.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  8:53     ` Wei Liu
@ 2015-04-14 17:34       ` Don Slutz
  2015-04-14 17:51         ` Wei Liu
  2015-04-15  9:46         ` Stefano Stabellini
  0 siblings, 2 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-14 17:34 UTC (permalink / raw)
  To: Wei Liu
  Cc: Ian Campbell, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	xen-devel, Shriram Rajagopalan, Yang Hongyang

On 04/14/15 04:53, Wei Liu wrote:
> On Mon, Apr 13, 2015 at 07:51:31PM -0400, Don Slutz wrote:
>> On 04/13/15 12:25, Andrew Cooper wrote:
>>> On 13/04/15 17:09, Don Slutz wrote:
>>>> If QEMU has called on xc_domain_setmaxmem to add more memory for
>>>> option ROMs, domain restore needs to also increase the memory.
>>>>
>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>> hvmloader has no interaction with xc_domain_restore().
>> I did not mean to imply that it did.  Somehow I lost the fact that this
>> is a bug in master:
>>
>> [root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg

>> The page data is correctly saved into the save file (migration stream). 
>> However when
>> the new domain is created, it's size is "wrong".  You cannot adjust any
>> of the config info to fix the size, because the option ROM space to
>> reserve is not an existing config option.   So if I am following you
>> correctly, libxl needs to add and process more info to handle this case.
>>
> Thanks for the analysis. I think what you need to do is to adjust the
> memory size during domain creation in libxl, then the relevant data is
> saved at creation time. It should not be modified during restore.
>
> There is already various adjustments to memory related values in libxl.
> Grep video_memkb and mex_memkb in libxl.

I am assuming you mean max_memkb (since I cannot find mex_memkb).  And it
has the "hack" adjustment of "max_memkb + LIBXL_MAXMEM_CONSTANT".

> Is there a way to know how much more memory each option rom needs? If
> so, you can correctly account for the extra memory you need. This would
> be an ideal fix to this problem.

I do not know of a way to get this info.  It can change based on the
QEMU version.


The static define:

tools/libxl/libxl_internal.h:#define LIBXL_MAXMEM_CONSTANT 1024

is the old xl "hack" that allows Xen 4.5 (and before) to support up
to 4 e1000 nics.
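
Roughly, the effect is the following (a minimal sketch, not the actual
libxl code; the real call site is in libxl's domain-build path and the
helper name here is made up):

/* Minimal sketch, assuming libxl semantics: a fixed KiB slack is
 * folded into the xc_domain_setmaxmem() call so that small
 * toolstack-side allocations do not immediately hit the maxmem
 * limit. */
#include <xenctrl.h>

#define LIBXL_MAXMEM_CONSTANT 1024  /* KiB, from libxl_internal.h */

static int set_maxmem_with_slack(xc_interface *xch, uint32_t domid,
                                 uint64_t target_memkb)
{
    return xc_domain_setmaxmem(xch, domid,
                               target_memkb + LIBXL_MAXMEM_CONSTANT);
}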


   -Don Slutz

> Wei.
>
>>> The migration path should not be hacked up to fix a higher level
>>> toolstack bug.
>> I do not see how QEMU is part of the "higher level toolstack".  If you
>> mean xl vs xc, then
>> I can see "xl save" some how doing the work.  This patch works for me,
>> but I am happy to
>> explore other ways to fix this bug.
>>
>>    -Don Slutz
>>
>>> ~Andrew

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  9:52             ` Wei Liu
@ 2015-04-14 17:43               ` Don Slutz
  2015-04-14 17:54                 ` Wei Liu
  0 siblings, 1 reply; 47+ messages in thread
From: Don Slutz @ 2015-04-14 17:43 UTC (permalink / raw)
  To: Wei Liu, Hongyang Yang
  Cc: Ian Campbell, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	xen-devel, Shriram Rajagopalan

On 04/14/15 05:52, Wei Liu wrote:
> On Tue, Apr 14, 2015 at 05:40:24PM +0800, Hongyang Yang wrote:
>>
>> On 04/14/2015 05:29 PM, Wei Liu wrote:
>>> On Tue, Apr 14, 2015 at 05:22:31PM +0800, Hongyang Yang wrote:
>>> [...]
>>>>> If I understand correctly, the steps are this:
>>>>>
>>>>> * 'xl create' makes a VM of size $FOO
>>>>> * qemu bumps the size to $FOO+$N
>>>>> * 'xl save' writes $FOO+$N of page data, but the xl config file at the
>>>>> start of the image still says $FOO
>>>>> * 'xl restore' creates a vm of size $FOO, then instructs
>>>>> xc_domain_restore() to put $FOO+$N pages into it.
>>>>>
>>>>> I would argue first, that qemu should not play in this area to start with.


With the size of the ROMs unknown outside of QEMU, I do not know how to
correctly compute this in libxl.

>>>>> However, the real bug here is that the domain configuration written by
>>>>> xl save is inaccurate.

I do not 100% agree.  Since xc_domain_setmaxmem() could have been
done, this should be part of the full domain configuration.  However
right now there is no way to include this value in any of the domain
configuration structures.

>>>> There's a case like COLO:
>>>> 1. Both Primary/Secondary VM are created first with the same config file
>>>>    which makes a VM of size $FOO
>>>> 2. qemu bumps the size to $FOO+$N

This happens very early.  I do see it reflected in PoD
(xc_domain_get_pod_target()) data.

>>>> 3. 'save' writes $FOO+$N of page data
>>>> 4. 'restore' put $FOO+$N pages into $FOO pages which will cause error
>>>>
>>>> Even if you fix the configuration, the bug still exists because the config
>>>> only been transferred from Primary to Secondary once at the very beginning
>>>> when you execute 'xl remus' command.

I am hopeful that QEMU has already done its adjusting before you can do
this command.  Not having used it, I do not know for sure.

>>>>  The problem is how to let the secondary
>>>> (restore) side knows the size change? Through a migration command(which is
>>>> easier in v2 migration) or some other solution?
>>> As I said in my reply to Don, the extra memory can be saved during
>>> domain creation. That would solve this problem.

The memory does get saved; the extra info is missing.

>> After migration we'll enter COLO mode, which will continously send migration
>> stream to Secondary. Domain creation only happens before doing live migration.
>>
> Because now the correct value is recorded during domain creation, it will
> always be sent to the other end, so there is no need for this kind of
> hack.

I will be looking into how to add this info (max_memkb
(xc_domain_getinfo) or max_pages (xc_domain_getinfolist)) to the v1
migration in a compatible way, for use in restore.


   -Don Slutz

> Wei.
>
>>> Wei.
>>>
>>>> Form this point of view, I think Don's solution is one way to solve the
>>>> problem.
>>>>
>>>>> ~Andrew
>>>>> .
>>>>>
>>>> --
>>>> Thanks,
>>>> Yang.
>>> .
>>>
>> -- 
>> Thanks,
>> Yang.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14 17:34       ` Don Slutz
@ 2015-04-14 17:51         ` Wei Liu
  2015-04-15  9:46         ` Stefano Stabellini
  1 sibling, 0 replies; 47+ messages in thread
From: Wei Liu @ 2015-04-14 17:51 UTC (permalink / raw)
  To: Don Slutz
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Tue, Apr 14, 2015 at 01:34:43PM -0400, Don Slutz wrote:
> On 04/14/15 04:53, Wei Liu wrote:
> > On Mon, Apr 13, 2015 at 07:51:31PM -0400, Don Slutz wrote:
> >> On 04/13/15 12:25, Andrew Cooper wrote:
> >>> On 13/04/15 17:09, Don Slutz wrote:
> >>>> If QEMU has called on xc_domain_setmaxmem to add more memory for
> >>>> option ROMs, domain restore needs to also increase the memory.
> >>>>
> >>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
> >>> hvmloader has no interaction with xc_domain_restore().
> >> I did not mean to imply that it did.  Somehow I lost the fact that this
> >> is a bug in master:
> >>
> >> [root@dcs-xen-52 don]# xl cre -p /home/don/e1000x8.xfg
> 
> >> The page data is correctly saved into the save file (migration stream). 
> >> However when
> >> the new domain is created, it's size is "wrong".  You cannot adjust any
> >> of the config info to fix the size, because the option ROM space to
> >> reserve is not an existing config option.   So if I am following you
> >> correctly, libxl needs to add and process more info to handle this case.
> >>
> > Thanks for the analysis. I think what you need to do is to adjust the
> > memory size during domain creation in libxl, then the relevant data is
> > saved at creation time. It should not be modified during restore.
> >
> > There is already various adjustments to memory related values in libxl.
> > Grep video_memkb and mex_memkb in libxl.
> 
> I am assuming you mean max_memkb (since I cannot find mex_memkb).  And it
> has the "hack" adjustment of "max_memkb + LIBXL_MAXMEM_CONSTANT".
> 

No, I mean proper accounting.

> > Is there a way to know how much more memory each option rom needs? If
> > so, you can correctly account for the extra memory you need. This would
> > be an ideal fix to this problem.
> 
> I do not know of a way to get this info.  It can change based on the
> QEMU version.
> 

Hmm... We need to figure out another way.

> 
> The static define:
> 
> tools/libxl/libxl_internal.h:#define LIBXL_MAXMEM_CONSTANT 1024
> 
> Is the the old xl "hack" that allows Xen 4.5 (and before) to support up
> to 4 e1000 nics.
> 

Let's not add more similar hacks again. Removing an old hack doesn't
justify adding a more complex one.

Wei.

> 
>    -Don Slutz
> 
> > Wei.
> >
> >>> The migration path should not be hacked up to fix a higher level
> >>> toolstack bug.
> >> I do not see how QEMU is part of the "higher level toolstack".  If you
> >> mean xl vs xc, then
> >> I can see "xl save" some how doing the work.  This patch works for me,
> >> but I am happy to
> >> explore other ways to fix this bug.
> >>
> >>    -Don Slutz
> >>
> >>> ~Andrew

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14 17:43               ` Don Slutz
@ 2015-04-14 17:54                 ` Wei Liu
  2015-04-14 22:06                   ` [PATCH v2 1/1] tools: Handle xc_maxmem adjustments Don Slutz
  2015-04-15 14:34                   ` [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Ian Campbell
  0 siblings, 2 replies; 47+ messages in thread
From: Wei Liu @ 2015-04-14 17:54 UTC (permalink / raw)
  To: Don Slutz
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, George Dunlap,
	Andrew Cooper, Ian Jackson, xen-devel, Shriram Rajagopalan,
	Hongyang Yang

On Tue, Apr 14, 2015 at 01:43:38PM -0400, Don Slutz wrote:
> On 04/14/15 05:52, Wei Liu wrote:
> > On Tue, Apr 14, 2015 at 05:40:24PM +0800, Hongyang Yang wrote:
> >>
> >> On 04/14/2015 05:29 PM, Wei Liu wrote:
> >>> On Tue, Apr 14, 2015 at 05:22:31PM +0800, Hongyang Yang wrote:
> >>> [...]
> >>>>> If I understand correctly, the steps are this:
> >>>>>
> >>>>> * 'xl create' makes a VM of size $FOO
> >>>>> * qemu bumps the size to $FOO+$N
> >>>>> * 'xl save' writes $FOO+$N of page data, but the xl config file at the
> >>>>> start of the image still says $FOO
> >>>>> * 'xl restore' creates a vm of size $FOO, then instructs
> >>>>> xc_domain_restore() to put $FOO+$N pages into it.
> >>>>>
> >>>>> I would argue first, that qemu should not play in this area to start with.
> 
> 
> With the size of the ROMs unknown outside of QEMU, I do not know how to
> correctly compute this in libxl.
> 
> >>>>> However, the real bug here is that the domain configuration written by
> >>>>> xl save is inaccurate.
> 
> I do not 100% agree.  Since xc_domain_setmaxmem() could have been done,
> this should
> be part of the full domain configuration.  However right now there is no
> way to include
> this value in any of the domain configuration structures.
> 
> >>>> There's a case like COLO:
> >>>> 1. Both Primary/Secondary VM are created first with the same config file
> >>>>    which makes a VM of size $FOO
> >>>> 2. qemu bumps the size to $FOO+$N
> 
> This happens very early.  I do see it reflected in PoD
> (xc_domain_get_pod_target()) data.
> 
> >>>> 3. 'save' writes $FOO+$N of page data
> >>>> 4. 'restore' put $FOO+$N pages into $FOO pages which will cause error
> >>>>
> >>>> Even if you fix the configuration, the bug still exists because the config
> >>>> only been transferred from Primary to Secondary once at the very beginning
> >>>> when you execute 'xl remus' command.
> 
> I am hopeful that QEMU has already done it's adjusting before you can do
> this
> command.  Not having used it, I do not know for sure.
> 
> >>>>  The problem is how to let the secondary
> >>>> (restore) side knows the size change? Through a migration command(which is
> >>>> easier in v2 migration) or some other solution?
> >>> As I said in my reply to Don, the extra memory can be saved during
> >>> domain creation. That would solve this problem.
> 
> The memory does get saved, the extra info is missing.
> 
> >> After migration we'll enter COLO mode, which will continously send migration
> >> stream to Secondary. Domain creation only happens before doing live migration.
> >>
> > Because now the correct value is recorded during domain creation, it will
> > always be sent to the other end, so there is no need for this kind of
> > hack.
> 
> I will be looking into how to add this info (max_memkb
> (xc_domain_getinfo) or
> max_pages ()xc_domain_getinfolist) to the v1 migration in a compatible way
> with a usage in restore.
> 

Let's see if we can record this in xc image format. I haven't looked,
but George mentioned that it might be possible to do so.

There are certainly other options than the one you propose. I think we
need to agree on a fix before proceeding...

Wei.

> 
>    -Don Slutz
> 
> > Wei.
> >
> >>> Wei.
> >>>
> >>>> Form this point of view, I think Don's solution is one way to solve the
> >>>> problem.
> >>>>
> >>>>> ~Andrew
> >>>>> .
> >>>>>
> >>>> --
> >>>> Thanks,
> >>>> Yang.
> >>> .
> >>>
> >> -- 
> >> Thanks,
> >> Yang.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-14 17:54                 ` Wei Liu
@ 2015-04-14 22:06                   ` Don Slutz
  2015-04-15  9:53                     ` Andrew Cooper
  2015-04-15 14:34                   ` [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Ian Campbell
  1 sibling, 1 reply; 47+ messages in thread
From: Don Slutz @ 2015-04-14 22:06 UTC (permalink / raw)
  To: xen-devel
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, Don Slutz, Shriram Rajagopalan, Yang Hongyang

This fixes an issue where "xl save" followed by "xl restore" reports:
"xc: error: Failed to allocate memory for batch.!: Internal error"

One of the ways to get into this state is to have more than 4 e1000
nics configured.

Signed-off-by: Don Slutz <dslutz@verizon.com>
---
Changes from v1 to v2:

Renamed from "xc_domain_restore: Allow QEMU to increase memory"

Reworked completely to have xc_domain_save save the domain config
data that is needed.  And xc_domain_restore to use the new saved
data in recreating the domain.

 tools/libxc/xc_domain_restore.c | 21 +++++++++++++++++++++
 tools/libxc/xc_domain_save.c    | 16 ++++++++++++++++
 tools/libxc/xg_save_restore.h   |  2 ++
 3 files changed, 39 insertions(+)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index a382701..359fe41 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -769,6 +769,19 @@ static void pagebuf_free(pagebuf_t* buf)
     }
 }
 
+static int xc_maxmem_restore(xc_interface *xch, struct restore_ctx *ctx,
+                             int dom, int fd)
+{
+    uint64_t maxkb;
+
+    if ( RDEXACT(fd, &maxkb, sizeof(maxkb)) )
+    {
+        PERROR("Error when reading maxmem");
+        return -1;
+    }
+    return xc_domain_setmaxmem(xch, dom, maxkb);
+}
+
 static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx,
                            pagebuf_t* buf, int fd, uint32_t dom)
 {
@@ -1013,6 +1026,14 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx,
         }
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
 
+    case XC_SAVE_ID_XC_MAXMEM:
+        DPRINTF("xc_domain_restore start maxmem\n");
+        if ( xc_maxmem_restore(xch, ctx, dom, fd) ) {
+            PERROR("error reading/restoring maxmem");
+            return -1;
+        }
+        return pagebuf_get_one(xch, ctx, buf, fd, dom);
+
     default:
         if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
             ERROR("Max batch size exceeded (%d). Giving up.", count);
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 254fdb3..381b95e 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -782,6 +782,16 @@ static xen_pfn_t *map_and_save_p2m_table(xc_interface *xch,
     return success ? p2m : NULL;
 }
 
+static int xc_maxmem_save(int io_fd, uint64_t maxkb)
+{
+    int marker = XC_SAVE_ID_XC_MAXMEM;
+
+    if ( write_exact(io_fd, &marker, sizeof(marker)) ||
+         write_exact(io_fd, &maxkb, sizeof(maxkb)) )
+        return -1;
+    return 0;
+}
+
 /* must be done AFTER suspend_and_state() */
 static int save_tsc_info(xc_interface *xch, uint32_t dom, int io_fd)
 {
@@ -1098,6 +1108,12 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
 
     print_stats(xch, dom, 0, &time_stats, &shadow_stats, 0);
 
+    if ( xc_maxmem_save(io_fd, info.max_memkb) < 0 )
+    {
+        PERROR("Error when writing to state file (maxmem)");
+        goto out;
+    }
+
     tmem_saved = xc_tmem_save(xch, dom, io_fd, live, XC_SAVE_ID_TMEM);
     if ( tmem_saved == -1 )
     {
diff --git a/tools/libxc/xg_save_restore.h b/tools/libxc/xg_save_restore.h
index bdd9009..4123a84 100644
--- a/tools/libxc/xg_save_restore.h
+++ b/tools/libxc/xg_save_restore.h
@@ -262,6 +262,8 @@
 /* These are a pair; it is an error for one to exist without the other */
 #define XC_SAVE_ID_HVM_IOREQ_SERVER_PFN -19
 #define XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES -20
+/* QEMU auto size handling */
+#define XC_SAVE_ID_XC_MAXMEM          -21
 
 /*
 ** We process save/restore/migrate in batches of pages; the below
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14 17:34       ` Don Slutz
  2015-04-14 17:51         ` Wei Liu
@ 2015-04-15  9:46         ` Stefano Stabellini
  2015-04-15 10:09           ` Ian Campbell
  1 sibling, 1 reply; 47+ messages in thread
From: Stefano Stabellini @ 2015-04-15  9:46 UTC (permalink / raw)
  To: Don Slutz
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Tue, 14 Apr 2015, Don Slutz wrote:
> On 04/14/15 04:53, Wei Liu wrote:
> > Is there a way to know how much more memory each option rom needs? If
> > so, you can correctly account for the extra memory you need. This would
> > be an ideal fix to this problem.
> 
> I do not know of a way to get this info.  It can change based on the
> QEMU version.

Indeed it would be fragile to rely on a fixed size for each option rom,
given that they come out of the QEMU tree and can change at any time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-14 22:06                   ` [PATCH v2 1/1] tools: Handle xc_maxmem adjustments Don Slutz
@ 2015-04-15  9:53                     ` Andrew Cooper
  2015-04-15  9:57                       ` Stefano Stabellini
  2015-04-15 10:16                       ` George Dunlap
  0 siblings, 2 replies; 47+ messages in thread
From: Andrew Cooper @ 2015-04-15  9:53 UTC (permalink / raw)
  To: Don Slutz, xen-devel
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Shriram Rajagopalan, Yang Hongyang

On 14/04/15 23:06, Don Slutz wrote:
> This fixes an issue where "xl save" followed by "xl restore" reports:
> "xc: error: Failed to allocate memory for batch.!: Internal error"
>
> One of the ways to get into this state is to have more then 4 e1000
> nics configured.
>
> Signed-off-by: Don Slutz <dslutz@verizon.com>

I still don't think this is the correct solution, although I will
concede that this is a far better patch than v1.

Going back to the original problem, why does Qemu need to change maxmem
in the first place?  You talk about e1000 option roms, but the option
roms themselves must be allocated in an appropriate PCI bridge window.

As a result, there is necessarily already ram backing, which can be
ballooned back in.  Currently, all ram behind the PCI MMIO is ballooned
out by hvmloader but still accounted to the domain, and otherwise wasted.

~Andrew

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15  9:53                     ` Andrew Cooper
@ 2015-04-15  9:57                       ` Stefano Stabellini
  2015-04-15 10:03                         ` Andrew Cooper
  2015-04-15 10:16                       ` George Dunlap
  1 sibling, 1 reply; 47+ messages in thread
From: Stefano Stabellini @ 2015-04-15  9:57 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Wed, 15 Apr 2015, Andrew Cooper wrote:
> On 14/04/15 23:06, Don Slutz wrote:
> > This fixes an issue where "xl save" followed by "xl restore" reports:
> > "xc: error: Failed to allocate memory for batch.!: Internal error"
> >
> > One of the ways to get into this state is to have more then 4 e1000
> > nics configured.
> >
> > Signed-off-by: Don Slutz <dslutz@verizon.com>
> 
> I still don't think this is the correct solution, although I will
> concede that this is a far better patch than v1.
> 
> Going back to the original problem, why does Qemu need to change maxmem
> in the first place?  You talk about e1000 option roms, but the option
> roms themselves must be allocated in an appropriate PCI bridge window.

QEMU allocates actual memory to store the option roms using
xc_domain_populate_physmap_exact, increasing the total amount of ram
allocated to the guest. QEMU needs to increase maxmem too, otherwise
memory allocations will fail after a few option roms.
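
The pattern is roughly the following (a sketch, not the literal QEMU
code; the helper name, gfn layout and page count are illustrative):

/* Bump the hypervisor-side limit first, then back the ROM region
 * with real RAM at exactly the requested gfns. */
#include <xenctrl.h>

static int alloc_option_rom_ram(xc_interface *xch, uint32_t domid,
                                uint64_t cur_max_memkb,
                                xen_pfn_t first_gfn, unsigned int nr_pages)
{
    xen_pfn_t gfns[nr_pages];
    unsigned int i;

    for ( i = 0; i < nr_pages; i++ )
        gfns[i] = first_gfn + i;

    /* Without this, populate_physmap fails once the allocation made
     * at domain-build time is exhausted. */
    if ( xc_domain_setmaxmem(xch, domid,
                             cur_max_memkb +
                             ((uint64_t)nr_pages << (XC_PAGE_SHIFT - 10))) )
        return -1;

    return xc_domain_populate_physmap_exact(xch, domid, nr_pages,
                                            0 /* order */, 0 /* mem_flags */,
                                            gfns);
}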

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15  9:57                       ` Stefano Stabellini
@ 2015-04-15 10:03                         ` Andrew Cooper
  2015-04-15 10:21                           ` George Dunlap
  2015-04-15 11:48                           ` Stefano Stabellini
  0 siblings, 2 replies; 47+ messages in thread
From: Andrew Cooper @ 2015-04-15 10:03 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Wei Liu, Ian Campbell, Ian Jackson, Don Slutz, xen-devel,
	Shriram Rajagopalan, Yang Hongyang

On 15/04/15 10:57, Stefano Stabellini wrote:
> On Wed, 15 Apr 2015, Andrew Cooper wrote:
>> On 14/04/15 23:06, Don Slutz wrote:
>>> This fixes an issue where "xl save" followed by "xl restore" reports:
>>> "xc: error: Failed to allocate memory for batch.!: Internal error"
>>>
>>> One of the ways to get into this state is to have more then 4 e1000
>>> nics configured.
>>>
>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>> I still don't think this is the correct solution, although I will
>> concede that this is a far better patch than v1.
>>
>> Going back to the original problem, why does Qemu need to change maxmem
>> in the first place?  You talk about e1000 option roms, but the option
>> roms themselves must be allocated in an appropriate PCI bridge window.
> QEMU allocates actual memory to store the option roms using
> xc_domain_populate_physmap_exact, increasing the total amount of ram
> allocated to the guest. QEMU needs to increase maxmem too, otherwise
> memory allocations will fail after a few option roms.

It doesn't need to.  The domain already has that much ram allocated to
itself and otherwise wasted.  Qemu should reuse that ram rather than
wasting more.

~Andrew

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15  9:46         ` Stefano Stabellini
@ 2015-04-15 10:09           ` Ian Campbell
  2015-04-15 11:15             ` Hongyang Yang
  0 siblings, 1 reply; 47+ messages in thread
From: Ian Campbell @ 2015-04-15 10:09 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Wei Liu, Andrew Cooper, Ian Jackson, Don Slutz, xen-devel,
	Shriram Rajagopalan, Yang Hongyang

On Wed, 2015-04-15 at 10:46 +0100, Stefano Stabellini wrote:
> On Tue, 14 Apr 2015, Don Slutz wrote:
> > On 04/14/15 04:53, Wei Liu wrote:
> > > Is there a way to know how much more memory each option rom needs? If
> > > so, you can correctly account for the extra memory you need. This would
> > > be an ideal fix to this problem.
> > 
> > I do not know of a way to get this info.  It can change based on the
> > QEMU version.
> 
> Indeed it would be fragile to rely on a fixed size for each option rom,
> given that they come out of the QEMU tree and can change at any time.

Only having dipped into this thread so far it seems to me we need some
way for qemu to communicate the result of its manipulations of maxmem
into the migration stream explicitly to be picked up by the other end.

Ian

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15  9:53                     ` Andrew Cooper
  2015-04-15  9:57                       ` Stefano Stabellini
@ 2015-04-15 10:16                       ` George Dunlap
  2015-04-15 10:25                         ` George Dunlap
  1 sibling, 1 reply; 47+ messages in thread
From: George Dunlap @ 2015-04-15 10:16 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Wed, Apr 15, 2015 at 10:53 AM, Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
> On 14/04/15 23:06, Don Slutz wrote:
>> This fixes an issue where "xl save" followed by "xl restore" reports:
>> "xc: error: Failed to allocate memory for batch.!: Internal error"
>>
>> One of the ways to get into this state is to have more then 4 e1000
>> nics configured.
>>
>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>
> I still don't think this is the correct solution, although I will
> concede that this is a far better patch than v1.
>
> Going back to the original problem, why does Qemu need to change maxmem
> in the first place?  You talk about e1000 option roms, but the option
> roms themselves must be allocated in an appropriate PCI bridge window.
>
> As a result, there is necessarily already ram backing, which can be
> ballooned back in.  Currently, all ram behind the PCI MMIO is ballooned
> out hvmloader but still accounted to the domain, and otherwise wasted.

First, I think you should avoid using the word "balloon" unless you're
actually talking about a balloon -- i.e., a pool of memory allocated
from the guest OS by a kernel driver, behind which there is no actual
ram.

The ram behind the PCI MMIO hole is relocated to highmem, not
ballooned, just like it might be in a real BIOS.  Are you saying that
this is not reflected anywhere in the e820 map, so the guest OS
doesn't know that such ram exists?

Secondly, I agree in general that the original solution -- having qemu
ask the hypervisor directly for more ram -- isn't a good one.  It
would have been better if it could have requested that from libxl
somehow.

Without having actually reviewed the patch, I think this solution is a
decent one.  But if we could update it in the libxl domain config in a
way that was backwards-compatible, that would be fine too.  I don't
think we should change maxmem in the domain config -- I think there
should be another field, maxpages or something, which talks about the
hypervisor side.

Maybe we could get in the habit of saying "memory" when we talk about
the illusion we're giving to the guest, and "pages" when we're talking
about the actual number of pages allocated within Xen?

 -George

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15 10:03                         ` Andrew Cooper
@ 2015-04-15 10:21                           ` George Dunlap
  2015-04-15 11:48                           ` Stefano Stabellini
  1 sibling, 0 replies; 47+ messages in thread
From: George Dunlap @ 2015-04-15 10:21 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Wed, Apr 15, 2015 at 11:03 AM, Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
> On 15/04/15 10:57, Stefano Stabellini wrote:
>> On Wed, 15 Apr 2015, Andrew Cooper wrote:
>>> On 14/04/15 23:06, Don Slutz wrote:
>>>> This fixes an issue where "xl save" followed by "xl restore" reports:
>>>> "xc: error: Failed to allocate memory for batch.!: Internal error"
>>>>
>>>> One of the ways to get into this state is to have more then 4 e1000
>>>> nics configured.
>>>>
>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>> I still don't think this is the correct solution, although I will
>>> concede that this is a far better patch than v1.
>>>
>>> Going back to the original problem, why does Qemu need to change maxmem
>>> in the first place?  You talk about e1000 option roms, but the option
>>> roms themselves must be allocated in an appropriate PCI bridge window.
>> QEMU allocates actual memory to store the option roms using
>> xc_domain_populate_physmap_exact, increasing the total amount of ram
>> allocated to the guest. QEMU needs to increase maxmem too, otherwise
>> memory allocations will fail after a few option roms.
>
> It doesn't need to.  The domain already has that much ram allocated to
> itself and otherwise wasted.  Qemu should reuse that ram rather than
> wasting more.

I would argue that we should start trying to make "memory" mean "pages
that the guest sees as RAM".  Saying "Give this guest 8G of memory"
and then having that eaten away by all the extraneous memory
requirements (VGA ram, &c) seems like a less good option.  If some of
that memory is not exposed to the guest at the moment because, for
instance, the e820 is not being updated, then that's a bug we should
fix separately.

Alternately, we should go the other way and make "memory" mean "Total
pages of RAM the guest can ever have", and take *all* the extra
requirements (VGA, option roms, &c) out of that.

 -George

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15 10:16                       ` George Dunlap
@ 2015-04-15 10:25                         ` George Dunlap
  2015-04-15 10:30                           ` Andrew Cooper
  0 siblings, 1 reply; 47+ messages in thread
From: George Dunlap @ 2015-04-15 10:25 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Wed, Apr 15, 2015 at 11:16 AM, George Dunlap <dunlapg@umich.edu> wrote:
> Without having actually reviewed the patch, I think this solution is a
> decent one.  But if we could update it in the libxl domain config in a
> way that was backwards-compatible, that would be fine too.  I don't
> think we should change maxmem in the domain config -- I think there
> should be another field, maxpages or something, which talks about the
> hypervisor side.

One other advantage of the solution Don gives here is that it could
(in theory) handle the case where maxmem changes *while the guest is
migrating*.  I suspect actually that if you change maxmem during
migration at the moment, you may get some rather strange effects.
This would be especially important for things like COLO or remus.

 -George

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15 10:25                         ` George Dunlap
@ 2015-04-15 10:30                           ` Andrew Cooper
  0 siblings, 0 replies; 47+ messages in thread
From: Andrew Cooper @ 2015-04-15 10:30 UTC (permalink / raw)
  To: George Dunlap
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Yang Hongyang

On 15/04/15 11:25, George Dunlap wrote:
> On Wed, Apr 15, 2015 at 11:16 AM, George Dunlap <dunlapg@umich.edu> wrote:
>> Without having actually reviewed the patch, I think this solution is a
>> decent one.  But if we could update it in the libxl domain config in a
>> way that was backwards-compatible, that would be fine too.  I don't
>> think we should change maxmem in the domain config -- I think there
>> should be another field, maxpages or something, which talks about the
>> hypervisor side.
> One other advantage of the solution Don gives here is that it could
> (in theory) handle the case where maxmem changes *while the guest is
> migrating*.  I suspect actually that if you change maxmem during
> migration at the moment, you may get some rather strange effects.
> This would be especially important for things like COLO or remus.

That case will blow up very spectacularly for both PV and HVM domains,
in part because of changes not being reflected in the logdirty bitmap.
Even a domain which actively changes the size of its balloon while the
live migration loop is running will blow up.

There is a long thread from 6? months ago which discusses the necessary
steps to make it function, but it is not trivial to fix, and certainly
wasn't something we were going to fix in the first pass of migration v2.

~Andrew

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 10:09           ` Ian Campbell
@ 2015-04-15 11:15             ` Hongyang Yang
  2015-04-15 11:27               ` Stefano Stabellini
  0 siblings, 1 reply; 47+ messages in thread
From: Hongyang Yang @ 2015-04-15 11:15 UTC (permalink / raw)
  To: Ian Campbell, Stefano Stabellini
  Cc: Wei Liu, Andrew Cooper, Ian Jackson, Don Slutz, xen-devel,
	Shriram Rajagopalan



On 04/15/2015 06:09 PM, Ian Campbell wrote:
> On Wed, 2015-04-15 at 10:46 +0100, Stefano Stabellini wrote:
>> On Tue, 14 Apr 2015, Don Slutz wrote:
>>> On 04/14/15 04:53, Wei Liu wrote:
>>>> Is there a way to know how much more memory each option rom needs? If
>>>> so, you can correctly account for the extra memory you need. This would
>>>> be an ideal fix to this problem.
>>>
>>> I do not know of a way to get this info.  It can change based on the
>>> QEMU version.
>>
>> Indeed it would be fragile to rely on a fixed size for each option rom,
>> given that they come out of the QEMU tree and can change at any time.
>
> Only having dipped into this thread so far it seems to me we need some
> way for qemu to communicate the result of its manipulations of maxmem
> into the migration stream explicitly to be picked up by the other end.

Totally agreed.

>
> Ian
>
> .
>

-- 
Thanks,
Yang.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 11:15             ` Hongyang Yang
@ 2015-04-15 11:27               ` Stefano Stabellini
  2015-04-15 11:56                 ` Ian Campbell
  0 siblings, 1 reply; 47+ messages in thread
From: Stefano Stabellini @ 2015-04-15 11:27 UTC (permalink / raw)
  To: Hongyang Yang
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	Ian Jackson, Don Slutz, xen-devel, Shriram Rajagopalan

On Wed, 15 Apr 2015, Hongyang Yang wrote:
> On 04/15/2015 06:09 PM, Ian Campbell wrote:
> > On Wed, 2015-04-15 at 10:46 +0100, Stefano Stabellini wrote:
> > > On Tue, 14 Apr 2015, Don Slutz wrote:
> > > > On 04/14/15 04:53, Wei Liu wrote:
> > > > > Is there a way to know how much more memory each option rom needs? If
> > > > > so, you can correctly account for the extra memory you need. This
> > > > > would
> > > > > be an ideal fix to this problem.
> > > > 
> > > > I do not know of a way to get this info.  It can change based on the
> > > > QEMU version.
> > > 
> > > Indeed it would be fragile to rely on a fixed size for each option rom,
> > > given that they come out of the QEMU tree and can change at any time.
> > 
> > Only having dipped into this thread so far it seems to me we need some
> > way for qemu to communicate the result of its manipulations of maxmem
> > into the migration stream explicitly to be picked up by the other end.
> 
> Totally agreed.

Xen knows exactly the maxmem setting for the domain, I don't think we
need one more notification from QEMU?
Libxl/Xen could just write the maxmem value to the migration stream.
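
On the sending side that could look something like this (sketch only;
it mirrors the xc_domain_getinfolist() pattern already used in libxc,
and the helper name is made up):

/* Read the current hypervisor-side limit from Xen so the sender can
 * put it in the stream instead of trusting the original config. */
#include <xenctrl.h>

static int query_maxmem_kb(xc_interface *xch, uint32_t domid,
                           uint64_t *maxkb)
{
    xc_domaininfo_t info;

    if ( xc_domain_getinfolist(xch, domid, 1, &info) != 1 ||
         info.domain != domid )
        return -1;

    /* max_pages is in pages; convert to KiB for the stream record. */
    *maxkb = (uint64_t)info.max_pages << (XC_PAGE_SHIFT - 10);
    return 0;
}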

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15 10:03                         ` Andrew Cooper
  2015-04-15 10:21                           ` George Dunlap
@ 2015-04-15 11:48                           ` Stefano Stabellini
  2015-04-15 21:43                             ` Don Slutz
  1 sibling, 1 reply; 47+ messages in thread
From: Stefano Stabellini @ 2015-04-15 11:48 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Wei Liu, Ian Campbell, Stefano Stabellini, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Yang Hongyang

On Wed, 15 Apr 2015, Andrew Cooper wrote:
> On 15/04/15 10:57, Stefano Stabellini wrote:
> > On Wed, 15 Apr 2015, Andrew Cooper wrote:
> >> On 14/04/15 23:06, Don Slutz wrote:
> >>> This fixes an issue where "xl save" followed by "xl restore" reports:
> >>> "xc: error: Failed to allocate memory for batch.!: Internal error"
> >>>
> >>> One of the ways to get into this state is to have more then 4 e1000
> >>> nics configured.
> >>>
> >>> Signed-off-by: Don Slutz <dslutz@verizon.com>
> >> I still don't think this is the correct solution, although I will
> >> concede that this is a far better patch than v1.
> >>
> >> Going back to the original problem, why does Qemu need to change maxmem
> >> in the first place?  You talk about e1000 option roms, but the option
> >> roms themselves must be allocated in an appropriate PCI bridge window.
> > QEMU allocates actual memory to store the option roms using
> > xc_domain_populate_physmap_exact, increasing the total amount of ram
> > allocated to the guest. QEMU needs to increase maxmem too, otherwise
> > memory allocations will fail after a few option roms.
> 
> It doesn't need to.  The domain already has that much ram allocated to
> itself and otherwise wasted.  Qemu should reuse that ram rather than
> wasting more.

QEMU has no knowledge about what the e820 contains and considers the
memory relocated from the PCI hole to the top of ram as valid guest
memory. Libxl tells QEMU how big the PCI hole is by passing
max-ram-below-4g to QEMU.

I think that we should fix the guest e820 as it is actually valid guest
memory.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 11:27               ` Stefano Stabellini
@ 2015-04-15 11:56                 ` Ian Campbell
  2015-04-15 18:19                   ` Don Slutz
  0 siblings, 1 reply; 47+ messages in thread
From: Ian Campbell @ 2015-04-15 11:56 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Wei Liu, Andrew Cooper, Ian Jackson, Don Slutz, xen-devel,
	Shriram Rajagopalan, Hongyang Yang

On Wed, 2015-04-15 at 12:27 +0100, Stefano Stabellini wrote:
> On Wed, 15 Apr 2015, Hongyang Yang wrote:
> > On 04/15/2015 06:09 PM, Ian Campbell wrote:
> > > On Wed, 2015-04-15 at 10:46 +0100, Stefano Stabellini wrote:
> > > > On Tue, 14 Apr 2015, Don Slutz wrote:
> > > > > On 04/14/15 04:53, Wei Liu wrote:
> > > > > > Is there a way to know how much more memory each option rom needs? If
> > > > > > so, you can correctly account for the extra memory you need. This
> > > > > > would
> > > > > > be an ideal fix to this problem.
> > > > > 
> > > > > I do not know of a way to get this info.  It can change based on the
> > > > > QEMU version.
> > > > 
> > > > Indeed it would be fragile to rely on a fixed size for each option rom,
> > > > given that they come out of the QEMU tree and can change at any time.
> > > 
> > > Only having dipped into this thread so far it seems to me we need some
> > > way for qemu to communicate the result of its manipulations of maxmem
> > > into the migration stream explicitly to be picked up by the other end.
> > 
> > Totally agreed.
> 
> Xen knows exactly the maxmem setting for the domain, I don't think we
> need one more notification from QEMU?
> Libxl/Xen could just write the maxmem value to the migration stream.

If that is all which is needed and not the delta between the notional
maxmem which the guest was configured with and what we actually gave it
due to things such as option ROMs, then sure.

Ian.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-13 16:09 [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Don Slutz
                   ` (2 preceding siblings ...)
  2015-04-14  3:46 ` Hongyang Yang
@ 2015-04-15 14:30 ` Ian Campbell
  2015-04-15 18:17   ` Don Slutz
  3 siblings, 1 reply; 47+ messages in thread
From: Ian Campbell @ 2015-04-15 14:30 UTC (permalink / raw)
  To: Don Slutz
  Cc: Wei Liu, Stefano Stabellini, Ian Jackson, xen-devel,
	Shriram Rajagopalan, Yang Hongyang

On Mon, 2015-04-13 at 12:09 -0400, Don Slutz wrote:
>  
> +/* Leave some slack so that hvmloader does not complain about lack of
> + * memory at boot time ("Could not allocate order=0 extent").
> + * Once hvmloader is modified to cope with that situation without
> + * printing warning messages, QEMU_SPARE_PAGES can be removed.

How and where is hvmloader involved in a domain restore? 

Ian.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14  9:42           ` Hongyang Yang
@ 2015-04-15 14:32             ` Ian Campbell
  2015-04-15 20:41               ` Don Slutz
  0 siblings, 1 reply; 47+ messages in thread
From: Ian Campbell @ 2015-04-15 14:32 UTC (permalink / raw)
  To: Hongyang Yang
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan

On Tue, 2015-04-14 at 17:42 +0800, Hongyang Yang wrote:
> 
> On 04/14/2015 05:28 PM, Andrew Cooper wrote:

> > Funny you should ask that.  Migrationv2 for libxl moves the JSON config
> > blob into the libxl stream, rather than being a singleshot action at the
> > very start.  From that point, it becomes plausible to send a new JSON
> > blob when an update on the source side occurs.
> 
> That should solve the problem, but Migrationv2 for libxl won't be in upstream
> for the moment, the bug still exists, is there a way to solve the problem
> now under v1 migration?

As far as I am concerned migration v2 is a prerequisite for COLO. We
won't be taking any major changes to the v1 code at this point.

Ian.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-14 17:54                 ` Wei Liu
  2015-04-14 22:06                   ` [PATCH v2 1/1] tools: Handle xc_maxmem adjustments Don Slutz
@ 2015-04-15 14:34                   ` Ian Campbell
  2015-04-15 16:36                     ` Wei Liu
  1 sibling, 1 reply; 47+ messages in thread
From: Ian Campbell @ 2015-04-15 14:34 UTC (permalink / raw)
  To: Wei Liu
  Cc: Stefano Stabellini, George Dunlap, Andrew Cooper, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Hongyang Yang

On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
> Let's see if we can record this in xc image format. I haven't looked,
> but George mentioned that it might be possible to do so.

Can this not be done at the save stage in the bit where we update the
JSON to reflect the actual configuration?

Ian.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 14:34                   ` [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Ian Campbell
@ 2015-04-15 16:36                     ` Wei Liu
  2015-04-15 16:45                       ` Ian Campbell
  0 siblings, 1 reply; 47+ messages in thread
From: Wei Liu @ 2015-04-15 16:36 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Wei Liu, Stefano Stabellini, George Dunlap, Andrew Cooper,
	Ian Jackson, Don Slutz, xen-devel, Shriram Rajagopalan,
	Hongyang Yang

On Wed, Apr 15, 2015 at 03:34:48PM +0100, Ian Campbell wrote:
> On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
> > Let's see if we can record this in xc image format. I haven't looked,
> > but George mentioned that it might be possible to do so.
> 
> Can this not be done at the save stage in the bit where we update the
> JSON to reflect the actual configuration?
> 

Yes, it's possible to do that. The value is not in the libxl IDL simply
because it has never been there.

But this might not help COLO / Remus.  If they don't use that API to
retrieve domain configuration, a stale value might still get sent to the
other end.

Wei.

> Ian.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 16:36                     ` Wei Liu
@ 2015-04-15 16:45                       ` Ian Campbell
  2015-04-15 16:52                         ` Wei Liu
  0 siblings, 1 reply; 47+ messages in thread
From: Ian Campbell @ 2015-04-15 16:45 UTC (permalink / raw)
  To: Wei Liu
  Cc: Stefano Stabellini, George Dunlap, Andrew Cooper, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Hongyang Yang

On Wed, 2015-04-15 at 17:36 +0100, Wei Liu wrote:
> On Wed, Apr 15, 2015 at 03:34:48PM +0100, Ian Campbell wrote:
> > On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
> > > Let's see if we can record this in xc image format. I haven't looked,
> > > but George mentioned that it might be possible to do so.
> > 
> > Can this not be done at the save stage in the bit where we update the
> > JSON to reflect the actual configuration?
> > 
> 
> Yes, it's possible to do that. The value is not in libxl idl simply
> because it's always not there.

We can't set the existing maxmem then?

> But this might not help COLO / Remus.  If they don't use that API to
> retrieve domain configuration, a stale value might still get sent to the
> other end.
> 
> Wei.
> 
> > Ian.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 16:45                       ` Ian Campbell
@ 2015-04-15 16:52                         ` Wei Liu
  2015-04-15 18:24                           ` Don Slutz
  2015-04-16  9:00                           ` Ian Campbell
  0 siblings, 2 replies; 47+ messages in thread
From: Wei Liu @ 2015-04-15 16:52 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Wei Liu, Stefano Stabellini, George Dunlap, Andrew Cooper,
	Ian Jackson, Don Slutz, xen-devel, Shriram Rajagopalan,
	Hongyang Yang

On Wed, Apr 15, 2015 at 05:45:15PM +0100, Ian Campbell wrote:
> On Wed, 2015-04-15 at 17:36 +0100, Wei Liu wrote:
> > On Wed, Apr 15, 2015 at 03:34:48PM +0100, Ian Campbell wrote:
> > > On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
> > > > Let's see if we can record this in xc image format. I haven't looked,
> > > > but George mentioned that it might be possible to do so.
> > > 
> > > Can this not be done at the save stage in the bit where we update the
> > > JSON to reflect the actual configuration?
> > > 
> > 
> > Yes, it's possible to do that. The value is not in libxl idl simply
> > because it's always not there.
> 
> We can't set the existing maxmem then?
> 

If the "we" in your question means an applications that use libxl's
public interface, then no. It's not in IDL. It's not in xenstore. That
value is currently managed by libxl + libxc + xen.

Wei.

> > But this might not help COLO / Remus.  If they don't use that API to
> > retrieve domain configuration, a stale value might still get sent to the
> > other end.
> > 
> > Wei.
> > 
> > > Ian.
> 

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 14:30 ` Ian Campbell
@ 2015-04-15 18:17   ` Don Slutz
  0 siblings, 0 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-15 18:17 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Wei Liu, Stefano Stabellini, Ian Jackson, xen-devel,
	Shriram Rajagopalan, Yang Hongyang

On 04/15/15 10:30, Ian Campbell wrote:
> On Mon, 2015-04-13 at 12:09 -0400, Don Slutz wrote:
>>  
>> +/* Leave some slack so that hvmloader does not complain about lack of
>> + * memory at boot time ("Could not allocate order=0 extent").
>> + * Once hvmloader is modified to cope with that situation without
>> + * printing warning messages, QEMU_SPARE_PAGES can be removed.
> 
> How and where is hvmloader involved in a domain restore? 
> 

If the domU is created paused, saved, and restored, hvmloader will be
the 1st thing to execute.  This comment is in QEMU, and this old (bad)
way needed to leave room for hvmloader.

My understanding is that when xc_domain_populate_physmap_exact() returns
an error (which outputs an error message on the console), hvmloader
will correctly handle this.

   -Don Slutz

> Ian.
> 

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 11:56                 ` Ian Campbell
@ 2015-04-15 18:19                   ` Don Slutz
  0 siblings, 0 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-15 18:19 UTC (permalink / raw)
  To: Ian Campbell, Stefano Stabellini
  Cc: Wei Liu, Andrew Cooper, Ian Jackson, xen-devel,
	Shriram Rajagopalan, Hongyang Yang

On 04/15/15 07:56, Ian Campbell wrote:
> On Wed, 2015-04-15 at 12:27 +0100, Stefano Stabellini wrote:
>> On Wed, 15 Apr 2015, Hongyang Yang wrote:
>>> On 04/15/2015 06:09 PM, Ian Campbell wrote:
>>>> On Wed, 2015-04-15 at 10:46 +0100, Stefano Stabellini wrote:
>>>>> On Tue, 14 Apr 2015, Don Slutz wrote:
>>>>>> On 04/14/15 04:53, Wei Liu wrote:
>>>>>>> Is there a way to know how much more memory each option rom needs? If
>>>>>>> so, you can correctly account for the extra memory you need. This
>>>>>>> would
>>>>>>> be an ideal fix to this problem.
>>>>>>
>>>>>> I do not know of a way to get this info.  It can change based on the
>>>>>> QEMU version.
>>>>>
>>>>> Indeed it would be fragile to rely on a fixed size for each option rom,
>>>>> given that they come out of the QEMU tree and can change at any time.
>>>>
>>>> Only having dipped into this thread so far it seems to me we need some
>>>> way for qemu to communicate the result of its manipulations of maxmem
>>>> into the migration stream explicitly to be picked up by the other end.
>>>
>>> Totally agreed.
>>
>> Xen knows exactly the maxmem setting for the domain, I don't think we
>> need one more notification from QEMU?
>> Libxl/Xen could just write the maxmem value to the migration stream.
> 
> If that is all which is needed and not the delta between the notional
> maxmem which the guest was configured with and what we actually gave it
> due to things such as option ROMs, then sure.
> 

This is what v2 of this patch does.

   -Don Slutz

> Ian.
> 

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 16:52                         ` Wei Liu
@ 2015-04-15 18:24                           ` Don Slutz
  2015-04-16  9:00                           ` Ian Campbell
  1 sibling, 0 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-15 18:24 UTC (permalink / raw)
  To: Wei Liu, Ian Campbell
  Cc: Stefano Stabellini, George Dunlap, Andrew Cooper, Ian Jackson,
	xen-devel, Shriram Rajagopalan, Hongyang Yang

On 04/15/15 12:52, Wei Liu wrote:
> On Wed, Apr 15, 2015 at 05:45:15PM +0100, Ian Campbell wrote:
>> On Wed, 2015-04-15 at 17:36 +0100, Wei Liu wrote:
>>> On Wed, Apr 15, 2015 at 03:34:48PM +0100, Ian Campbell wrote:
>>>> On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
>>>>> Let's see if we can record this in xc image format. I haven't looked,
>>>>> but George mentioned that it might be possible to do so.
>>>>
>>>> Can this not be done at the save stage in the bit where we update the
>>>> JSON to reflect the actual configuration?
>>>>
>>>
>>> Yes, it's possible to do that. The value is not in libxl idl simply
>>> because it's always not there.
>>
>> We can't set the existing maxmem then?
>>
> 
> If the "we" in your question means an applications that use libxl's
> public interface, then no. It's not in IDL. It's not in xenstore. That
> value is currently managed by libxl + libxc + xen.
> 

And just to be clear, all this is related to LIBXL_MAXMEM_CONSTANT,
which is not in the JSON, nor xenstore, nor libxc, nor Xen.

   -Don Slutz

> Wei.
> 
>>> But this might not help COLO / Remus.  If they don't use that API to
>>> retrieve domain configuration, a stale value might still get sent to the
>>> other end.
>>>
>>> Wei.
>>>
>>>> Ian.
>>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 14:32             ` Ian Campbell
@ 2015-04-15 20:41               ` Don Slutz
  0 siblings, 0 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-15 20:41 UTC (permalink / raw)
  To: Ian Campbell, Hongyang Yang
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	xen-devel, Shriram Rajagopalan

On 04/15/15 10:32, Ian Campbell wrote:
> On Tue, 2015-04-14 at 17:42 +0800, Hongyang Yang wrote:
>>
>> On 04/14/2015 05:28 PM, Andrew Cooper wrote:
> 
>>> Funny you should ask that.  Migrationv2 for libxl moves the JSON config
>>> blob into the libxl stream, rather than being a singleshot action at the
>>> very start.  From that point, it becomes plausible to send a new JSON
>>> blob when an update on the source side occurs.
>>
>> That should solve the problem, but migration v2 for libxl won't be upstream
>> for the moment, and the bug still exists. Is there a way to solve the problem
>> now under v1 migration?
> 
> As far as I am concerned migration v2 is a prerequisite for COLO. We
> won't be taking any major changes to the v1 code at this point.
> 

So is:

Date: Tue, 14 Apr 2015 18:06:26 -0400
Subject: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
Thread-Topic: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
Thread-Index: AdB2/0pJoAZieMWqTomVEBNbV7iiEA==
Message-ID: <1429049186-27343-1-git-send-email-dslutz@verizon.com>
References: <20150414175437.GB11717@zion.uk.xensource.com>
In-Reply-To: <20150414175437.GB11717@zion.uk.xensource.com>

a major change to the v1 code?

   -Don Slutz


> Ian.
> 


* Re: [PATCH v2 1/1] tools: Handle xc_maxmem adjustments
  2015-04-15 11:48                           ` Stefano Stabellini
@ 2015-04-15 21:43                             ` Don Slutz
  0 siblings, 0 replies; 47+ messages in thread
From: Don Slutz @ 2015-04-15 21:43 UTC (permalink / raw)
  To: Stefano Stabellini, Andrew Cooper
  Cc: Wei Liu, Ian Campbell, Ian Jackson, xen-devel,
	Shriram Rajagopalan, Yang Hongyang

On 04/15/15 07:48, Stefano Stabellini wrote:
> On Wed, 15 Apr 2015, Andrew Cooper wrote:
>> On 15/04/15 10:57, Stefano Stabellini wrote:
>>> On Wed, 15 Apr 2015, Andrew Cooper wrote:
>>>> On 14/04/15 23:06, Don Slutz wrote:
>>>>> This fixes an issue where "xl save" followed by "xl restore" reports:
>>>>> "xc: error: Failed to allocate memory for batch.!: Internal error"
>>>>>
>>>>> One of the ways to get into this state is to have more than 4 e1000
>>>>> NICs configured.
>>>>>
>>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>>> I still don't think this is the correct solution, although I will
>>>> concede that this is a far better patch than v1.
>>>>
>>>> Going back to the original problem, why does Qemu need to change maxmem
>>>> in the first place?  You talk about e1000 option roms, but the option
>>>> roms themselves must be allocated in an appropriate PCI bridge window.
>>> QEMU allocates actual memory to store the option roms using
>>> xc_domain_populate_physmap_exact, increasing the total amount of ram
>>> allocated to the guest. QEMU needs to increase maxmem too, otherwise
>>> memory allocations will fail after a few option roms.
>>
>> It doesn't need to.  The domain already has that much ram allocated to
>> itself and otherwise wasted.  Qemu should reuse that ram rather than
>> wasting more.
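
The call sequence described above looks roughly like this (a minimal
sketch, not QEMU's actual code; the helper name, ROM size cap and error
handling are illustrative):

#include <xenctrl.h>

/* Raise the Xen memory limit first, then populate guest-physical
 * frames for an option ROM.  Without the setmaxmem call, the populate
 * call fails once the ROMs exhaust the domain's maxmem. */
static int alloc_rom_pages(xc_interface *xch, uint32_t dom,
                           uint64_t cur_max_kb, xen_pfn_t base_gpfn,
                           unsigned long nr)
{
    xen_pfn_t pfns[32];        /* assume a small ROM (<= 128 KiB) */
    unsigned long i;

    if ( nr > 32 )
        return -1;
    /* nr << 2 converts 4 KiB pages to KiB. */
    if ( xc_domain_setmaxmem(xch, dom, cur_max_kb + (nr << 2)) < 0 )
        return -1;
    for ( i = 0; i < nr; i++ )
        pfns[i] = base_gpfn + i;
    return xc_domain_populate_physmap_exact(xch, dom, nr, 0, 0, pfns);
}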

This "otherwise wasted" I do not understand at all.

for:

maxmem = "2056"
memory = "2056"
mmio_hole = "3072"
videoram = "8"
vif = [
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:a0",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:aa",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:b4",
 "model=e1000,bridge=xenbr0,mac=00:0c:29:86:44:be",
]


xc: detail: VIRTUAL MEMORY ARRANGEMENT:
xc: detail:   Loader:   0000000000100000->00000000001c4a1c
xc: detail:   Modules:  0000000000000000->0000000000000000
xc: detail:   TOTAL:    0000000000000000->0000000080000000
xc: detail:   ENTRY:    0000000000100038
xc: detail: PHYSICAL MEMORY ALLOCATION:
xc: detail:   4KB PAGES: 0x0000000000000200
xc: detail:   2MB PAGES: 0x00000000000001ff
xc: detail:   1GB PAGES: 0x0000000000000001


xl list:

e1000x4-2g                                  36  2057     1     --p---       0.0

2057 MiB is 2056 MiB + LIBXL_MAXMEM_CONSTANT (1024 KiB, i.e. 1 MiB)

As reported by xc_domain_getinfo()

info.max_memkb == 2106432 ==> pages 526608
info.nr_pages  == 526592  ==> 2106368 KiB

So there are 16 "wasted" pages, part of the pages from physical address
0xa0000 to 0xbffff.
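
Spelling the arithmetic out:

  2106432 KiB / 4 KiB per page = 526608 pages   (info.max_memkb)
  526592 pages * 4 KiB         = 2106368 KiB    (info.nr_pages)
  526608 - 526592              = 16 pages       (64 KiB of slack)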

hvmloader:

(d36) [2015-04-15 21:03:45] BIOS map:
(d36) [2015-04-15 21:03:45]  10000-100d3: Scratch space
(d36) [2015-04-15 21:03:45]  c0000-fffff: Main BIOS
(d36) [2015-04-15 21:03:45] E820 table:
(d36) [2015-04-15 21:03:45]  [00]: 00000000:00000000 - 00000000:000a0000: RAM
(d36) [2015-04-15 21:03:45]  HOLE: 00000000:000a0000 - 00000000:000c0000
(d36) [2015-04-15 21:03:45]  [01]: 00000000:000c0000 - 00000000:00100000: RESERVED
(d36) [2015-04-15 21:03:45]  [02]: 00000000:00100000 - 00000000:40000000: RAM
(d36) [2015-04-15 21:03:45]  HOLE: 00000000:40000000 - 00000000:fc000000
(d36) [2015-04-15 21:03:45]  [03]: 00000000:fc000000 - 00000001:00000000: RESERVED
(d36) [2015-04-15 21:03:45]  [04]: 00000001:00000000 - 00000001:40000000: RAM


Linux reports:

BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
 BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 000000003ffff000 (usable)
 BIOS-e820: 000000003ffff000 - 0000000040000000 (reserved)
 BIOS-e820: 00000000fc000000 - 0000000100000000 (reserved)
 BIOS-e820: 0000000100000000 - 0000000140000000 (usable)


Note: the e820 map is missing the VGA hole of 0xa0000 - 0xc0000.

So I see 1GiB (less the 16 pages noted above) below 4GiB, 1GiB above
4GiB, and 8 MiB of video RAM, which is really close to the 2056 MiB of
RAM the guest was configured with.

If I drop all the e1000 NICs (whose QEMU option ROMs contain the code
the guest executes to PXE boot from SeaBIOS):

xc: detail: VIRTUAL MEMORY ARRANGEMENT:
xc: detail:   Loader:   0000000000100000->00000000001c4a1c
xc: detail:   Modules:  0000000000000000->0000000000000000
xc: detail:   TOTAL:    0000000000000000->0000000080000000
xc: detail:   ENTRY:    0000000000100038
xc: detail: PHYSICAL MEMORY ALLOCATION:
xc: detail:   4KB PAGES: 0x0000000000000200
xc: detail:   2MB PAGES: 0x00000000000001ff
xc: detail:   1GB PAGES: 0x0000000000000001


(d2) [2015-04-15 21:35:18]  c0000-fffff: Main BIOS
(d2) [2015-04-15 21:35:18] E820 table:
(d2) [2015-04-15 21:35:18]  [00]: 00000000:00000000 - 00000000:000a0000: RAM
(d2) [2015-04-15 21:35:18]  HOLE: 00000000:000a0000 - 00000000:000c0000
(d2) [2015-04-15 21:35:18]  [01]: 00000000:000c0000 - 00000000:00100000: RESERVED
(d2) [2015-04-15 21:35:18]  [02]: 00000000:00100000 - 00000000:40000000: RAM
(d2) [2015-04-15 21:35:18]  HOLE: 00000000:40000000 - 00000000:fc000000
(d2) [2015-04-15 21:35:18]  [03]: 00000000:fc000000 - 00000001:00000000: RESERVED
(d2) [2015-04-15 21:35:18]  [04]: 00000001:00000000 - 00000001:40000000: RAM
(d2) [2015-04-15 21:35:18] Invoking SeaBIOS ...
...
(d2) [2015-04-15 21:35:21] Returned 258048 bytes of ZoneHigh
(d2) [2015-04-15 21:35:21] e820 map has 7 items:
(d2) [2015-04-15 21:35:21]   0: 0000000000000000 - 000000000009fc00 = 1 RAM
(d2) [2015-04-15 21:35:21]   1: 000000000009fc00 - 00000000000a0000 = 2 RESERVED
(d2) [2015-04-15 21:35:21]   2: 00000000000f0000 - 0000000000100000 = 2 RESERVED
(d2) [2015-04-15 21:35:21]   3: 0000000000100000 - 000000003ffff000 = 1 RAM
(d2) [2015-04-15 21:35:21]   4: 000000003ffff000 - 0000000040000000 = 2 RESERVED
(d2) [2015-04-15 21:35:21]   5: 00000000fc000000 - 0000000100000000 = 2 RESERVED
(d2) [2015-04-15 21:35:21]   6: 0000000100000000 - 0000000140000000 = 1 RAM

And linux:

BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
 BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 000000003ffff000 (usable)
 BIOS-e820: 000000003ffff000 - 0000000040000000 (reserved)
 BIOS-e820: 00000000fc000000 - 0000000100000000 (reserved)
 BIOS-e820: 0000000100000000 - 0000000140000000 (usable)



max_pages=526592 (2106368(0x202400) KiB) nr_pages=526336 (2105344(0x202000) KiB) free_pages=256

And with LIBXL_MAXMEM_CONSTANT=1024 (KiB), those 256 "free (wasted?)"
pages are exactly that slack.
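
The numbers line up exactly:

  LIBXL_MAXMEM_CONSTANT:  1024 KiB / 4 KiB per page = 256 pages
  max_pages - nr_pages:   526592 - 526336           = 256 pages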

> 
> QEMU has no knowledge about what the e820 contains and considers the
> memory relocated from the PCI hole to the top of ram as valid guest
> memory. Libxl tells QEMU how big the PCI hole is by passing
> max-ram-below-4g to QEMU.
> 
> I think that we should fix the guest e820 as it is actually valid guest
> memory.
> 

If you are talking about the 1GiB over 4GiB in my example, it is already
in the e820 map.

So I do not understand the "wasted" ram issue.

    -Don Slutz


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-15 16:52                         ` Wei Liu
  2015-04-15 18:24                           ` Don Slutz
@ 2015-04-16  9:00                           ` Ian Campbell
  2015-04-16  9:14                             ` George Dunlap
  1 sibling, 1 reply; 47+ messages in thread
From: Ian Campbell @ 2015-04-16  9:00 UTC (permalink / raw)
  To: Wei Liu
  Cc: Stefano Stabellini, George Dunlap, Andrew Cooper, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Hongyang Yang

On Wed, 2015-04-15 at 17:52 +0100, Wei Liu wrote:
> On Wed, Apr 15, 2015 at 05:45:15PM +0100, Ian Campbell wrote:
> > On Wed, 2015-04-15 at 17:36 +0100, Wei Liu wrote:
> > > On Wed, Apr 15, 2015 at 03:34:48PM +0100, Ian Campbell wrote:
> > > > On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
> > > > > Let's see if we can record this in xc image format. I haven't looked,
> > > > > but George mentioned that it might be possible to do so.
> > > > 
> > > > Can this not be done at the save stage in the bit where we update the
> > > > JSON to reflect the actual configuration?
> > > > 
> > > 
> > > Yes, it's possible to do that. The value is not in the libxl IDL simply
> > > because it has never been there.
> > 
> > We can't set the existing maxmem then?
> > 
> 
> If the "we" in your question means an applications that use libxl's
> public interface, then no. It's not in IDL. It's not in xenstore. That
> value is currently managed by libxl + libxc + xen.

I was thinking of libxl_domain_build_info.max_memkb, which is in a
struct which is marshalled over the wire and which could be updated on
save to reflect the actual usage.
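
A hedged sketch of what that save-time update could look like (purely
illustrative; the helper name is invented here):

#include <xenctrl.h>
#include <libxl.h>

/* Fold the domain's actual Xen memory limit back into the build_info
 * that gets marshalled into the stream on save. */
static void refresh_max_memkb(xc_interface *xch, uint32_t dom,
                              libxl_domain_build_info *b_info)
{
    xc_domaininfo_t info;

    if ( xc_domain_getinfolist(xch, dom, 1, &info) == 1 &&
         info.domain == dom )
        b_info->max_memkb = info.max_pages << (XC_PAGE_SHIFT - 10);
}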

Does using that field not work for some reason?

Ian.


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-16  9:00                           ` Ian Campbell
@ 2015-04-16  9:14                             ` George Dunlap
  2015-04-16  9:29                               ` Ian Campbell
  0 siblings, 1 reply; 47+ messages in thread
From: George Dunlap @ 2015-04-16  9:14 UTC (permalink / raw)
  To: Ian Campbell, Wei Liu
  Cc: Stefano Stabellini, Andrew Cooper, Ian Jackson, Don Slutz,
	xen-devel, Shriram Rajagopalan, Hongyang Yang

On 04/16/2015 10:00 AM, Ian Campbell wrote:
> On Wed, 2015-04-15 at 17:52 +0100, Wei Liu wrote:
>> On Wed, Apr 15, 2015 at 05:45:15PM +0100, Ian Campbell wrote:
>>> On Wed, 2015-04-15 at 17:36 +0100, Wei Liu wrote:
>>>> On Wed, Apr 15, 2015 at 03:34:48PM +0100, Ian Campbell wrote:
>>>>> On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
>>>>>> Let's see if we can record this in xc image format. I haven't looked,
>>>>>> but George mentioned that it might be possible to do so.
>>>>>
>>>>> Can this not be done at the save stage in the bit where we update the
>>>>> JSON to reflect the actual configuration?
>>>>>
>>>>
>>>> Yes, it's possible to do that. The value is not in the libxl IDL simply
>>>> because it has never been there.
>>>
>>> We can't set the existing maxmem then?
>>>
>>
>> If the "we" in your question means an applications that use libxl's
>> public interface, then no. It's not in IDL. It's not in xenstore. That
>> value is currently managed by libxl + libxc + xen.
> 
> I was thinking of libxl_domain_build_info.max_memkb, which is in a
> struct which is marshalled over the wire and which could be updated on
> save to reflect the actual usage.
> 
> Does using that field not work for some reason?

The problem, I think, is that max_memkb says how much memory is
*reported to the guest*.  So what happens when the VM on the other side
reboots?  Won't libxl use this newly-increased max_memkb as the memory
to be reported *to the guest*?  And then qemu will have to allocate *yet
more* memory for its roms?

Meaning that the size of the VM will increase by a few kB (MB?) every
time it reboots?
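
To make that growth concrete (r stands for the extra option-ROM
allocation; the progression is illustrative):

  boot 0:  guest sees M       ->  qemu raises the limit to M + r
  save:    max_memkb := M + r
  boot 1:  guest sees M + r   ->  qemu raises the limit to M + 2r
  ...
  boot k:  guest sees M + k*r     (unbounded growth across save/reboot)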

This is why I think we should try to start making a clear distinction
between "what the guest sees as RAM" and "what Xen sees as memory"; and
I proposed using "memory" for what the guest sees, and "pages" for what
Xen sees.

 -George


* Re: [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory
  2015-04-16  9:14                             ` George Dunlap
@ 2015-04-16  9:29                               ` Ian Campbell
  0 siblings, 0 replies; 47+ messages in thread
From: Ian Campbell @ 2015-04-16  9:29 UTC (permalink / raw)
  To: George Dunlap
  Cc: Wei Liu, Stefano Stabellini, Andrew Cooper, Ian Jackson,
	Don Slutz, xen-devel, Shriram Rajagopalan, Hongyang Yang

On Thu, 2015-04-16 at 10:14 +0100, George Dunlap wrote:
> On 04/16/2015 10:00 AM, Ian Campbell wrote:
> > On Wed, 2015-04-15 at 17:52 +0100, Wei Liu wrote:
> >> On Wed, Apr 15, 2015 at 05:45:15PM +0100, Ian Campbell wrote:
> >>> On Wed, 2015-04-15 at 17:36 +0100, Wei Liu wrote:
> >>>> On Wed, Apr 15, 2015 at 03:34:48PM +0100, Ian Campbell wrote:
> >>>>> On Tue, 2015-04-14 at 18:54 +0100, Wei Liu wrote:
> >>>>>> Let's see if we can record this in xc image format. I haven't looked,
> >>>>>> but George mentioned that it might be possible to do so.
> >>>>>
> >>>>> Can this not be done at the save stage in the bit where we update the
> >>>>> JSON to reflect the actual configuration?
> >>>>>
> >>>>
> >>>> Yes, it's possible to do that. The value is not in the libxl IDL simply
> >>>> because it has never been there.
> >>>
> >>> We can't set the existing maxmem then?
> >>>
> >>
> >> If the "we" in your question means an applications that use libxl's
> >> public interface, then no. It's not in IDL. It's not in xenstore. That
> >> value is currently managed by libxl + libxc + xen.
> > 
> > I was thinking of libxl_domain_build_info.max_memkb, which is in a
> > struct which is marshalled over the wire and which could be updated on
> > save to reflect the actual usage.
> > 
> > Does using that field not work for some reason?
> 
> The problem, I think, is that max_memkb says how much memory is
> *reported to the guest*.  So what happens when the VM on the other side
> reboots?  Won't libxl use this newly-increased max_memkb as the memory
> to be reported *to the guest*?  And then qemu will have to allocate *yet
> more* memory for its roms?

Ah, yes, I think you are right, that's pretty dumb.

> Meaning that the size of the VM will increase by a few kB (MB?) every
> time it reboots?
> 
> This is why I think we should try to start making a clear distinction
> between "what the guest sees as RAM" and "what Xen sees as memory"; and
> I proposed using "memory" for what the guest sees, and "pages" for what
> Xen sees.

That might help, as might my remembering about
docs/misc/libxl_memory.txt more often.

Ian.


end of thread

Thread overview: 47+ messages
2015-04-13 16:09 [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Don Slutz
2015-04-13 16:20 ` Wei Liu
2015-04-13 23:51   ` Don Slutz
2015-04-13 16:25 ` Andrew Cooper
2015-04-13 23:51   ` Don Slutz
2015-04-14  8:53     ` Wei Liu
2015-04-14 17:34       ` Don Slutz
2015-04-14 17:51         ` Wei Liu
2015-04-15  9:46         ` Stefano Stabellini
2015-04-15 10:09           ` Ian Campbell
2015-04-15 11:15             ` Hongyang Yang
2015-04-15 11:27               ` Stefano Stabellini
2015-04-15 11:56                 ` Ian Campbell
2015-04-15 18:19                   ` Don Slutz
2015-04-14  8:53     ` Andrew Cooper
2015-04-14  9:22       ` Hongyang Yang
2015-04-14  9:28         ` Andrew Cooper
2015-04-14  9:42           ` Hongyang Yang
2015-04-15 14:32             ` Ian Campbell
2015-04-15 20:41               ` Don Slutz
2015-04-14  9:29         ` Wei Liu
2015-04-14  9:40           ` Hongyang Yang
2015-04-14  9:52             ` Wei Liu
2015-04-14 17:43               ` Don Slutz
2015-04-14 17:54                 ` Wei Liu
2015-04-14 22:06                   ` [PATCH v2 1/1] tools: Handle xc_maxmem adjustments Don Slutz
2015-04-15  9:53                     ` Andrew Cooper
2015-04-15  9:57                       ` Stefano Stabellini
2015-04-15 10:03                         ` Andrew Cooper
2015-04-15 10:21                           ` George Dunlap
2015-04-15 11:48                           ` Stefano Stabellini
2015-04-15 21:43                             ` Don Slutz
2015-04-15 10:16                       ` George Dunlap
2015-04-15 10:25                         ` George Dunlap
2015-04-15 10:30                           ` Andrew Cooper
2015-04-15 14:34                   ` [PATCH 1/1] xc_domain_restore: Allow QEMU to increase memory Ian Campbell
2015-04-15 16:36                     ` Wei Liu
2015-04-15 16:45                       ` Ian Campbell
2015-04-15 16:52                         ` Wei Liu
2015-04-15 18:24                           ` Don Slutz
2015-04-16  9:00                           ` Ian Campbell
2015-04-16  9:14                             ` George Dunlap
2015-04-16  9:29                               ` Ian Campbell
2015-04-14  3:46 ` Hongyang Yang
2015-04-14  8:46   ` Wei Liu
2015-04-15 14:30 ` Ian Campbell
2015-04-15 18:17   ` Don Slutz
