All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] libxl: Move job acquisition in libxlDomainStart to callers
       [not found] <1427314116-14451-1-git-send-email-jfehlig@suse.com>
@ 2015-03-25 20:08 ` Jim Fehlig
  2015-04-01  9:55   ` [libvirt] " Martin Kletzander
  2015-03-25 20:08 ` [PATCH 2/3] libxl: acquire a job when destroying a domain Jim Fehlig
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Jim Fehlig @ 2015-03-25 20:08 UTC (permalink / raw)
  To: libvir-list; +Cc: Jim Fehlig, xen-devel

Let callers of libxlDomainStart decide when it is appropriate to
acquire a job on the associated virDomainObj.

Signed-off-by: Jim Fehlig <jfehlig@suse.com>
---
 src/libxl/libxl_domain.c | 24 ++++++++--------------
 src/libxl/libxl_driver.c | 53 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index 8feb4dc..eca1ff0 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -907,16 +907,13 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
 
     libxl_domain_config_init(&d_config);
 
-    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
-        return ret;
-
     cfg = libxlDriverConfigGet(driver);
     /* If there is a managed saved state restore it instead of starting
      * from scratch. The old state is removed once the restoring succeeded. */
     if (restore_fd < 0) {
         managed_save_path = libxlDomainManagedSavePath(driver, vm);
         if (managed_save_path == NULL)
-            goto endjob;
+            goto cleanup;
 
         if (virFileExists(managed_save_path)) {
 
@@ -924,7 +921,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
                                                        managed_save_path,
                                                        &def, &hdr);
             if (managed_save_fd < 0)
-                goto endjob;
+                goto cleanup;
 
             restore_fd = managed_save_fd;
 
@@ -938,7 +935,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
                                _("cannot restore domain '%s' uuid %s from a file"
                                  " which belongs to domain '%s' uuid %s"),
                                vm->def->name, vm_uuidstr, def->name, def_uuidstr);
-                goto endjob;
+                goto cleanup;
             }
 
             virDomainObjAssignDef(vm, def, true, NULL);
@@ -955,18 +952,18 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
 
     if (libxlBuildDomainConfig(driver->reservedVNCPorts, vm->def,
                                cfg->ctx, &d_config) < 0)
-        goto endjob;
+        goto cleanup;
 
     if (cfg->autoballoon && libxlDomainFreeMem(cfg->ctx, &d_config) < 0) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        _("libxenlight failed to get free memory for domain '%s'"),
                        d_config.c_info.name);
-        goto endjob;
+        goto cleanup;
     }
 
     if (virHostdevPrepareDomainDevices(hostdev_mgr, LIBXL_DRIVER_NAME,
                                        vm->def, VIR_HOSTDEV_SP_PCI) < 0)
-        goto endjob;
+        goto cleanup;
 
     /* Unlock virDomainObj while creating the domain */
     virObjectUnlock(vm);
@@ -998,7 +995,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
             virReportError(VIR_ERR_INTERNAL_ERROR,
                            _("libxenlight failed to restore domain '%s'"),
                            d_config.c_info.name);
-        goto endjob;
+        goto cleanup;
     }
 
     /*
@@ -1045,7 +1042,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
         libxlDomainEventQueue(driver, event);
 
     ret = 0;
-    goto endjob;
+    goto cleanup;
 
  cleanup_dom:
     if (priv->deathW) {
@@ -1056,10 +1053,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm,
     vm->def->id = -1;
     virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, VIR_DOMAIN_SHUTOFF_FAILED);
 
- endjob:
-    if (!libxlDomainObjEndJob(driver, vm))
-        vm = NULL;
-
+ cleanup:
     libxl_domain_config_dispose(&d_config);
     VIR_FREE(dom_xml);
     VIR_FREE(managed_save_path);
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
index 05f6eb1..da4c1c7 100644
--- a/src/libxl/libxl_driver.c
+++ b/src/libxl/libxl_driver.c
@@ -303,18 +303,26 @@ libxlAutostartDomain(virDomainObjPtr vm,
     virObjectLock(vm);
     virResetLastError();
 
+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) {
+        virObjectUnlock(vm);
+        return ret;
+    }
+
     if (vm->autostart && !virDomainObjIsActive(vm) &&
         libxlDomainStart(driver, vm, false, -1) < 0) {
         err = virGetLastError();
         VIR_ERROR(_("Failed to autostart VM '%s': %s"),
                   vm->def->name,
                   err ? err->message : _("unknown error"));
-        goto cleanup;
+        goto endjob;
     }
 
     ret = 0;
- cleanup:
-    virObjectUnlock(vm);
+
+ endjob:
+    if (libxlDomainObjEndJob(driver, vm))
+        virObjectUnlock(vm);
+
     return ret;
 }
 
@@ -885,17 +893,26 @@ libxlDomainCreateXML(virConnectPtr conn, const char *xml,
         goto cleanup;
     def = NULL;
 
+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) {
+        virDomainObjListRemove(driver->domains, vm);
+        vm = NULL;
+        goto cleanup;
+    }
+
     if (libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0,
                      -1) < 0) {
         virDomainObjListRemove(driver->domains, vm);
-        vm = NULL;
-        goto cleanup;
+        goto endjob;
     }
 
     dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
     if (dom)
         dom->id = vm->def->id;
 
+ endjob:
+    if (!libxlDomainObjEndJob(driver, vm))
+        vm = NULL;
+
  cleanup:
     virDomainDefFree(def);
     if (vm)
@@ -1681,7 +1698,7 @@ libxlDomainRestoreFlags(virConnectPtr conn, const char *from,
 
     fd = libxlDomainSaveImageOpen(driver, cfg, from, &def, &hdr);
     if (fd < 0)
-        return -1;
+        goto cleanup;
 
     if (virDomainRestoreFlagsEnsureACL(conn, def) < 0)
         goto cleanup;
@@ -1695,11 +1712,20 @@ libxlDomainRestoreFlags(virConnectPtr conn, const char *from,
 
     def = NULL;
 
+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) {
+        if (!vm->persistent) {
+            virDomainObjListRemove(driver->domains, vm);
+            vm = NULL;
+        }
+        goto cleanup;
+    }
+
     ret = libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_SAVE_PAUSED) != 0, fd);
-    if (ret < 0 && !vm->persistent) {
+    if (ret < 0 && !vm->persistent)
         virDomainObjListRemove(driver->domains, vm);
+
+    if (!libxlDomainObjEndJob(driver, vm))
         vm = NULL;
-    }
 
  cleanup:
     if (VIR_CLOSE(fd) < 0)
@@ -2567,17 +2593,24 @@ libxlDomainCreateWithFlags(virDomainPtr dom,
     if (virDomainCreateWithFlagsEnsureACL(dom->conn, vm->def) < 0)
         goto cleanup;
 
+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
+        goto cleanup;
+
     if (virDomainObjIsActive(vm)) {
         virReportError(VIR_ERR_OPERATION_INVALID,
                        "%s", _("Domain is already running"));
-        goto cleanup;
+        goto endjob;
     }
 
     ret = libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0, -1);
     if (ret < 0)
-        goto cleanup;
+        goto endjob;
     dom->id = vm->def->id;
 
+ endjob:
+    if (!libxlDomainObjEndJob(driver, vm))
+        vm = NULL;
+
  cleanup:
     if (vm)
         virObjectUnlock(vm);
-- 
1.8.4.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] libxl: acquire a job when destroying a domain
       [not found] <1427314116-14451-1-git-send-email-jfehlig@suse.com>
  2015-03-25 20:08 ` [PATCH 1/3] libxl: Move job acquisition in libxlDomainStart to callers Jim Fehlig
@ 2015-03-25 20:08 ` Jim Fehlig
  2015-03-25 20:08 ` [PATCH 3/3] libxl: drop virDomainObj lock " Jim Fehlig
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-03-25 20:08 UTC (permalink / raw)
  To: libvir-list; +Cc: Jim Fehlig, xen-devel

A job should be acquired at the beginning of a domain destroy operation,
not at the end when cleaning up the domain.  Fix two occurances of this
late job acquisition in the libxl driver.  Doing so renders
libxlDomainCleanup unused, so it is removed.

Signed-off-by: Jim Fehlig <jfehlig@suse.com>
---
 src/libxl/libxl_domain.c | 49 +++++++++++++++++-------------------------------
 src/libxl/libxl_domain.h |  4 ----
 src/libxl/libxl_driver.c | 20 ++++++++++++--------
 3 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index eca1ff0..e240eae 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -371,6 +371,9 @@ libxlDomainShutdownThread(void *opaque)
 
     cfg = libxlDriverConfigGet(driver);
 
+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
+        goto cleanup;
+
     if (xl_reason == LIBXL_SHUTDOWN_REASON_POWEROFF) {
         dom_event = virDomainEventLifecycleNewFromObj(vm,
                                            VIR_DOMAIN_EVENT_STOPPED,
@@ -384,7 +387,7 @@ libxlDomainShutdownThread(void *opaque)
             goto restart;
         case VIR_DOMAIN_LIFECYCLE_PRESERVE:
         case VIR_DOMAIN_LIFECYCLE_LAST:
-            goto cleanup;
+            goto endjob;
         }
     } else if (xl_reason == LIBXL_SHUTDOWN_REASON_CRASH) {
         dom_event = virDomainEventLifecycleNewFromObj(vm,
@@ -399,7 +402,7 @@ libxlDomainShutdownThread(void *opaque)
             goto restart;
         case VIR_DOMAIN_LIFECYCLE_CRASH_PRESERVE:
         case VIR_DOMAIN_LIFECYCLE_CRASH_LAST:
-            goto cleanup;
+            goto endjob;
         case VIR_DOMAIN_LIFECYCLE_CRASH_COREDUMP_DESTROY:
             libxlDomainAutoCoreDump(driver, vm);
             goto destroy;
@@ -420,11 +423,11 @@ libxlDomainShutdownThread(void *opaque)
             goto restart;
         case VIR_DOMAIN_LIFECYCLE_PRESERVE:
         case VIR_DOMAIN_LIFECYCLE_LAST:
-            goto cleanup;
+            goto endjob;
         }
     } else {
         VIR_INFO("Unhandled shutdown_reason %d", xl_reason);
-        goto cleanup;
+        goto endjob;
     }
 
  destroy:
@@ -433,13 +436,11 @@ libxlDomainShutdownThread(void *opaque)
         dom_event = NULL;
     }
     libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
-    if (libxlDomainCleanupJob(driver, vm, reason)) {
-        if (!vm->persistent) {
-            virDomainObjListRemove(driver->domains, vm);
-            vm = NULL;
-        }
-    }
-    goto cleanup;
+    libxlDomainCleanup(driver, vm, reason);
+    if (!vm->persistent)
+        virDomainObjListRemove(driver->domains, vm);
+
+    goto endjob;
 
  restart:
     if (dom_event) {
@@ -447,13 +448,17 @@ libxlDomainShutdownThread(void *opaque)
         dom_event = NULL;
     }
     libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
-    libxlDomainCleanupJob(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
+    libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
     if (libxlDomainStart(driver, vm, false, -1) < 0) {
         virErrorPtr err = virGetLastError();
         VIR_ERROR(_("Failed to restart VM '%s': %s"),
                   vm->def->name, err ? err->message : _("unknown error"));
     }
 
+ endjob:
+    if (!libxlDomainObjEndJob(driver, vm))
+        vm = NULL;
+
  cleanup:
     if (vm)
         virObjectUnlock(vm);
@@ -690,26 +695,6 @@ libxlDomainCleanup(libxlDriverPrivatePtr driver,
 }
 
 /*
- * Cleanup function for domain that has reached shutoff state.
- * Executed in the context of a job.
- *
- * virDomainObjPtr should be locked on invocation
- * Returns true if references remain on virDomainObjPtr, false otherwise.
- */
-bool
-libxlDomainCleanupJob(libxlDriverPrivatePtr driver,
-                      virDomainObjPtr vm,
-                      virDomainShutoffReason reason)
-{
-    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_DESTROY) < 0)
-        return true;
-
-    libxlDomainCleanup(driver, vm, reason);
-
-    return libxlDomainObjEndJob(driver, vm);
-}
-
-/*
  * Core dump domain to default dump path.
  *
  * virDomainObjPtr must be locked on invocation
diff --git a/src/libxl/libxl_domain.h b/src/libxl/libxl_domain.h
index a032e46..30855a2 100644
--- a/src/libxl/libxl_domain.h
+++ b/src/libxl/libxl_domain.h
@@ -108,10 +108,6 @@ libxlDomainCleanup(libxlDriverPrivatePtr driver,
                    virDomainObjPtr vm,
                    virDomainShutoffReason reason);
 
-bool
-libxlDomainCleanupJob(libxlDriverPrivatePtr driver,
-                      virDomainObjPtr vm,
-                      virDomainShutoffReason reason);
 /*
  * Note: Xen 4.3 removed the const from the event handler signature.
  * Detect which signature to use based on
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
index da4c1c7..a1977c2 100644
--- a/src/libxl/libxl_driver.c
+++ b/src/libxl/libxl_driver.c
@@ -1238,10 +1238,13 @@ libxlDomainDestroyFlags(virDomainPtr dom,
     if (virDomainDestroyFlagsEnsureACL(dom->conn, vm->def) < 0)
         goto cleanup;
 
+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
+        goto cleanup;
+
     if (!virDomainObjIsActive(vm)) {
         virReportError(VIR_ERR_OPERATION_INVALID,
                        "%s", _("Domain is not running"));
-        goto cleanup;
+        goto endjob;
     }
 
     event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
@@ -1250,18 +1253,19 @@ libxlDomainDestroyFlags(virDomainPtr dom,
     if (libxl_domain_destroy(cfg->ctx, vm->def->id, NULL) < 0) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        _("Failed to destroy domain '%d'"), vm->def->id);
-        goto cleanup;
+        goto endjob;
     }
 
-    if (libxlDomainCleanupJob(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED)) {
-        if (!vm->persistent) {
-            virDomainObjListRemove(driver->domains, vm);
-            vm = NULL;
-        }
-    }
+    libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED);
+    if (!vm->persistent)
+        virDomainObjListRemove(driver->domains, vm);
 
     ret = 0;
 
+ endjob:
+    if (!libxlDomainObjEndJob(driver, vm))
+        vm = NULL;
+
  cleanup:
     if (vm)
         virObjectUnlock(vm);
-- 
1.8.4.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] libxl: drop virDomainObj lock when destroying a domain
       [not found] <1427314116-14451-1-git-send-email-jfehlig@suse.com>
  2015-03-25 20:08 ` [PATCH 1/3] libxl: Move job acquisition in libxlDomainStart to callers Jim Fehlig
  2015-03-25 20:08 ` [PATCH 2/3] libxl: acquire a job when destroying a domain Jim Fehlig
@ 2015-03-25 20:08 ` Jim Fehlig
  2015-03-26 15:14   ` Ian Campbell
       [not found]   ` <1427382869.13935.61.camel@citrix.com>
       [not found] ` <1427314116-14451-3-git-send-email-jfehlig@suse.com>
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-03-25 20:08 UTC (permalink / raw)
  To: libvir-list; +Cc: Jim Fehlig, xen-devel

A destroy operation can take considerable time on large memory
domains due to scrubbing the domain' memory.  The operation is
running in the context of a job, so unlocking the domain and
allowing query operations is safe.

Signed-off-by: Jim Fehlig <jfehlig@suse.com>
---
 src/libxl/libxl_domain.c | 4 ++++
 src/libxl/libxl_driver.c | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index e240eae..aef0157 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -435,7 +435,9 @@ libxlDomainShutdownThread(void *opaque)
         libxlDomainEventQueue(driver, dom_event);
         dom_event = NULL;
     }
+    virObjectUnlock(vm);
     libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
+    virObjectLock(vm);
     libxlDomainCleanup(driver, vm, reason);
     if (!vm->persistent)
         virDomainObjListRemove(driver->domains, vm);
@@ -447,7 +449,9 @@ libxlDomainShutdownThread(void *opaque)
         libxlDomainEventQueue(driver, dom_event);
         dom_event = NULL;
     }
+    virObjectUnlock(vm);
     libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
+    virObjectLock(vm);
     libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
     if (libxlDomainStart(driver, vm, false, -1) < 0) {
         virErrorPtr err = virGetLastError();
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
index a1977c2..21e20bb 100644
--- a/src/libxl/libxl_driver.c
+++ b/src/libxl/libxl_driver.c
@@ -1250,7 +1250,10 @@ libxlDomainDestroyFlags(virDomainPtr dom,
     event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
                                      VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
 
-    if (libxl_domain_destroy(cfg->ctx, vm->def->id, NULL) < 0) {
+    virObjectUnlock(vm);
+    ret = libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
+    virObjectLock(vm);
+    if (ret < 0) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        _("Failed to destroy domain '%d'"), vm->def->id);
         goto endjob;
-- 
1.8.4.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] libxl: acquire a job when destroying a domain
       [not found] ` <1427314116-14451-3-git-send-email-jfehlig@suse.com>
@ 2015-03-26  1:32   ` Konrad Rzeszutek Wilk
       [not found]   ` <20150326013242.GA8605@konrad-lan.dumpdata.com>
  1 sibling, 0 replies; 15+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-03-26  1:32 UTC (permalink / raw)
  To: Jim Fehlig; +Cc: libvir-list, xen-devel

On Wed, Mar 25, 2015 at 02:08:35PM -0600, Jim Fehlig wrote:
> A job should be acquired at the beginning of a domain destroy operation,
> not at the end when cleaning up the domain.  Fix two occurances of this
> late job acquisition in the libxl driver.  Doing so renders
> libxlDomainCleanup unused, so it is removed.
> 
> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
> ---
>  src/libxl/libxl_domain.c | 49 +++++++++++++++++-------------------------------
>  src/libxl/libxl_domain.h |  4 ----
>  src/libxl/libxl_driver.c | 20 ++++++++++++--------
>  3 files changed, 29 insertions(+), 44 deletions(-)
> 
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index eca1ff0..e240eae 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c
> @@ -371,6 +371,9 @@ libxlDomainShutdownThread(void *opaque)
>  
>      cfg = libxlDriverConfigGet(driver);
>  
> +    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
> +        goto cleanup;
> +

Won't this a deadlock with 'libxlDomainAutoCoreDump'  (line 410) (which also
calls :

 727     if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
 728         goto cleanup;

well, not deadlock - but spin up to 30 seconds? and then give up?

Perhaps this patch should be folded in?

>From 9f2bac0c28815fc51850290c4b1962881691bfca Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 25 Mar 2015 22:34:51 -0400
Subject: [PATCH] squash

---
 src/libxl/libxl_domain.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
index aef0157..774b070 100644
--- a/src/libxl/libxl_domain.c
+++ b/src/libxl/libxl_domain.c
@@ -723,15 +723,11 @@ libxlDomainAutoCoreDump(libxlDriverPrivatePtr driver,
                     timestr) < 0)
         goto cleanup;
 
-    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
-        goto cleanup;
-
     /* Unlock virDomainObj while dumping core */
     virObjectUnlock(vm);
     libxl_domain_core_dump(cfg->ctx, vm->def->id, dumpfile, NULL);
     virObjectLock(vm);
 
-    ignore_value(libxlDomainObjEndJob(driver, vm));
     ret = 0;
 
  cleanup:
-- 
1.8.4.2

>      if (xl_reason == LIBXL_SHUTDOWN_REASON_POWEROFF) {
>          dom_event = virDomainEventLifecycleNewFromObj(vm,
>                                             VIR_DOMAIN_EVENT_STOPPED,
> @@ -384,7 +387,7 @@ libxlDomainShutdownThread(void *opaque)
>              goto restart;
>          case VIR_DOMAIN_LIFECYCLE_PRESERVE:
>          case VIR_DOMAIN_LIFECYCLE_LAST:
> -            goto cleanup;
> +            goto endjob;
>          }
>      } else if (xl_reason == LIBXL_SHUTDOWN_REASON_CRASH) {
>          dom_event = virDomainEventLifecycleNewFromObj(vm,
> @@ -399,7 +402,7 @@ libxlDomainShutdownThread(void *opaque)
>              goto restart;
>          case VIR_DOMAIN_LIFECYCLE_CRASH_PRESERVE:
>          case VIR_DOMAIN_LIFECYCLE_CRASH_LAST:
> -            goto cleanup;
> +            goto endjob;
>          case VIR_DOMAIN_LIFECYCLE_CRASH_COREDUMP_DESTROY:
>              libxlDomainAutoCoreDump(driver, vm);
>              goto destroy;
> @@ -420,11 +423,11 @@ libxlDomainShutdownThread(void *opaque)
>              goto restart;
>          case VIR_DOMAIN_LIFECYCLE_PRESERVE:
>          case VIR_DOMAIN_LIFECYCLE_LAST:
> -            goto cleanup;
> +            goto endjob;
>          }
>      } else {
>          VIR_INFO("Unhandled shutdown_reason %d", xl_reason);
> -        goto cleanup;
> +        goto endjob;
>      }
>  
>   destroy:
> @@ -433,13 +436,11 @@ libxlDomainShutdownThread(void *opaque)
>          dom_event = NULL;
>      }
>      libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
> -    if (libxlDomainCleanupJob(driver, vm, reason)) {
> -        if (!vm->persistent) {
> -            virDomainObjListRemove(driver->domains, vm);
> -            vm = NULL;
> -        }
> -    }
> -    goto cleanup;
> +    libxlDomainCleanup(driver, vm, reason);
> +    if (!vm->persistent)
> +        virDomainObjListRemove(driver->domains, vm);
> +
> +    goto endjob;
>  
>   restart:
>      if (dom_event) {
> @@ -447,13 +448,17 @@ libxlDomainShutdownThread(void *opaque)
>          dom_event = NULL;
>      }
>      libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
> -    libxlDomainCleanupJob(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
> +    libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
>      if (libxlDomainStart(driver, vm, false, -1) < 0) {
>          virErrorPtr err = virGetLastError();
>          VIR_ERROR(_("Failed to restart VM '%s': %s"),
>                    vm->def->name, err ? err->message : _("unknown error"));
>      }
>  
> + endjob:
> +    if (!libxlDomainObjEndJob(driver, vm))
> +        vm = NULL;
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> @@ -690,26 +695,6 @@ libxlDomainCleanup(libxlDriverPrivatePtr driver,
>  }
>  
>  /*
> - * Cleanup function for domain that has reached shutoff state.
> - * Executed in the context of a job.
> - *
> - * virDomainObjPtr should be locked on invocation
> - * Returns true if references remain on virDomainObjPtr, false otherwise.
> - */
> -bool
> -libxlDomainCleanupJob(libxlDriverPrivatePtr driver,
> -                      virDomainObjPtr vm,
> -                      virDomainShutoffReason reason)
> -{
> -    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_DESTROY) < 0)
> -        return true;
> -
> -    libxlDomainCleanup(driver, vm, reason);
> -
> -    return libxlDomainObjEndJob(driver, vm);
> -}
> -
> -/*
>   * Core dump domain to default dump path.
>   *
>   * virDomainObjPtr must be locked on invocation
> diff --git a/src/libxl/libxl_domain.h b/src/libxl/libxl_domain.h
> index a032e46..30855a2 100644
> --- a/src/libxl/libxl_domain.h
> +++ b/src/libxl/libxl_domain.h
> @@ -108,10 +108,6 @@ libxlDomainCleanup(libxlDriverPrivatePtr driver,
>                     virDomainObjPtr vm,
>                     virDomainShutoffReason reason);
>  
> -bool
> -libxlDomainCleanupJob(libxlDriverPrivatePtr driver,
> -                      virDomainObjPtr vm,
> -                      virDomainShutoffReason reason);
>  /*
>   * Note: Xen 4.3 removed the const from the event handler signature.
>   * Detect which signature to use based on
> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
> index da4c1c7..a1977c2 100644
> --- a/src/libxl/libxl_driver.c
> +++ b/src/libxl/libxl_driver.c
> @@ -1238,10 +1238,13 @@ libxlDomainDestroyFlags(virDomainPtr dom,
>      if (virDomainDestroyFlagsEnsureACL(dom->conn, vm->def) < 0)
>          goto cleanup;
>  
> +    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
> +        goto cleanup;
> +
>      if (!virDomainObjIsActive(vm)) {
>          virReportError(VIR_ERR_OPERATION_INVALID,
>                         "%s", _("Domain is not running"));
> -        goto cleanup;
> +        goto endjob;
>      }
>  
>      event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
> @@ -1250,18 +1253,19 @@ libxlDomainDestroyFlags(virDomainPtr dom,
>      if (libxl_domain_destroy(cfg->ctx, vm->def->id, NULL) < 0) {
>          virReportError(VIR_ERR_INTERNAL_ERROR,
>                         _("Failed to destroy domain '%d'"), vm->def->id);
> -        goto cleanup;
> +        goto endjob;
>      }
>  
> -    if (libxlDomainCleanupJob(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED)) {
> -        if (!vm->persistent) {
> -            virDomainObjListRemove(driver->domains, vm);
> -            vm = NULL;
> -        }
> -    }
> +    libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED);
> +    if (!vm->persistent)
> +        virDomainObjListRemove(driver->domains, vm);
>  
>      ret = 0;
>  
> + endjob:
> +    if (!libxlDomainObjEndJob(driver, vm))
> +        vm = NULL;
> +
>   cleanup:
>      if (vm)
>          virObjectUnlock(vm);
> -- 
> 1.8.4.5
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] libxl: drop virDomainObj lock when destroying a domain
  2015-03-25 20:08 ` [PATCH 3/3] libxl: drop virDomainObj lock " Jim Fehlig
@ 2015-03-26 15:14   ` Ian Campbell
       [not found]   ` <1427382869.13935.61.camel@citrix.com>
  1 sibling, 0 replies; 15+ messages in thread
From: Ian Campbell @ 2015-03-26 15:14 UTC (permalink / raw)
  To: Jim Fehlig; +Cc: libvir-list, xen-devel

On Wed, 2015-03-25 at 14:08 -0600, Jim Fehlig wrote:
> A destroy operation can take considerable time on large memory
> domains due to scrubbing the domain' memory.  The operation is
> running in the context of a job, so unlocking the domain and
> allowing query operations is safe.
> 
> Signed-off-by: Jim Fehlig <jfehlig@suse.com>

I don't really know enough about the libvirt job/obj locking to comment
on the previous patches or that aspect of this one, but in general the
idea of dropping the lock before calling libxl_destroy seems reasonable
to me.

In principal you could use the async stuff (the final NULL to the call),
but you'd still be wanting to drop the lock for that period, so it's not
clear it's any better from your PoV.

Ian.

> ---
>  src/libxl/libxl_domain.c | 4 ++++
>  src/libxl/libxl_driver.c | 5 ++++-
>  2 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index e240eae..aef0157 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c
> @@ -435,7 +435,9 @@ libxlDomainShutdownThread(void *opaque)
>          libxlDomainEventQueue(driver, dom_event);
>          dom_event = NULL;
>      }
> +    virObjectUnlock(vm);
>      libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
> +    virObjectLock(vm);
>      libxlDomainCleanup(driver, vm, reason);
>      if (!vm->persistent)
>          virDomainObjListRemove(driver->domains, vm);
> @@ -447,7 +449,9 @@ libxlDomainShutdownThread(void *opaque)
>          libxlDomainEventQueue(driver, dom_event);
>          dom_event = NULL;
>      }
> +    virObjectUnlock(vm);
>      libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
> +    virObjectLock(vm);
>      libxlDomainCleanup(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
>      if (libxlDomainStart(driver, vm, false, -1) < 0) {
>          virErrorPtr err = virGetLastError();
> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
> index a1977c2..21e20bb 100644
> --- a/src/libxl/libxl_driver.c
> +++ b/src/libxl/libxl_driver.c
> @@ -1250,7 +1250,10 @@ libxlDomainDestroyFlags(virDomainPtr dom,
>      event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED,
>                                       VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
>  
> -    if (libxl_domain_destroy(cfg->ctx, vm->def->id, NULL) < 0) {
> +    virObjectUnlock(vm);
> +    ret = libxl_domain_destroy(cfg->ctx, vm->def->id, NULL);
> +    virObjectLock(vm);
> +    if (ret < 0) {
>          virReportError(VIR_ERR_INTERNAL_ERROR,
>                         _("Failed to destroy domain '%d'"), vm->def->id);
>          goto endjob;

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] libxl: acquire a job when destroying a domain
       [not found]   ` <20150326013242.GA8605@konrad-lan.dumpdata.com>
@ 2015-03-26 21:29     ` Jim Fehlig
       [not found]     ` <55147A4F.2000903@suse.com>
  1 sibling, 0 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-03-26 21:29 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: libvir-list, xen-devel

Konrad Rzeszutek Wilk wrote:
> On Wed, Mar 25, 2015 at 02:08:35PM -0600, Jim Fehlig wrote:
>   
>> A job should be acquired at the beginning of a domain destroy operation,
>> not at the end when cleaning up the domain.  Fix two occurances of this
>> late job acquisition in the libxl driver.  Doing so renders
>> libxlDomainCleanup unused, so it is removed.
>>     

Just noticed this should be libxlDomainCleanupJob.

>> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
>> ---
>>  src/libxl/libxl_domain.c | 49 +++++++++++++++++-------------------------------
>>  src/libxl/libxl_domain.h |  4 ----
>>  src/libxl/libxl_driver.c | 20 ++++++++++++--------
>>  3 files changed, 29 insertions(+), 44 deletions(-)
>>
>> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
>> index eca1ff0..e240eae 100644
>> --- a/src/libxl/libxl_domain.c
>> +++ b/src/libxl/libxl_domain.c
>> @@ -371,6 +371,9 @@ libxlDomainShutdownThread(void *opaque)
>>  
>>      cfg = libxlDriverConfigGet(driver);
>>  
>> +    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
>> +        goto cleanup;
>> +
>>     
>
> Won't this a deadlock with 'libxlDomainAutoCoreDump'  (line 410) (which also
> calls :
>
>  727     if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
>  728         goto cleanup;
>
> well, not deadlock - but spin up to 30 seconds? and then give up?
>   

Yes, another good catch!

> Perhaps this patch should be folded in?
>
> From 9f2bac0c28815fc51850290c4b1962881691bfca Mon Sep 17 00:00:00 2001
> From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Date: Wed, 25 Mar 2015 22:34:51 -0400
> Subject: [PATCH] squash
>
> ---
>  src/libxl/libxl_domain.c | 4 ----
>  1 file changed, 4 deletions(-)
>
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index aef0157..774b070 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c
> @@ -723,15 +723,11 @@ libxlDomainAutoCoreDump(libxlDriverPrivatePtr driver,
>                      timestr) < 0)
>          goto cleanup;
>  
> -    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
> -        goto cleanup;
> -
>      /* Unlock virDomainObj while dumping core */
>      virObjectUnlock(vm);
>      libxl_domain_core_dump(cfg->ctx, vm->def->id, dumpfile, NULL);
>      virObjectLock(vm);
>  
> -    ignore_value(libxlDomainObjEndJob(driver, vm));
>   

I've squashed this in my branch and fixed the commit message typo.

Regards,
Jim

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] libxl: drop virDomainObj lock when destroying a domain
       [not found]   ` <1427382869.13935.61.camel@citrix.com>
@ 2015-03-26 23:16     ` Jim Fehlig
  0 siblings, 0 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-03-26 23:16 UTC (permalink / raw)
  To: Ian Campbell; +Cc: libvir-list, xen-devel

Ian Campbell wrote:
> On Wed, 2015-03-25 at 14:08 -0600, Jim Fehlig wrote:
>   
>> A destroy operation can take considerable time on large memory
>> domains due to scrubbing the domain' memory.  The operation is
>> running in the context of a job, so unlocking the domain and
>> allowing query operations is safe.
>>
>> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
>>     
>
> I don't really know enough about the libvirt job/obj locking to comment
> on the previous patches or that aspect of this one, but in general the
> idea of dropping the lock before calling libxl_destroy seems reasonable
> to me.
>
> In principal you could use the async stuff (the final NULL to the call),
> but you'd still be wanting to drop the lock for that period, so it's not
> clear it's any better from your PoV.
>   

I tried the async approach as well, but in the end decided it was no
better.  In case you're interested, I pushed the async variant to
libxl-dom-destroy-async branch in my github fork

https://github.com/jfehlig/libvirt

Regards,
Jim

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/3] libxl: domain destroy fixes
       [not found] <1427314116-14451-1-git-send-email-jfehlig@suse.com>
                   ` (3 preceding siblings ...)
       [not found] ` <1427314116-14451-3-git-send-email-jfehlig@suse.com>
@ 2015-03-27 18:16 ` Konrad Rzeszutek Wilk
  2015-03-27 18:34   ` Jim Fehlig
       [not found]   ` <5515A29C.5040802@suse.com>
  2015-03-30 14:00 ` Anthony PERARD
  5 siblings, 2 replies; 15+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-03-27 18:16 UTC (permalink / raw)
  To: Jim Fehlig; +Cc: libvir-list, xen-devel

On Wed, Mar 25, 2015 at 02:08:33PM -0600, Jim Fehlig wrote:
> This small series of patches fixes some issues wrt domain destroy in
> the libxl driver.  The primary motivation for this work is to
> prevent locking the virDomainObj during long running destroy operations
> on large memory domains.
> 
> Patch 1 moves job acquisition from libxlDomainStart to it's callers so
> they have more control over when the job is acquired.  Patch 2 fixes a
> few spots where we never acquired a job during domain destroy.  Patch 3
> contains the interesting change, where the virDomainObj is unlocked
> during the long-running destroy operation.
> 
> This series wraps up my work to improve parallel OpenStack Tempest runs
> against the libxl driver.  With libvirt.git master + this series + a
> patched libxl [1], I've successfully run a reproducer that was hitting
> the same issues encountered by Tempest.
> 
> [1] libxl commits from xen.git: 93699882d, f1335f0d, 4783c99a, 1c91d6fba,
> and 188e9c54.  I'll contact the stable branch maintainers and ask them
> to include these commits in the next Xen 4.4.x and 4.5.x releases.
> 
> Jim Fehlig (3):
>   libxl: Move job acquisition in libxlDomainStart to callers
>   libxl: acquire a job when destroying a domain
>   libxl: drop virDomainObj lock when destroying a domain

I am no expert at this- but I dug through the code to understand how
the job and locking is done and now I am more comfortable with it.

Since I am new to this I went through all of the the callsites (which used
the job now) from the driver to make sure that there are no chained calls
(one function calling another which also uses a mutex or job locking).

I only found one culprit (libxlDomainAutoCoreDump being called from
 libxlDomainShutdownThread).

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

> 
>  src/libxl/libxl_domain.c | 77 +++++++++++++++++++----------------------------
>  src/libxl/libxl_domain.h |  4 ---
>  src/libxl/libxl_driver.c | 78 ++++++++++++++++++++++++++++++++++++------------
>  3 files changed, 89 insertions(+), 70 deletions(-)
> 
> -- 
> 1.8.4.5
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/3] libxl: domain destroy fixes
  2015-03-27 18:16 ` [PATCH 0/3] libxl: domain destroy fixes Konrad Rzeszutek Wilk
@ 2015-03-27 18:34   ` Jim Fehlig
       [not found]   ` <5515A29C.5040802@suse.com>
  1 sibling, 0 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-03-27 18:34 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: libvir-list, xen-devel

Konrad Rzeszutek Wilk wrote:
> On Wed, Mar 25, 2015 at 02:08:33PM -0600, Jim Fehlig wrote:
>   
>> This small series of patches fixes some issues wrt domain destroy in
>> the libxl driver.  The primary motivation for this work is to
>> prevent locking the virDomainObj during long running destroy operations
>> on large memory domains.
>>
>> Patch 1 moves job acquisition from libxlDomainStart to it's callers so
>> they have more control over when the job is acquired.  Patch 2 fixes a
>> few spots where we never acquired a job during domain destroy.  Patch 3
>> contains the interesting change, where the virDomainObj is unlocked
>> during the long-running destroy operation.
>>
>> This series wraps up my work to improve parallel OpenStack Tempest runs
>> against the libxl driver.  With libvirt.git master + this series + a
>> patched libxl [1], I've successfully run a reproducer that was hitting
>> the same issues encountered by Tempest.
>>
>> [1] libxl commits from xen.git: 93699882d, f1335f0d, 4783c99a, 1c91d6fba,
>> and 188e9c54.  I'll contact the stable branch maintainers and ask them
>> to include these commits in the next Xen 4.4.x and 4.5.x releases.
>>
>> Jim Fehlig (3):
>>   libxl: Move job acquisition in libxlDomainStart to callers
>>   libxl: acquire a job when destroying a domain
>>   libxl: drop virDomainObj lock when destroying a domain
>>     
>
> I am no expert at this- but I dug through the code to understand how
> the job and locking is done and now I am more comfortable with it.
>
> Since I am new to this I went through all of the the callsites (which used
> the job now) from the driver to make sure that there are no chained calls
> (one function calling another which also uses a mutex or job locking).
>
> I only found one culprit (libxlDomainAutoCoreDump being called from
>  libxlDomainShutdownThread).
>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>   

Thanks for taking time to review the series and familiarize yourself
with the libvirt libxl code!  As mentioned, I squashed in your
libxlDomainAutoCoreDump fix in 2/3.

Do any other libvirt devs have time for a quick review?  I'd like to
push this series, and the dom0 ballooning fix, for 1.2.14 if there are
no objections.  The latter was reviewed by Stefano Stabellini before the
freeze

https://www.redhat.com/archives/libvir-list/2015-March/msg01248.html

Regards,
Jim

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/3] libxl: domain destroy fixes
       [not found] <1427314116-14451-1-git-send-email-jfehlig@suse.com>
                   ` (4 preceding siblings ...)
  2015-03-27 18:16 ` [PATCH 0/3] libxl: domain destroy fixes Konrad Rzeszutek Wilk
@ 2015-03-30 14:00 ` Anthony PERARD
  5 siblings, 0 replies; 15+ messages in thread
From: Anthony PERARD @ 2015-03-30 14:00 UTC (permalink / raw)
  To: Jim Fehlig; +Cc: libvir-list, xen-devel

On Wed, Mar 25, 2015 at 02:08:33PM -0600, Jim Fehlig wrote:
> This small series of patches fixes some issues wrt domain destroy in
> the libxl driver.  The primary motivation for this work is to
> prevent locking the virDomainObj during long running destroy operations
> on large memory domains.
> 
> Patch 1 moves job acquisition from libxlDomainStart to it's callers so
> they have more control over when the job is acquired.  Patch 2 fixes a
> few spots where we never acquired a job during domain destroy.  Patch 3
> contains the interesting change, where the virDomainObj is unlocked
> during the long-running destroy operation.
> 
> This series wraps up my work to improve parallel OpenStack Tempest runs
> against the libxl driver.  With libvirt.git master + this series + a
> patched libxl [1], I've successfully run a reproducer that was hitting
> the same issues encountered by Tempest.
> 
> [1] libxl commits from xen.git: 93699882d, f1335f0d, 4783c99a, 1c91d6fba,
> and 188e9c54.  I'll contact the stable branch maintainers and ask them
> to include these commits in the next Xen 4.4.x and 4.5.x releases.

Hi,

I gave a try to this series with OpenStack Tempest. And it is much better.

Thanks!

-- 
Anthony PERARD

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/3] libxl: domain destroy fixes
       [not found]   ` <5515A29C.5040802@suse.com>
@ 2015-03-30 19:13     ` Jim Fehlig
  0 siblings, 0 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-03-30 19:13 UTC (permalink / raw)
  To: libvir-list; +Cc: xen-devel

Jim Fehlig wrote:
> Konrad Rzeszutek Wilk wrote:
>   
>> On Wed, Mar 25, 2015 at 02:08:33PM -0600, Jim Fehlig wrote:
>>   
>>     
>>> This small series of patches fixes some issues wrt domain destroy in
>>> the libxl driver.  The primary motivation for this work is to
>>> prevent locking the virDomainObj during long running destroy operations
>>> on large memory domains.
>>>
>>> Patch 1 moves job acquisition from libxlDomainStart to it's callers so
>>> they have more control over when the job is acquired.  Patch 2 fixes a
>>> few spots where we never acquired a job during domain destroy.  Patch 3
>>> contains the interesting change, where the virDomainObj is unlocked
>>> during the long-running destroy operation.
>>>
>>> This series wraps up my work to improve parallel OpenStack Tempest runs
>>> against the libxl driver.  With libvirt.git master + this series + a
>>> patched libxl [1], I've successfully run a reproducer that was hitting
>>> the same issues encountered by Tempest.
>>>
>>> [1] libxl commits from xen.git: 93699882d, f1335f0d, 4783c99a, 1c91d6fba,
>>> and 188e9c54.  I'll contact the stable branch maintainers and ask them
>>> to include these commits in the next Xen 4.4.x and 4.5.x releases.
>>>
>>> Jim Fehlig (3):
>>>   libxl: Move job acquisition in libxlDomainStart to callers
>>>   libxl: acquire a job when destroying a domain
>>>   libxl: drop virDomainObj lock when destroying a domain
>>>     
>>>       
>> I am no expert at this- but I dug through the code to understand how
>> the job and locking is done and now I am more comfortable with it.
>>
>> Since I am new to this I went through all of the the callsites (which used
>> the job now) from the driver to make sure that there are no chained calls
>> (one function calling another which also uses a mutex or job locking).
>>
>> I only found one culprit (libxlDomainAutoCoreDump being called from
>>  libxlDomainShutdownThread).
>>
>> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>>   
>>     
>
> Thanks for taking time to review the series and familiarize yourself
> with the libvirt libxl code!  As mentioned, I squashed in your
> libxlDomainAutoCoreDump fix in 2/3.
>
> Do any other libvirt devs have time for a quick review?  I'd like to
> push this series, and the dom0 ballooning fix, for 1.2.14 if there are
> no objections.  The latter was reviewed by Stefano Stabellini before the
> freeze
>
> https://www.redhat.com/archives/libvir-list/2015-March/msg01248.html
>   

Ping!

I'd like to include these fixes for 1.2.14.  The patches have been
"Reviewed-by" Konrad and Stefano.  Anthony also responded today that his
OpenStack Tempest runs are much happier

https://www.redhat.com/archives/libvir-list/2015-March/msg01540.html

Regards,
Jim

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [libvirt] [PATCH 1/3] libxl: Move job acquisition in libxlDomainStart to callers
  2015-03-25 20:08 ` [PATCH 1/3] libxl: Move job acquisition in libxlDomainStart to callers Jim Fehlig
@ 2015-04-01  9:55   ` Martin Kletzander
  2015-04-01 17:29     ` Jim Fehlig
  0 siblings, 1 reply; 15+ messages in thread
From: Martin Kletzander @ 2015-04-01  9:55 UTC (permalink / raw)
  To: Jim Fehlig; +Cc: libvir-list, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 3006 bytes --]

On Wed, Mar 25, 2015 at 02:08:34PM -0600, Jim Fehlig wrote:
>Let callers of libxlDomainStart decide when it is appropriate to
>acquire a job on the associated virDomainObj.
>

This makes sense, I see many bugs this fixes, but how come
libxlDomainShutdownThread(), libxlDomainRestoreFlags() and
libxlDoMigrateReceive() don't need to start the job when they all call
libxlDomainStart()?

>Signed-off-by: Jim Fehlig <jfehlig@suse.com>
>---
> src/libxl/libxl_domain.c | 24 ++++++++--------------
> src/libxl/libxl_driver.c | 53 +++++++++++++++++++++++++++++++++++++++---------
> 2 files changed, 52 insertions(+), 25 deletions(-)
>
>diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
>index 05f6eb1..da4c1c7 100644
>--- a/src/libxl/libxl_driver.c
>+++ b/src/libxl/libxl_driver.c
>@@ -885,17 +893,26 @@ libxlDomainCreateXML(virConnectPtr conn, const char *xml,
>         goto cleanup;
>     def = NULL;
>
>+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) {
>+        virDomainObjListRemove(driver->domains, vm);
>+        vm = NULL;
>+        goto cleanup;
>+    }
>+

This should be acquired before virDomainObjListAdd() since you need to
check whether it's active after creating the job.  If I'm wrong, then
virDomainObjListRemove() should be only called if the vm is not
persistent (as CreateXML can be called on persistent ones as well),
shouldn't it?

[...]
>@@ -1695,11 +1712,20 @@ libxlDomainRestoreFlags(virConnectPtr conn, const char *from,
>
>     def = NULL;
>
>+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) {
>+        if (!vm->persistent) {
>+            virDomainObjListRemove(driver->domains, vm);
>+            vm = NULL;
>+        }
>+        goto cleanup;
>+    }
>+

Same here, I guess.

>     ret = libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_SAVE_PAUSED) != 0, fd);
>-    if (ret < 0 && !vm->persistent) {
>+    if (ret < 0 && !vm->persistent)
>         virDomainObjListRemove(driver->domains, vm);
>+
>+    if (!libxlDomainObjEndJob(driver, vm))
>         vm = NULL;
>-    }
>
>  cleanup:
>     if (VIR_CLOSE(fd) < 0)
>@@ -2567,17 +2593,24 @@ libxlDomainCreateWithFlags(virDomainPtr dom,
>     if (virDomainCreateWithFlagsEnsureACL(dom->conn, vm->def) < 0)
>         goto cleanup;
>
>+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0)
>+        goto cleanup;
>+
>     if (virDomainObjIsActive(vm)) {
>         virReportError(VIR_ERR_OPERATION_INVALID,
>                        "%s", _("Domain is already running"));
>-        goto cleanup;
>+        goto endjob;
>     }
>
>     ret = libxlDomainStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0, -1);
>     if (ret < 0)
>-        goto cleanup;
>+        goto endjob;
>     dom->id = vm->def->id;
>
>+ endjob:
>+    if (!libxlDomainObjEndJob(driver, vm))
>+        vm = NULL;
>+
>  cleanup:
>     if (vm)
>         virObjectUnlock(vm);
>--
>1.8.4.5
>
>--
>libvir-list mailing list
>libvir-list@redhat.com
>https://www.redhat.com/mailman/listinfo/libvir-list

[-- Attachment #1.2: Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [libvirt] [PATCH 2/3] libxl: acquire a job when destroying a domain
       [not found]     ` <55147A4F.2000903@suse.com>
@ 2015-04-01 10:12       ` Martin Kletzander
       [not found]       ` <20150401101205.GC30610@wheatley>
  1 sibling, 0 replies; 15+ messages in thread
From: Martin Kletzander @ 2015-04-01 10:12 UTC (permalink / raw)
  To: Jim Fehlig; +Cc: libvir-list, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 765 bytes --]

On Thu, Mar 26, 2015 at 03:29:51PM -0600, Jim Fehlig wrote:
>Konrad Rzeszutek Wilk wrote:
>> On Wed, Mar 25, 2015 at 02:08:35PM -0600, Jim Fehlig wrote:
>>
>>> A job should be acquired at the beginning of a domain destroy operation,
>>> not at the end when cleaning up the domain.  Fix two occurances of this

And s/occurances/occurrences/ here.

It looks fine, though, with the squash-in.

Also, if you want to have a look at some other things that might be
fixed here, plus some speed-up gained, have a look at my commit
540c339a, that does some similar things in the QEMU driver.

>>> late job acquisition in the libxl driver.  Doing so renders
>>> libxlDomainCleanup unused, so it is removed.
>>>
>
>Just noticed this should be libxlDomainCleanupJob.
>

Yes :)

[-- Attachment #1.2: Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [libvirt] [PATCH 1/3] libxl: Move job acquisition in libxlDomainStart to callers
  2015-04-01  9:55   ` [libvirt] " Martin Kletzander
@ 2015-04-01 17:29     ` Jim Fehlig
  0 siblings, 0 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-04-01 17:29 UTC (permalink / raw)
  To: Martin Kletzander; +Cc: libvir-list, xen-devel

Martin Kletzander wrote:
> On Wed, Mar 25, 2015 at 02:08:34PM -0600, Jim Fehlig wrote:
>> Let callers of libxlDomainStart decide when it is appropriate to
>> acquire a job on the associated virDomainObj.
>>
>
> This makes sense, I see many bugs this fixes, but how come
> libxlDomainShutdownThread(), libxlDomainRestoreFlags() and
> libxlDoMigrateReceive() don't need to start the job when they all call
> libxlDomainStart()?

Commit 0521d658 starts a job for libxlDomainShutdownThread.  This patch
adds starting a job in libxlDomainRestoreFlags.  For migration, I'll
need to work on a follow-up series that fixes job handling in general.

>
>> Signed-off-by: Jim Fehlig <jfehlig@suse.com>
>> ---
>> src/libxl/libxl_domain.c | 24 ++++++++--------------
>> src/libxl/libxl_driver.c | 53
>> +++++++++++++++++++++++++++++++++++++++---------
>> 2 files changed, 52 insertions(+), 25 deletions(-)
>>
>> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
>> index 05f6eb1..da4c1c7 100644
>> --- a/src/libxl/libxl_driver.c
>> +++ b/src/libxl/libxl_driver.c
>> @@ -885,17 +893,26 @@ libxlDomainCreateXML(virConnectPtr conn, const
>> char *xml,
>>         goto cleanup;
>>     def = NULL;
>>
>> +    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) {
>> +        virDomainObjListRemove(driver->domains, vm);
>> +        vm = NULL;
>> +        goto cleanup;
>> +    }
>> +
>
> This should be acquired before virDomainObjListAdd() since you need to
> check whether it's active after creating the job.

Acquiring the job requires a virDomainObj, which we get from the call to
virDomainObjListAdd().

> If I'm wrong, then
> virDomainObjListRemove() should be only called if the vm is not
> persistent (as CreateXML can be called on persistent ones as well),
> shouldn't it?

Yes, I think you are right.  This was coded up assuming CreateXML only
handled transient domains.

>
> [...]
>> @@ -1695,11 +1712,20 @@ libxlDomainRestoreFlags(virConnectPtr conn,
>> const char *from,
>>
>>     def = NULL;
>>
>> +    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) {
>> +        if (!vm->persistent) {
>> +            virDomainObjListRemove(driver->domains, vm);
>> +            vm = NULL;
>> +        }
>> +        goto cleanup;
>> +    }
>> +
>
> Same here, I guess.

Yes :-).  A virDomainObj is needed to acquire a job.

Thanks for the review.  I'll fix calling virDomainObjListRemove() on
persistent domains in V2.

Regards,
Jim

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [libvirt] [PATCH 2/3] libxl: acquire a job when destroying a domain
       [not found]       ` <20150401101205.GC30610@wheatley>
@ 2015-04-01 21:37         ` Jim Fehlig
  0 siblings, 0 replies; 15+ messages in thread
From: Jim Fehlig @ 2015-04-01 21:37 UTC (permalink / raw)
  To: Martin Kletzander; +Cc: libvir-list, xen-devel

Martin Kletzander wrote:
> On Thu, Mar 26, 2015 at 03:29:51PM -0600, Jim Fehlig wrote:
>> Konrad Rzeszutek Wilk wrote:
>>> On Wed, Mar 25, 2015 at 02:08:35PM -0600, Jim Fehlig wrote:
>>>
>>>> A job should be acquired at the beginning of a domain destroy
>>>> operation,
>>>> not at the end when cleaning up the domain.  Fix two occurances of
>>>> this
>
> And s/occurances/occurrences/ here.
>
> It looks fine, though, with the squash-in.

Thanks.  I've sent a V2 addressing comments from you and Konrad

https://www.redhat.com/archives/libvir-list/2015-April/msg00072.html

>
> Also, if you want to have a look at some other things that might be
> fixed here, plus some speed-up gained, have a look at my commit
> 540c339a, that does some similar things in the QEMU driver.

Ah, that is nice work!  I recall seeing the series and thinking it was
pertinent to the libxl driver, but then forgot to take a closer look. 
Thanks for the reminder!  I'll work on a similar improvement in the
libxl driver.  But IMO that should not hold up this series, which is a
big improvement in itself.

Regards,
Jim

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2015-04-01 21:37 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1427314116-14451-1-git-send-email-jfehlig@suse.com>
2015-03-25 20:08 ` [PATCH 1/3] libxl: Move job acquisition in libxlDomainStart to callers Jim Fehlig
2015-04-01  9:55   ` [libvirt] " Martin Kletzander
2015-04-01 17:29     ` Jim Fehlig
2015-03-25 20:08 ` [PATCH 2/3] libxl: acquire a job when destroying a domain Jim Fehlig
2015-03-25 20:08 ` [PATCH 3/3] libxl: drop virDomainObj lock " Jim Fehlig
2015-03-26 15:14   ` Ian Campbell
     [not found]   ` <1427382869.13935.61.camel@citrix.com>
2015-03-26 23:16     ` Jim Fehlig
     [not found] ` <1427314116-14451-3-git-send-email-jfehlig@suse.com>
2015-03-26  1:32   ` [PATCH 2/3] libxl: acquire a job " Konrad Rzeszutek Wilk
     [not found]   ` <20150326013242.GA8605@konrad-lan.dumpdata.com>
2015-03-26 21:29     ` Jim Fehlig
     [not found]     ` <55147A4F.2000903@suse.com>
2015-04-01 10:12       ` [libvirt] " Martin Kletzander
     [not found]       ` <20150401101205.GC30610@wheatley>
2015-04-01 21:37         ` Jim Fehlig
2015-03-27 18:16 ` [PATCH 0/3] libxl: domain destroy fixes Konrad Rzeszutek Wilk
2015-03-27 18:34   ` Jim Fehlig
     [not found]   ` <5515A29C.5040802@suse.com>
2015-03-30 19:13     ` Jim Fehlig
2015-03-30 14:00 ` Anthony PERARD

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.