All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards
@ 2015-09-11 10:42 Ian Campbell
  2015-09-11 10:50 ` Ian Jackson
  2015-09-11 13:44 ` Andrew Cooper
  0 siblings, 2 replies; 6+ messages in thread
From: Ian Campbell @ 2015-09-11 10:42 UTC (permalink / raw)
  To: ian.jackson, wei.liu2, xen-devel
  Cc: Shriram Rajagopalan, Andrew Cooper, Jim Fehlig, Yang Hongyang,
	Ian Campbell

The fd passed to us by libvirt for both save and restore has at least
O_NONBLOCK set, which libxl does not expect and therefore fails to
handle any EAGAIN which might arise.

This has been observed with migration v2, but if v1 used to work I
think that would be just be by luck and/or coincidence.

Unix convention (and the principal of least surprise) is usually to
ensure that an fd has no "strange" properties, such as being
non-blocking, when handing it to another component.

However for the convenience of the application arrange instead for
libxl to clear any unexpected flags on the file descriptors it is
given for save or restore and restore them to their original state at
the end. O_NDELAY could be similarly problematic so clear that as
well as O_NONBLOCK.

To do this introduce a pair of new helper functions one to modify+save
the flags and another to restore them and call them in the appropriate
places.

The migration v1 code appeared to do some things with O_NONBLOCK in
the checkpoint case. Migration v2 doesn't seem to do so, and in any
case I wouldn't expect it to be relying on libvirt's setting of
O_NONBLOCK when xl doesn't use that flag.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jim Fehlig <jfehlig@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Shriram Rajagopalan <rshriram@cs.ubc.ca>
Cc: Yang Hongyang <yanghy@cn.fujitsu.com>
---
For 4.6: This fixes migration with libvirt, which I think is worth
doing before the release.

For backports: Once "ts-xen-install: Rewrite /etc/hosts to comment out
127.0.1.1 entry" passes through osstest's pretest gate and has run on
some of the older branches we should then know if this is necessary
for migration v1. Or we could backport it regardless.
---
 tools/libxl/libxl.c          | 65 ++++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_create.c   | 23 +++++++++++++++-
 tools/libxl/libxl_internal.h | 13 +++++++++
 3 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 4f2eb24..d6efdd8 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -952,6 +952,12 @@ static void domain_suspend_cb(libxl__egc *egc,
                               libxl__domain_suspend_state *dss, int rc)
 {
     STATE_AO_GC(dss->ao);
+    int flrc;
+
+    flrc = libxl__fd_flags_restore(gc, dss->fd, dss->fdfl);
+    /* If suspend has failed already then report that error not this one. */
+    if (flrc && !rc) rc = flrc;
+
     libxl__ao_complete(egc,ao,rc);
 
 }
@@ -980,6 +986,11 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
     dss->live = flags & LIBXL_SUSPEND_LIVE;
     dss->debug = flags & LIBXL_SUSPEND_DEBUG;
 
+    rc = libxl__fd_flags_modify_save(gc, dss->fd,
+                                     ~(O_NONBLOCK|O_NDELAY), 0,
+                                     &dss->fdfl);
+    if (rc < 0) goto out_err;
+
     libxl__domain_save(egc, dss);
     return AO_INPROGRESS;
 
@@ -6507,6 +6518,60 @@ int libxl_fd_set_cloexec(libxl_ctx *ctx, int fd, int cloexec)
 int libxl_fd_set_nonblock(libxl_ctx *ctx, int fd, int nonblock)
   { return fd_set_flags(ctx,fd, F_GETFL,F_SETFL,"FL", O_NONBLOCK, nonblock); }
 
+int libxl__fd_flags_modify_save(libxl__gc *gc, int fd,
+                                int mask, int val, int *r_oldflags)
+{
+    int rc, ret, fdfl;
+
+    fdfl = fcntl(fd, F_GETFL);
+    if (fdfl < 0) {
+        LOGE(ERROR, "failed to fcntl.F_GETFL for fd %d", fd);
+        rc = ERROR_FAIL;
+        goto out_err;
+    }
+
+    LOG(DEBUG, "fnctl F_GETFL flags for fd %d are %x", fd, fdfl);
+
+    if (r_oldflags)
+        *r_oldflags = fdfl;
+
+    fdfl &= mask;
+    fdfl |= val;
+
+    LOG(DEBUG, "fnctl F_SETFL of fd %d to %x", fd, fdfl);
+
+    ret = fcntl(fd, F_SETFL, fdfl);
+    if (ret < 0) {
+        LOGE(ERROR, "failed to fcntl.F_SETFL for fd %d", fd);
+        rc = ERROR_FAIL;
+        goto out_err;
+    }
+
+    rc = 0;
+
+out_err:
+    return rc;
+}
+
+int libxl__fd_flags_restore(libxl__gc *gc, int fd, int fdfl)
+{
+    int ret, rc;
+
+    LOG(DEBUG, "fnctl F_SETFL of fd %d to %x", fd, fdfl);
+
+    ret = fcntl(fd, F_SETFL, fdfl);
+    if (ret < 0) {
+        LOGE(ERROR, "failed to fcntl.F_SETFL for fd %x", fd);
+        rc = ERROR_FAIL;
+        goto out_err;
+    }
+
+    rc = 0;
+
+out_err:
+    return rc;
+
+}
 
 void libxl_hwcap_copy(libxl_ctx *ctx,libxl_hwcap *dst, libxl_hwcap *src)
 {
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 5128160..099c7e8 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1555,6 +1555,7 @@ static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
 {
     AO_CREATE(ctx, 0, ao_how);
     libxl__app_domain_create_state *cdcs;
+    int rc;
 
     GCNEW(cdcs);
     cdcs->dcs.ao = ao;
@@ -1562,8 +1563,13 @@ static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
     libxl_domain_config_init(&cdcs->dcs.guest_config_saved);
     libxl_domain_config_copy(ctx, &cdcs->dcs.guest_config_saved, d_config);
     cdcs->dcs.restore_fd = cdcs->dcs.libxc_fd = restore_fd;
-    if (restore_fd > -1)
+    if (restore_fd > -1) {
         cdcs->dcs.restore_params = *params;
+        rc = libxl__fd_flags_modify_save(gc, cdcs->dcs.restore_fd,
+                                         ~(O_NONBLOCK|O_NDELAY), 0,
+                                         &cdcs->dcs.restore_fdfl);
+        if (rc < 0) goto out_err;
+    }
     cdcs->dcs.callback = domain_create_cb;
     libxl__ao_progress_gethow(&cdcs->dcs.aop_console_how, aop_console_how);
     cdcs->domid_out = domid;
@@ -1571,6 +1577,10 @@ static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
     initiate_domain_create(egc, &cdcs->dcs);
 
     return AO_INPROGRESS;
+
+ out_err:
+    return AO_CREATE_FAIL(rc);
+
 }
 
 static void domain_create_cb(libxl__egc *egc,
@@ -1578,10 +1588,21 @@ static void domain_create_cb(libxl__egc *egc,
                              int rc, uint32_t domid)
 {
     libxl__app_domain_create_state *cdcs = CONTAINER_OF(dcs, *cdcs, dcs);
+    int flrc;
     STATE_AO_GC(cdcs->dcs.ao);
 
     *cdcs->domid_out = domid;
 
+    if (dcs->restore_fd > -1) {
+        flrc = libxl__fd_flags_restore(gc,
+                dcs->restore_fd, dcs->restore_fdfl);
+        /*
+         * If restore has failed already then report that error not
+         * this one.
+         */
+        if (flrc && !rc) rc = flrc;
+    }
+
     libxl__ao_complete(egc, ao, rc);
 }
     
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 6ea6c83..1699f32 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -630,6 +630,17 @@ _hidden int libxl__pipe_nonblock(libxl_ctx *ctx, int fds[2]);
  * `not open'.  Ignores any errors.  Sets fds[] to -1. */
 _hidden void libxl__pipe_close(int fds[2]);
 
+/* Change the flags for the file description associated with fd to
+ *    (flags & mask) | val.
+ * If r_oldflags != NULL then sets *r_oldflags to the original set of
+ * flags.
+ */
+_hidden int libxl__fd_flags_modify_save(libxl__gc *gc, int fd,
+                                        int mask, int val, int *r_oldflags);
+/* Restores the flags for the file description associated with fd to
+ * to the previous value (returned by libxl__fd_flags_modify_save)
+ */
+_hidden int libxl__fd_flags_restore(libxl__gc *gc, int fd, int old_flags);
 
 /* Each of these logs errors and returns a libxl error code.
  * They do not mind if path is already removed.
@@ -3049,6 +3060,7 @@ struct libxl__domain_suspend_state {
 
     uint32_t domid;
     int fd;
+    int fdfl; /* original flags on fd */
     libxl_domain_type type;
     int live;
     int debug;
@@ -3390,6 +3402,7 @@ struct libxl__domain_create_state {
     libxl_domain_config *guest_config;
     libxl_domain_config guest_config_saved; /* vanilla config */
     int restore_fd, libxc_fd;
+    int restore_fdfl; /* original flags of restore_fd */
     libxl_domain_restore_params restore_params;
     libxl__domain_create_cb *callback;
     libxl_asyncprogress_how aop_console_how;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards
  2015-09-11 10:42 [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards Ian Campbell
@ 2015-09-11 10:50 ` Ian Jackson
  2015-09-11 12:56   ` Wei Liu
  2015-09-11 13:44 ` Andrew Cooper
  1 sibling, 1 reply; 6+ messages in thread
From: Ian Jackson @ 2015-09-11 10:50 UTC (permalink / raw)
  To: Ian Campbell
  Cc: wei.liu2, Andrew Cooper, xen-devel, Jim Fehlig,
	Shriram Rajagopalan, Yang Hongyang

Ian Campbell writes ("[PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards"):
> The fd passed to us by libvirt for both save and restore has at least
> O_NONBLOCK set, which libxl does not expect and therefore fails to
> handle any EAGAIN which might arise.

Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>

> For 4.6: This fixes migration with libvirt, which I think is worth
> doing before the release.

Indeed.

> For backports: Once "ts-xen-install: Rewrite /etc/hosts to comment out
> 127.0.1.1 entry" passes through osstest's pretest gate and has run on
> some of the older branches we should then know if this is necessary
> for migration v1. Or we could backport it regardless.

Let us see what happens.

> +int libxl__fd_flags_modify_save(libxl__gc *gc, int fd,
> +                                int mask, int val, int *r_oldflags)
> +{

This is very ... comprehensive.

Ian.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards
  2015-09-11 10:50 ` Ian Jackson
@ 2015-09-11 12:56   ` Wei Liu
  2015-09-11 14:11     ` Ian Campbell
  0 siblings, 1 reply; 6+ messages in thread
From: Wei Liu @ 2015-09-11 12:56 UTC (permalink / raw)
  To: Ian Jackson
  Cc: wei.liu2, Ian Campbell, Andrew Cooper, xen-devel, Jim Fehlig,
	Shriram Rajagopalan, Yang Hongyang

On Fri, Sep 11, 2015 at 11:50:14AM +0100, Ian Jackson wrote:
> Ian Campbell writes ("[PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards"):
> > The fd passed to us by libvirt for both save and restore has at least
> > O_NONBLOCK set, which libxl does not expect and therefore fails to
> > handle any EAGAIN which might arise.
> 
> Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
> 
> > For 4.6: This fixes migration with libvirt, which I think is worth
> > doing before the release.
> 
> Indeed.
> 

+1

Acked-by: Wei Liu <wei.liu2@citrix.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards
  2015-09-11 10:42 [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards Ian Campbell
  2015-09-11 10:50 ` Ian Jackson
@ 2015-09-11 13:44 ` Andrew Cooper
  2015-09-11 14:13   ` Ian Campbell
  1 sibling, 1 reply; 6+ messages in thread
From: Andrew Cooper @ 2015-09-11 13:44 UTC (permalink / raw)
  To: Ian Campbell, ian.jackson, wei.liu2, xen-devel
  Cc: Shriram Rajagopalan, Jim Fehlig, Yang Hongyang

On 11/09/15 11:42, Ian Campbell wrote:
> The fd passed to us by libvirt for both save and restore has at least
> O_NONBLOCK set, which libxl does not expect and therefore fails to
> handle any EAGAIN which might arise.
>
> This has been observed with migration v2, but if v1 used to work I
> think that would be just be by luck and/or coincidence.
>
> Unix convention (and the principal of least surprise) is usually to
> ensure that an fd has no "strange" properties, such as being
> non-blocking, when handing it to another component.
>
> However for the convenience of the application arrange instead for
> libxl to clear any unexpected flags on the file descriptors it is
> given for save or restore and restore them to their original state at
> the end. O_NDELAY could be similarly problematic so clear that as
> well as O_NONBLOCK.
>
> To do this introduce a pair of new helper functions one to modify+save
> the flags and another to restore them and call them in the appropriate
> places.
>
> The migration v1 code appeared to do some things with O_NONBLOCK in
> the checkpoint case. Migration v2 doesn't seem to do so, and in any
> case I wouldn't expect it to be relying on libvirt's setting of
> O_NONBLOCK when xl doesn't use that flag.
>
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Cc: Jim Fehlig <jfehlig@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> Cc: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
> For 4.6: This fixes migration with libvirt, which I think is worth
> doing before the release.
>
> For backports: Once "ts-xen-install: Rewrite /etc/hosts to comment out
> 127.0.1.1 entry" passes through osstest's pretest gate and has run on
> some of the older branches we should then know if this is necessary
> for migration v1. Or we could backport it regardless.

I don't believe any special consideration is needed for the legacy 
conversion case, as all other fds used there are created by components 
we control.

> ---
>   tools/libxl/libxl.c          | 65 ++++++++++++++++++++++++++++++++++++++++++++
>   tools/libxl/libxl_create.c   | 23 +++++++++++++++-
>   tools/libxl/libxl_internal.h | 13 +++++++++
>   3 files changed, 100 insertions(+), 1 deletion(-)
>
> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
> index 4f2eb24..d6efdd8 100644
> --- a/tools/libxl/libxl.c
> +++ b/tools/libxl/libxl.c
> @@ -952,6 +952,12 @@ static void domain_suspend_cb(libxl__egc *egc,
>                                 libxl__domain_suspend_state *dss, int rc)
>   {
>       STATE_AO_GC(dss->ao);
> +    int flrc;
> +
> +    flrc = libxl__fd_flags_restore(gc, dss->fd, dss->fdfl);
> +    /* If suspend has failed already then report that error not this one. */
> +    if (flrc && !rc) rc = flrc;
> +
>       libxl__ao_complete(egc,ao,rc);
>   
>   }
> @@ -980,6 +986,11 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
>       dss->live = flags & LIBXL_SUSPEND_LIVE;
>       dss->debug = flags & LIBXL_SUSPEND_DEBUG;
>   
> +    rc = libxl__fd_flags_modify_save(gc, dss->fd,
> +                                     ~(O_NONBLOCK|O_NDELAY), 0,
> +                                     &dss->fdfl);
> +    if (rc < 0) goto out_err;
> +
>       libxl__domain_save(egc, dss);
>       return AO_INPROGRESS;
>   
> @@ -6507,6 +6518,60 @@ int libxl_fd_set_cloexec(libxl_ctx *ctx, int fd, int cloexec)
>   int libxl_fd_set_nonblock(libxl_ctx *ctx, int fd, int nonblock)
>     { return fd_set_flags(ctx,fd, F_GETFL,F_SETFL,"FL", O_NONBLOCK, nonblock); }
>   
> +int libxl__fd_flags_modify_save(libxl__gc *gc, int fd,
> +                                int mask, int val, int *r_oldflags)
> +{
> +    int rc, ret, fdfl;
> +
> +    fdfl = fcntl(fd, F_GETFL);
> +    if (fdfl < 0) {
> +        LOGE(ERROR, "failed to fcntl.F_GETFL for fd %d", fd);
> +        rc = ERROR_FAIL;
> +        goto out_err;
> +    }
> +
> +    LOG(DEBUG, "fnctl F_GETFL flags for fd %d are %x", fd, fdfl);

%#x to distinguish decimal and hex numbers in the same message (and 
other debug messages)

~Andrew

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards
  2015-09-11 12:56   ` Wei Liu
@ 2015-09-11 14:11     ` Ian Campbell
  0 siblings, 0 replies; 6+ messages in thread
From: Ian Campbell @ 2015-09-11 14:11 UTC (permalink / raw)
  To: Wei Liu, Ian Jackson
  Cc: Shriram Rajagopalan, Andrew Cooper, Jim Fehlig, Yang Hongyang, xen-devel

On Fri, 2015-09-11 at 13:56 +0100, Wei Liu wrote:
> On Fri, Sep 11, 2015 at 11:50:14AM +0100, Ian Jackson wrote:
> > Ian Campbell writes ("[PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY
> > on migration fd and reinstate afterwards"):
> > > The fd passed to us by libvirt for both save and restore has at least
> > > O_NONBLOCK set, which libxl does not expect and therefore fails to
> > > handle any EAGAIN which might arise.
> > 
> > Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
> > 
> > > For 4.6: This fixes migration with libvirt, which I think is worth
> > > doing before the release.
> > 
> > Indeed.
> > 
> 
> +1
> 
> Acked-by: Wei Liu <wei.liu2@citrix.com>

Pushed to staging and staging-4.6, thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards
  2015-09-11 13:44 ` Andrew Cooper
@ 2015-09-11 14:13   ` Ian Campbell
  0 siblings, 0 replies; 6+ messages in thread
From: Ian Campbell @ 2015-09-11 14:13 UTC (permalink / raw)
  To: Andrew Cooper, ian.jackson, wei.liu2, xen-devel
  Cc: Shriram Rajagopalan, Jim Fehlig, Yang Hongyang

On Fri, 2015-09-11 at 14:44 +0100, Andrew Cooper wrote:
> On 11/09/15 11:42, Ian Campbell wrote:
> > The fd passed to us by libvirt for both save and restore has at least
> > O_NONBLOCK set, which libxl does not expect and therefore fails to
> > handle any EAGAIN which might arise.
> > 
> > This has been observed with migration v2, but if v1 used to work I
> > think that would be just be by luck and/or coincidence.
> > 
> > Unix convention (and the principal of least surprise) is usually to
> > ensure that an fd has no "strange" properties, such as being
> > non-blocking, when handing it to another component.
> > 
> > However for the convenience of the application arrange instead for
> > libxl to clear any unexpected flags on the file descriptors it is
> > given for save or restore and restore them to their original state at
> > the end. O_NDELAY could be similarly problematic so clear that as
> > well as O_NONBLOCK.
> > 
> > To do this introduce a pair of new helper functions one to modify+save
> > the flags and another to restore them and call them in the appropriate
> > places.
> > 
> > The migration v1 code appeared to do some things with O_NONBLOCK in
> > the checkpoint case. Migration v2 doesn't seem to do so, and in any
> > case I wouldn't expect it to be relying on libvirt's setting of
> > O_NONBLOCK when xl doesn't use that flag.
> > 
> > Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> > Cc: Jim Fehlig <jfehlig@suse.com>
> > Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> > Cc: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> > Cc: Yang Hongyang <yanghy@cn.fujitsu.com>
> > ---
> > For 4.6: This fixes migration with libvirt, which I think is worth
> > doing before the release.
> > 
> > For backports: Once "ts-xen-install: Rewrite /etc/hosts to comment out
> > 127.0.1.1 entry" passes through osstest's pretest gate and has run on
> > some of the older branches we should then know if this is necessary
> > for migration v1. Or we could backport it regardless.
> 
> I don't believe any special consideration is needed for the legacy 
> conversion case, as all other fds used there are created by components 
> we control.

Thanks, I was actually talking about actual migration v1 as in 4.5
migrating to 4.5, but the above is useful info nonetheless.

> > +    LOG(DEBUG, "fnctl F_GETFL flags for fd %d are %x", fd, fdfl);
> 
> %#x to distinguish decimal and hex numbers in the same message (and 
> other debug messages)

Gah, didn't see this until after I pushed, sorry. Will post a followup

Ian.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-09-11 14:13 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-11 10:42 [PATCH for-4.6] libxl: clear O_NONBLOCK|O_NDELAY on migration fd and reinstate afterwards Ian Campbell
2015-09-11 10:50 ` Ian Jackson
2015-09-11 12:56   ` Wei Liu
2015-09-11 14:11     ` Ian Campbell
2015-09-11 13:44 ` Andrew Cooper
2015-09-11 14:13   ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.